dlls/msvcrt/math.c

   1 /*
   2  * msvcrt.dll math functions
   3  *
   4  * Copyright 2000 Jon Griffiths
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  19  *
  20  *
  21  * For functions copied from musl libc (http://musl.libc.org/):
  22  * ====================================================
  23  * Copyright 2005-2020 Rich Felker, et al.
  24  *
  25  * Permission is hereby granted, free of charge, to any person obtaining
  26  * a copy of this software and associated documentation files (the
  27  * "Software"), to deal in the Software without restriction, including
  28  * without limitation the rights to use, copy, modify, merge, publish,
  29  * distribute, sublicense, and/or sell copies of the Software, and to
  30  * permit persons to whom the Software is furnished to do so, subject to
  31  * the following conditions:
  32  *
  33  * The above copyright notice and this permission notice shall be
  34  * included in all copies or substantial portions of the Software.
  35  * ====================================================
  36  */
  37
  38 #include <complex.h>
  39 #include <stdio.h>
  40 #include <fenv.h>
  41 #include <fpieee.h>
  42 #include <limits.h>
  43 #include <locale.h>
  44 #include <math.h>
  45
  46 #include "msvcrt.h"
  47 #include "winternl.h"
  48
  49 #include "wine/asm.h"
  50 #include "wine/debug.h"
  51
WINE_DEFAULT_DEBUG_CHANNEL(msvcrt);

/* NOTE(review): presumably div/ldiv can be macros in the included headers and
 * are undefined here so the names can be used as ordinary identifiers later
 * in this file - confirm against the rest of the file. */
#undef div
#undef ldiv

/* Error type codes passed as the first argument of math_error() and placed
 * first in the struct _exception record handed to the user handler. */
#define _DOMAIN         1       /* domain error in argument */
#define _SING           2       /* singularity */
#define _OVERFLOW       3       /* range overflow */
#define _UNDERFLOW      4       /* range underflow */

/* Signature of a math error handler.  math_error() treats a non-zero return
 * value as "handled" and then returns the record's retval without touching
 * errno. */
typedef int (CDECL *MSVCRT_matherr_func)(struct _exception *);

/* Handler installed by __setusermatherr(); NULL means no handler is called. */
static MSVCRT_matherr_func MSVCRT_default_matherr_func = NULL;

/* Result of the PF_XMMI64_INSTRUCTIONS_AVAILABLE query made in
 * msvcrt_init_math(). */
BOOL sse2_supported;
/* Current SSE2 setting; initialised in msvcrt_init_math() and changed by
 * _set_SSE2_enable().  It is never TRUE unless sse2_supported is. */
static BOOL sse2_enabled;

/* Filled in by __wine_init_unix_lib() in msvcrt_init_math(). */
static const struct unix_funcs *unix_funcs;
  70
  71 void msvcrt_init_math( void *module )
  72 {
  73     sse2_supported = IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE );
  74 #if _MSVCR_VER <=71
  75     sse2_enabled = FALSE;
  76 #else
  77     sse2_enabled = sse2_supported;
  78 #endif
  79     __wine_init_unix_lib( module, DLL_PROCESS_ATTACH, NULL, &unix_funcs );
  80 }
  81
  82 /* Copied from musl: src/internal/libm.h */
  83 static inline float fp_barrierf(float x)
  84 {
  85     volatile float y = x;
  86     return y;
  87 }
  88
  89 static inline double fp_barrier(double x)
  90 {
  91     volatile double y = x;
  92     return y;
  93 }
  94
  95 static inline double CDECL ret_nan( BOOL update_sw )
  96 {
  97     double x = 1.0;
  98     if (!update_sw) return -NAN;
  99     return (x - x) / (x - x);
 100 }
 101
/* i386 assembly fragment: clear the MASK bits of the x87 control word.
 *
 * Reserves 4 bytes on the stack, stores the current control word at 0(%esp)
 * and a working copy at 2(%esp).  If any MASK bit is set, those bits are
 * cleared in the copy and the copy is loaded with fldcw; otherwise the
 * control word is left untouched.  The 4 bytes stay allocated until the
 * matching RESET_X87_CW, which compares the two copies.  Clobbers %ax.
 */
#define SET_X87_CW(MASK) \
    "subl $4, %esp\n\t" \
    __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
    "fnstcw (%esp)\n\t" \
    "movw (%esp), %ax\n\t" \
    "movw %ax, 2(%esp)\n\t" \
    "testw $" #MASK ", %ax\n\t" \
    "jz 1f\n\t" \
    "andw $~" #MASK ", %ax\n\t" \
    "movw %ax, 2(%esp)\n\t" \
    "fldcw 2(%esp)\n\t" \
    "1:\n\t"
 114
/* i386 assembly fragment undoing SET_X87_CW.
 *
 * If the working copy at 2(%esp) differs from the control word saved at
 * 0(%esp), the value on top of the x87 stack is stored as a 64-bit double to
 * 8(%esp) (popping it), the saved control word is reloaded, and the value is
 * loaded back from 8(%esp).  The 4 bytes reserved by SET_X87_CW are then
 * released.  Clobbers %ax.
 * NOTE(review): 8(%esp) is used as scratch space; presumably this is the
 * caller's double argument slot - confirm at the use sites.
 */
#define RESET_X87_CW \
    "movw (%esp), %ax\n\t" \
    "cmpw %ax, 2(%esp)\n\t" \
    "je 1f\n\t" \
    "fstpl 8(%esp)\n\t" \
    "fldcw (%esp)\n\t" \
    "fldl 8(%esp)\n\t" \
    "fwait\n\t" \
    "1:\n\t" \
    "addl $4, %esp\n\t" \
    __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t")
 126
 127 /*********************************************************************
 128  *      _matherr (CRTDLL.@)
 129  */
 130 int CDECL _matherr(struct _exception *e)
 131 {
 132     return 0;
 133 }
 134
 135
 136 static double math_error(int type, const char *name, double arg1, double arg2, double retval)
 137 {
 138     struct _exception exception = {type, (char *)name, arg1, arg2, retval};
 139
 140     TRACE("(%d, %s, %g, %g, %g)\n", type, debugstr_a(name), arg1, arg2, retval);
 141
 142     if (MSVCRT_default_matherr_func && MSVCRT_default_matherr_func(&exception))
 143         return exception.retval;
 144
 145     switch (type)
 146     {
 147     case 0:
 148         /* don't set errno */
 149         break;
 150     case _DOMAIN:
 151         *_errno() = EDOM;
 152         break;
 153     case _SING:
 154     case _OVERFLOW:
 155         *_errno() = ERANGE;
 156         break;
 157     case _UNDERFLOW:
 158         /* don't set errno */
 159         break;
 160     default:
 161         ERR("Unhandled math error!\n");
 162     }
 163
 164     return exception.retval;
 165 }
 166
 167 /*********************************************************************
 168  *      __setusermatherr (MSVCRT.@)
 169  */
 170 void CDECL __setusermatherr(MSVCRT_matherr_func func)
 171 {
 172     MSVCRT_default_matherr_func = func;
 173     TRACE("new matherr handler %p\n", func);
 174 }
 175
 176 /*********************************************************************
 177  *      _set_SSE2_enable (MSVCRT.@)
 178  */
 179 int CDECL _set_SSE2_enable(int flag)
 180 {
 181     sse2_enabled = flag && sse2_supported;
 182     return sse2_enabled;
 183 }
 184
 185 #if defined(_WIN64)
 186 # if _MSVCR_VER>=140
 187 /*********************************************************************
 188  *      _get_FMA3_enable (UCRTBASE.@)
 189  */
 190 int CDECL _get_FMA3_enable(void)
 191 {
 192     FIXME("() stub\n");
 193     return 0;
 194 }
 195 # endif
 196
 197 # if _MSVCR_VER>=120
 198 /*********************************************************************
 199  *      _set_FMA3_enable (MSVCR120.@)
 200  */
 201 int CDECL _set_FMA3_enable(int flag)
 202 {
 203     FIXME("(%x) stub\n", flag);
 204     return 0;
 205 }
 206 # endif
 207 #endif
 208
 209 #if !defined(__i386__) || _MSVCR_VER>=120
 210
 211 /*********************************************************************
 212  *      _chgsignf (MSVCRT.@)
 213  */
 214 float CDECL _chgsignf( float num )
 215 {
 216     union { float f; UINT32 i; } u = { num };
 217     u.i ^= 0x80000000;
 218     return u.f;
 219 }
 220
 221 /*********************************************************************
 222  *      _copysignf (MSVCRT.@)
 223  *
 224  * Copied from musl: src/math/copysignf.c
 225  */
 226 float CDECL _copysignf( float x, float y )
 227 {
 228     union { float f; UINT32 i; } ux = { x }, uy = { y };
 229     ux.i &= 0x7fffffff;
 230     ux.i |= uy.i & 0x80000000;
 231     return ux.f;
 232 }
 233
 234 /*********************************************************************
 235  *      _nextafterf (MSVCRT.@)
 236  *
 237  * Copied from musl: src/math/nextafterf.c
 238  */
 239 float CDECL _nextafterf( float x, float y )
 240 {
 241     unsigned int ix = *(unsigned int*)&x;
 242     unsigned int iy = *(unsigned int*)&y;
 243     unsigned int ax, ay, e;
 244
 245     if (isnan(x) || isnan(y))
 246         return x + y;
 247     if (x == y) {
 248         if (_fpclassf(y) & (_FPCLASS_ND | _FPCLASS_PD | _FPCLASS_NZ | _FPCLASS_PZ ))
 249             *_errno() = ERANGE;
 250         return y;
 251     }
 252     ax = ix & 0x7fffffff;
 253     ay = iy & 0x7fffffff;
 254     if (ax == 0) {
 255         if (ay == 0)
 256             return y;
 257         ix = (iy & 0x80000000) | 1;
 258     } else if (ax > ay || ((ix ^ iy) & 0x80000000))
 259         ix--;
 260     else
 261         ix++;
 262     e = ix & 0x7f800000;
 263     /* raise overflow if ix is infinite and x is finite */
 264     if (e == 0x7f800000) {
 265         fp_barrierf(x + x);
 266         *_errno() = ERANGE;
 267     }
 268     /* raise underflow if ix is subnormal or zero */
 269     y = *(float*)&ix;
 270     if (e == 0) {
 271         fp_barrierf(x * x + y * y);
 272         *_errno() = ERANGE;
 273     }
 274     return y;
 275 }
 276
 277 /* Copied from musl: src/math/ilogbf.c */
 278 static int __ilogbf(float x)
 279 {
 280     union { float f; UINT32 i; } u = { x };
 281     int e = u.i >> 23 & 0xff;
 282
 283     if (!e)
 284     {
 285         u.i <<= 9;
 286         if (u.i == 0) return FP_ILOGB0;
 287         /* subnormal x */
 288         for (e = -0x7f; u.i >> 31 == 0; e--, u.i <<= 1);
 289         return e;
 290     }
 291     if (e == 0xff) return u.i << 9 ? FP_ILOGBNAN : INT_MAX;
 292     return e - 0x7f;
 293 }
 294
 295 /*********************************************************************
 296  *      _logbf (MSVCRT.@)
 297  *
 298  * Copied from musl: src/math/logbf.c
 299  */
 300 float CDECL _logbf(float x)
 301 {
 302     if (!isfinite(x))
 303         return x * x;
 304     if (x == 0) {
 305         *_errno() = ERANGE;
 306         return -1 / (x * x);
 307     }
 308     return __ilogbf(x);
 309 }
 310
 311 #endif
 312
 313 /* Copied from musl: src/math/scalbn.c */
 314 static double __scalbn(double x, int n)
 315 {
 316     union {double f; UINT64 i;} u;
 317     double y = x;
 318
 319     if (n > 1023) {
 320         y *= 0x1p1023;
 321         n -= 1023;
 322         if (n > 1023) {
 323             y *= 0x1p1023;
 324             n -= 1023;
 325             if (n > 1023)
 326                 n = 1023;
 327         }
 328     } else if (n < -1022) {
 329         /* make sure final n < -53 to avoid double
 330            rounding in the subnormal range */
 331         y *= 0x1p-1022 * 0x1p53;
 332         n += 1022 - 53;
 333         if (n < -1022) {
 334             y *= 0x1p-1022 * 0x1p53;
 335             n += 1022 - 53;
 336             if (n < -1022)
 337                 n = -1022;
 338         }
 339     }
 340     u.i = (UINT64)(0x3ff + n) << 52;
 341     x = y * u.f;
 342     return x;
 343 }
 344
 345 /* Copied from musl: src/math/__rem_pio2_large.c */
 346 static int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
 347 {
 348     static const int init_jk[] = {3, 4};
 349     static const INT32 ipio2[] = {
 350         0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
 351         0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
 352         0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
 353         0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
 354         0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
 355         0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
 356         0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
 357         0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
 358         0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
 359         0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
 360         0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
 361     };
 362     static const double PIo2[] = {
 363         1.57079625129699707031e+00,
 364         7.54978941586159635335e-08,
 365         5.39030252995776476554e-15,
 366         3.28200341580791294123e-22,
 367         1.27065575308067607349e-29,
 368         1.22933308981111328932e-36,
 369         2.73370053816464559624e-44,
 370         2.16741683877804819444e-51,
 371     };
 372
 373     INT32 jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
 374     double z, fw, f[20], fq[20] = {0}, q[20];
 375
 376     /* initialize jk*/
 377     jk = init_jk[prec];
 378     jp = jk;
 379
 380     /* determine jx,jv,q0, note that 3>q0 */
 381     jx = nx - 1;
 382     jv = (e0 - 3) / 24;
 383     if(jv < 0) jv = 0;
 384     q0 = e0 - 24 * (jv + 1);
 385
 386     /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
 387     j = jv - jx;
 388     m = jx + jk;
 389     for (i = 0; i <= m; i++, j++)
 390         f[i] = j < 0 ? 0.0 : (double)ipio2[j];
 391
 392     /* compute q[0],q[1],...q[jk] */
 393     for (i = 0; i <= jk; i++) {
 394         for (j = 0, fw = 0.0; j <= jx; j++)
 395             fw += x[j] * f[jx + i - j];
 396         q[i] = fw;
 397     }
 398
 399     jz = jk;
 400 recompute:
 401     /* distill q[] into iq[] reversingly */
 402     for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
 403         fw = (double)(INT32)(0x1p-24 * z);
 404         iq[i] = (INT32)(z - 0x1p24 * fw);
 405         z = q[j - 1] + fw;
 406     }
 407
 408     /* compute n */
 409     z = __scalbn(z, q0); /* actual value of z */
 410     z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */
 411     n = (INT32)z;
 412     z -= (double)n;
 413     ih = 0;
 414     if (q0 > 0) {  /* need iq[jz-1] to determine n */
 415         i = iq[jz - 1] >> (24 - q0);
 416         n += i;
 417         iq[jz - 1] -= i << (24 - q0);
 418         ih = iq[jz - 1] >> (23 - q0);
 419     }
 420     else if (q0 == 0) ih = iq[jz - 1] >> 23;
 421     else if (z >= 0.5) ih = 2;
 422
 423     if (ih > 0) {  /* q > 0.5 */
 424         n += 1;
 425         carry = 0;
 426         for (i = 0; i < jz; i++) {  /* compute 1-q */
 427             j = iq[i];
 428             if (carry == 0) {
 429                 if (j != 0) {
 430                     carry = 1;
 431                     iq[i] = 0x1000000 - j;
 432                 }
 433             } else
 434                 iq[i] = 0xffffff - j;
 435         }
 436         if (q0 > 0) {  /* rare case: chance is 1 in 12 */
 437             switch(q0) {
 438             case 1:
 439                 iq[jz - 1] &= 0x7fffff;
 440                 break;
 441             case 2:
 442                 iq[jz - 1] &= 0x3fffff;
 443                 break;
 444             }
 445         }
 446         if (ih == 2) {
 447             z = 1.0 - z;
 448             if (carry != 0)
 449                 z -= __scalbn(1.0, q0);
 450         }
 451     }
 452
 453     /* check if recomputation is needed */
 454     if (z == 0.0) {
 455         j = 0;
 456         for (i = jz - 1; i >= jk; i--) j |= iq[i];
 457         if (j == 0) {  /* need recomputation */
 458             for (k = 1; iq[jk - k] == 0; k++);  /* k = no. of terms needed */
 459
 460             for (i = jz + 1; i <= jz + k; i++) {  /* add q[jz+1] to q[jz+k] */
 461                 f[jx + i] = (double)ipio2[jv + i];
 462                 for (j = 0, fw = 0.0; j <= jx; j++)
 463                     fw += x[j] * f[jx + i - j];
 464                 q[i] = fw;
 465             }
 466             jz += k;
 467             goto recompute;
 468         }
 469     }
 470
 471     /* chop off zero terms */
 472     if (z == 0.0) {
 473         jz -= 1;
 474         q0 -= 24;
 475         while (iq[jz] == 0) {
 476             jz--;
 477             q0 -= 24;
 478         }
 479     } else { /* break z into 24-bit if necessary */
 480         z = __scalbn(z, -q0);
 481         if (z >= 0x1p24) {
 482             fw = (double)(INT32)(0x1p-24 * z);
 483             iq[jz] = (INT32)(z - 0x1p24 * fw);
 484             jz += 1;
 485             q0 += 24;
 486             iq[jz] = (INT32)fw;
 487         } else
 488             iq[jz] = (INT32)z;
 489     }
 490
 491     /* convert integer "bit" chunk to floating-point value */
 492     fw = __scalbn(1.0, q0);
 493     for (i = jz; i >= 0; i--) {
 494         q[i] = fw * (double)iq[i];
 495         fw *= 0x1p-24;
 496     }
 497
 498     /* compute PIo2[0,...,jp]*q[jz,...,0] */
 499     for(i = jz; i >= 0; i--) {
 500         for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
 501             fw += PIo2[k] * q[i + k];
 502         fq[jz - i] = fw;
 503     }
 504
 505     /* compress fq[] into y[] */
 506     switch(prec) {
 507     case 0:
 508         fw = 0.0;
 509         for (i = jz; i >= 0; i--)
 510             fw += fq[i];
 511         y[0] = ih == 0 ? fw : -fw;
 512         break;
 513     case 1:
 514     case 2:
 515         fw = 0.0;
 516         for (i = jz; i >= 0; i--)
 517             fw += fq[i];
 518         fw = (double)fw;
 519         y[0] = ih==0 ? fw : -fw;
 520         fw = fq[0] - fw;
 521         for (i = 1; i <= jz; i++)
 522             fw += fq[i];
 523         y[1] = ih == 0 ? fw : -fw;
 524         break;
 525     case 3:  /* painful */
 526         for (i = jz; i > 0; i--) {
 527             fw = fq[i - 1] + fq[i];
 528             fq[i] += fq[i - 1] - fw;
 529             fq[i - 1] = fw;
 530         }
 531         for (i = jz; i > 1; i--) {
 532             fw = fq[i - 1] + fq[i];
 533             fq[i] += fq[i - 1] - fw;
 534             fq[i - 1] = fw;
 535         }
 536         for (fw = 0.0, i = jz; i >= 2; i--)
 537             fw += fq[i];
 538         if (ih == 0) {
 539             y[0] = fq[0];
 540             y[1] = fq[1];
 541             y[2] = fw;
 542         } else {
 543             y[0] = -fq[0];
 544             y[1] = -fq[1];
 545             y[2] = -fw;
 546         }
 547     }
 548     return n & 7;
 549 }
 550
 551 /* Based on musl implementation: src/math/round.c */
 552 static double __round(double x)
 553 {
 554     ULONGLONG llx = *(ULONGLONG*)&x, tmp;
 555     int e = (llx >> 52 & 0x7ff) - 0x3ff;
 556
 557     if (e >= 52)
 558         return x;
 559     if (e < -1)
 560         return 0 * x;
 561     else if (e == -1)
 562         return signbit(x) ? -1 : 1;
 563
 564     tmp = 0x000fffffffffffffULL >> e;
 565     if (!(llx & tmp))
 566         return x;
 567     llx += 0x0008000000000000ULL >> e;
 568     llx &= ~tmp;
 569     return *(double*)&llx;
 570 }
 571
 572 #if !defined(__i386__) || _MSVCR_VER >= 120
/* Copied from musl: src/math/expm1f.c
 *
 * Compute exp(x) - 1 in single precision.
 *
 * Special cases handled before the main computation:
 *   - NaN and +infinity are returned as is, -infinity returns -1
 *   - x <= -27*ln2 reports _UNDERFLOW through math_error() and returns -1
 *   - x > log(FLT_MAX) reports _OVERFLOW through math_error()
 *   - |x| < 2**-25 returns x
 * Note that both math_error() calls use the function name "exp".
 *
 * Otherwise x is reduced to hi - lo with x = k*ln2 + (hi - lo), expm1 of the
 * reduced argument is evaluated, and the result is scaled by 2^k. */
static float __expm1f(float x)
{
    static const float ln2_hi = 6.9313812256e-01,
        ln2_lo = 9.0580006145e-06,
        invln2 = 1.4426950216e+00,
        Q1 = -3.3333212137e-2,
        Q2 = 1.5807170421e-3;

    float y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
    union {float f; UINT32 i;} u = {x};
    UINT32 hx = u.i & 0x7fffffff; /* bits of |x| */
    int k, sign = u.i >> 31;

    /* filter out huge and non-finite argument */
    if (hx >= 0x4195b844) { /* if |x|>=27*ln2 */
        if (hx >= 0x7f800000) /* Inf or NaN: only -Inf maps to -1 */
            return u.i == 0xff800000 ? -1 : x;
        if (sign)
            return math_error(_UNDERFLOW, "exp", x, 0, -1);
        if (hx > 0x42b17217) /* x > log(FLT_MAX) */
            return math_error(_OVERFLOW, "exp", x, 0, fp_barrierf(x * FLT_MAX));
    }

    /* argument reduction */
    if (hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
        if (hx < 0x3F851592) { /* and |x| < 1.5 ln2 */
            if (!sign) {
                hi = x - ln2_hi;
                lo = ln2_lo;
                k = 1;
            } else {
                hi = x + ln2_hi;
                lo = -ln2_lo;
                k = -1;
            }
        } else {
            k = invln2 * x + (sign ? -0.5f : 0.5f);
            t = k;
            hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
            lo = t * ln2_lo;
        }
        x = hi - lo;
        c = (hi - x) - lo; /* rounding error of the reduction */
    } else if (hx < 0x33000000) { /* when |x|<2**-25, return x */
        if (hx < 0x00800000)
            fp_barrierf(x * x); /* force evaluation of x*x for subnormal x */
        return x;
    } else
        k = 0; /* no reduction needed; c stays unset and is not used */

    /* x is now in primary range */
    hfx = 0.5f * x;
    hxs = x * hfx;
    r1 = 1.0f + hxs * (Q1 + hxs * Q2);
    t = 3.0f - r1 * hfx;
    e = hxs * ((r1 - t) / (6.0f - x * t));
    if (k == 0) /* c is 0 */
        return x - (x * e - hxs);
    e = x * (e - c) - c;
    e -= hxs;
    /* exp(x) ~ 2^k (x_reduced - e + 1) */
    if (k == -1)
        return 0.5f * (x - e) - 0.5f;
    if (k == 1) {
        if (x < -0.25f)
            return -2.0f * (e - (x + 0.5f));
        return 1.0f + 2.0f * (x - e);
    }
    u.i = (0x7f + k) << 23; /* 2^k */
    twopk = u.f;
    if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
        y = x - e + 1.0f;
        if (k == 128)
            y = y * 2.0f * 0x1p127f; /* 2^128 is not representable: scale in two steps */
        else
            y = y * twopk;
        return y - 1.0f;
    }
    u.i = (0x7f-k) << 23; /* 2^-k */
    if (k < 23)
        y = (x - e + (1 - u.f)) * twopk;
    else
        y = (x - (e + u.f) + 1) * twopk;
    return y;
}
 659
 660 /* Copied from musl: src/math/__sindf.c */
 661 static float __sindf(double x)
 662 {
 663     static const double S1 = -0x1.5555555555555p-3,
 664         S2 = 0x1.1111111111111p-7,
 665         S3 = -0x1.a01a01a01a01ap-13,
 666         S4 = 0x1.71de3a556c734p-19;
 667
 668     double r, s, w, z;
 669
 670     z = x * x;
 671     if (x > -7.8175831586122513e-03 && x < 7.8175831586122513e-03)
 672         return x * (1 + S1 * z);
 673
 674     w = z * z;
 675     r = S3 + z * S4;
 676     s = z * x;
 677     return (x + s * (S1 + z * S2)) + s * w * r;
 678 }
 679
 680 /* Copied from musl: src/math/__cosdf.c */
 681 static float __cosdf(double x)
 682 {
 683     static const double C0 = -0x1.0000000000000p-1,
 684         C1 = 0x1.5555555555555p-5,
 685         C2 = -0x1.6c16c16c16c17p-10,
 686         C3 = 0x1.a01a01a01a01ap-16,
 687         C4 = -0x1.27e4fb7789f5cp-22;
 688     double z;
 689
 690     z = x * x;
 691     if (x > -7.8163146972656250e-03 && x < 7.8163146972656250e-03)
 692         return 1 + C0 * z;
 693     return 1.0 + z * (C0 + z * (C1 + z * (C2 + z * (C3 + z * C4))));
 694 }
 695
/* 32-entry lookup table of 64-bit patterns; the first entry is the bit
 * pattern of the double 1.0.
 * NOTE(review): this looks like the 2^(i/32) table from musl's
 * src/math/exp2f_data.c, stored with the index already subtracted from the
 * exponent field - confirm against the functions that use it later in the
 * file. */
static const UINT64 exp2f_T[] = {
    0x3ff0000000000000ULL, 0x3fefd9b0d3158574ULL, 0x3fefb5586cf9890fULL, 0x3fef9301d0125b51ULL,
    0x3fef72b83c7d517bULL, 0x3fef54873168b9aaULL, 0x3fef387a6e756238ULL, 0x3fef1e9df51fdee1ULL,
    0x3fef06fe0a31b715ULL, 0x3feef1a7373aa9cbULL, 0x3feedea64c123422ULL, 0x3feece086061892dULL,
    0x3feebfdad5362a27ULL, 0x3feeb42b569d4f82ULL, 0x3feeab07dd485429ULL, 0x3feea47eb03a5585ULL,
    0x3feea09e667f3bcdULL, 0x3fee9f75e8ec5f74ULL, 0x3feea11473eb0187ULL, 0x3feea589994cce13ULL,
    0x3feeace5422aa0dbULL, 0x3feeb737b0cdc5e5ULL, 0x3feec49182a3f090ULL, 0x3feed503b23e255dULL,
    0x3feee89f995ad3adULL, 0x3feeff76f2fb5e47ULL, 0x3fef199bdd85529cULL, 0x3fef3720dcef9069ULL,
    0x3fef5818dcfba487ULL, 0x3fef7c97337b9b5fULL, 0x3fefa4afa2a490daULL, 0x3fefd0765b6e4540ULL
};
 706 #endif
 707
 708 #ifndef __i386__
 709
 710 /*********************************************************************
 711  *      _fpclassf (MSVCRT.@)
 712  */
 713 int CDECL _fpclassf( float num )
 714 {
 715     union { float f; UINT32 i; } u = { num };
 716     int e = u.i >> 23 & 0xff;
 717     int s = u.i >> 31;
 718
 719     switch (e)
 720     {
 721     case 0:
 722         if (u.i << 1) return s ? _FPCLASS_ND : _FPCLASS_PD;
 723         return s ? _FPCLASS_NZ : _FPCLASS_PZ;
 724     case 0xff:
 725         if (u.i << 9) return ((u.i >> 22) & 1) ? _FPCLASS_QNAN : _FPCLASS_SNAN;
 726         return s ? _FPCLASS_NINF : _FPCLASS_PINF;
 727     default:
 728         return s ? _FPCLASS_NN : _FPCLASS_PN;
 729     }
 730 }
 731
 732 /*********************************************************************
 733  *      _finitef (MSVCRT.@)
 734  */
 735 int CDECL _finitef( float num )
 736 {
 737     union { float f; UINT32 i; } u = { num };
 738     return (u.i & 0x7fffffff) < 0x7f800000;
 739 }
 740
 741 /*********************************************************************
 742  *      _isnanf (MSVCRT.@)
 743  */
 744 int CDECL _isnanf( float num )
 745 {
 746     union { float f; UINT32 i; } u = { num };
 747     return (u.i & 0x7fffffff) > 0x7f800000;
 748 }
 749
 750 static float asinf_R(float z)
 751 {
 752     /* coefficients for R(x^2) */
 753     static const float p1 = 1.66666672e-01,
 754                  p2 = -5.11644611e-02,
 755                  p3 = -1.21124933e-02,
 756                  p4 = -3.58742251e-03,
 757                  q1 = -7.56982703e-01;
 758
 759     float p, q;
 760     p = z * (p1 + z * (p2 + z * (p3 + z * p4)));
 761     q = 1.0f + z * q1;
 762     return p / q;
 763 }
 764
 765 /*********************************************************************
 766  *      acosf (MSVCRT.@)
 767  *
 768  * Copied from musl: src/math/acosf.c
 769  */
 770 float CDECL acosf( float x )
 771 {
 772     static const double pio2_lo = 6.12323399573676603587e-17;
 773
 774     float z, w, s, c, df;
 775     unsigned int hx, ix;
 776
 777     hx = *(unsigned int*)&x;
 778     ix = hx & 0x7fffffff;
 779     /* |x| >= 1 or nan */
 780     if (ix >= 0x3f800000) {
 781         if (ix == 0x3f800000) {
 782             if (hx >> 31)
 783                 return M_PI;
 784             return 0;
 785         }
 786         if (isnan(x)) return x;
 787         return math_error(_DOMAIN, "acosf", x, 0, 0 / (x - x));
 788     }
 789     /* |x| < 0.5 */
 790     if (ix < 0x3f000000) {
 791         if (ix <= 0x32800000) /* |x| < 2**-26 */
 792             return M_PI_2;
 793         return M_PI_2 - (x - (pio2_lo - x * asinf_R(x * x)));
 794     }
 795     /* x < -0.5 */
 796     if (hx >> 31) {
 797         z = (1 + x) * 0.5f;
 798         s = sqrtf(z);
 799         return M_PI - 2 * (s + ((double)s * asinf_R(z)));
 800     }
 801     /* x > 0.5 */
 802     z = (1 - x) * 0.5f;
 803     s = sqrtf(z);
 804     hx = *(unsigned int*)&s & 0xffff0000;
 805     df = *(float*)&hx;
 806     c = (z - df * df) / (s + df);
 807     w = asinf_R(z) * s + c;
 808     return 2 * (df + w);
 809 }
 810
 811 /*********************************************************************
 812  *      asinf (MSVCRT.@)
 813  *
 814  * Copied from musl: src/math/asinf.c
 815  */
 816 float CDECL asinf( float x )
 817 {
 818     static const double pio2 = 1.570796326794896558e+00;
 819     static const float pio4_hi = 0.785398125648;
 820     static const float pio2_lo = 7.54978941586e-08;
 821
 822     float s, z, f, c;
 823     unsigned int hx, ix;
 824
 825     hx = *(unsigned int*)&x;
 826     ix = hx & 0x7fffffff;
 827     if (ix >= 0x3f800000) {  /* |x| >= 1 */
 828         if (ix == 0x3f800000)  /* |x| == 1 */
 829             return x * pio2 + 7.5231638453e-37;  /* asin(+-1) = +-pi/2 with inexact */
 830         if (isnan(x)) return x;
 831         return math_error(_DOMAIN, "asinf", x, 0, 0 / (x - x));
 832     }
 833     if (ix < 0x3f000000) {  /* |x| < 0.5 */
 834         /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */
 835         if (ix < 0x39800000 && ix >= 0x00800000)
 836             return x;
 837         return x + x * asinf_R(x * x);
 838     }
 839     /* 1 > |x| >= 0.5 */
 840     z = (1 - fabsf(x)) * 0.5f;
 841     s = sqrtf(z);
 842     /* f+c = sqrt(z) */
 843     *(unsigned int*)&f = *(unsigned int*)&s & 0xffff0000;
 844     c = (z - f * f) / (s + f);
 845     x = pio4_hi - (2 * s * asinf_R(z) - (pio2_lo - 2 * c) - (pio4_hi - 2 * f));
 846     if (hx >> 31)
 847         return -x;
 848     return x;
 849 }
 850
 851 /*********************************************************************
 852  *      atanf (MSVCRT.@)
 853  *
 854  * Copied from musl: src/math/atanf.c
 855  */
 856 float CDECL atanf( float x )
 857 {
 858     static const float atanhi[] = {
 859         4.6364760399e-01,
 860         7.8539812565e-01,
 861         9.8279368877e-01,
 862         1.5707962513e+00,
 863     };
 864     static const float atanlo[] = {
 865         5.0121582440e-09,
 866         3.7748947079e-08,
 867         3.4473217170e-08,
 868         7.5497894159e-08,
 869     };
 870     static const float aT[] = {
 871         3.3333328366e-01,
 872         -1.9999158382e-01,
 873         1.4253635705e-01,
 874         -1.0648017377e-01,
 875         6.1687607318e-02,
 876     };
 877
 878     float w, s1, s2, z;
 879     unsigned int ix, sign;
 880     int id;
 881
 882 #if _MSVCR_VER == 0
 883     if (isnan(x)) return math_error(_DOMAIN, "atanf", x, 0, x);
 884 #endif
 885
 886     ix = *(unsigned int*)&x;
 887     sign = ix >> 31;
 888     ix &= 0x7fffffff;
 889     if (ix >= 0x4c800000) {  /* if |x| >= 2**26 */
 890         if (isnan(x))
 891             return x;
 892         z = atanhi[3] + 7.5231638453e-37;
 893         return sign ? -z : z;
 894     }
 895     if (ix < 0x3ee00000) {   /* |x| < 0.4375 */
 896         if (ix < 0x39800000) {  /* |x| < 2**-12 */
 897             if (ix < 0x00800000)
 898                 /* raise underflow for subnormal x */
 899                 fp_barrierf(x*x);
 900             return x;
 901         }
 902         id = -1;
 903     } else {
 904         x = fabsf(x);
 905         if (ix < 0x3f980000) {  /* |x| < 1.1875 */
 906             if (ix < 0x3f300000) {  /*  7/16 <= |x| < 11/16 */
 907                 id = 0;
 908                 x = (2.0f * x - 1.0f) / (2.0f + x);
 909             } else {                /* 11/16 <= |x| < 19/16 */
 910                 id = 1;
 911                 x = (x - 1.0f) / (x + 1.0f);
 912             }
 913         } else {
 914             if (ix < 0x401c0000) {  /* |x| < 2.4375 */
 915                 id = 2;
 916                 x = (x - 1.5f) / (1.0f + 1.5f * x);
 917             } else {                /* 2.4375 <= |x| < 2**26 */
 918                 id = 3;
 919                 x = -1.0f / x;
 920             }
 921         }
 922     }
 923     /* end of argument reduction */
 924     z = x * x;
 925     w = z * z;
 926     /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
 927     s1 = z * (aT[0] + w * (aT[2] + w * aT[4]));
 928     s2 = w * (aT[1] + w * aT[3]);
 929     if (id < 0)
 930         return x - x * (s1 + s2);
 931     z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x);
 932     return sign ? -z : z;
 933 }
 934
 935 /*********************************************************************
 936  *              atan2f (MSVCRT.@)
 937  *
 938  * Copied from musl: src/math/atan2f.c
 939  */
 940 float CDECL atan2f( float y, float x )
 941 {
 942     static const float pi     = 3.1415927410e+00,
 943                  pi_lo  = -8.7422776573e-08;
 944
 945     float z;
 946     unsigned int m, ix, iy;
 947
 948     if (isnan(x) || isnan(y))
 949         return x + y;
 950     ix = *(unsigned int*)&x;
 951     iy = *(unsigned int*)&y;
 952     if (ix == 0x3f800000)  /* x=1.0 */
 953         return atanf(y);
 954     m = ((iy >> 31) & 1) | ((ix >> 30) & 2);  /* 2*sign(x)+sign(y) */
 955     ix &= 0x7fffffff;
 956     iy &= 0x7fffffff;
 957
 958     /* when y = 0 */
 959     if (iy == 0) {
 960         switch (m) {
 961         case 0:
 962         case 1: return y;   /* atan(+-0,+anything)=+-0 */
 963         case 2: return pi;  /* atan(+0,-anything) = pi */
 964         case 3: return -pi; /* atan(-0,-anything) =-pi */
 965         }
 966     }
 967     /* when x = 0 */
 968     if (ix == 0)
 969         return m & 1 ? -pi / 2 : pi / 2;
 970     /* when x is INF */
 971     if (ix == 0x7f800000) {
 972         if (iy == 0x7f800000) {
 973             switch (m) {
 974             case 0: return pi / 4;      /* atan(+INF,+INF) */
 975             case 1: return -pi / 4;     /* atan(-INF,+INF) */
 976             case 2: return 3 * pi / 4;  /*atan(+INF,-INF)*/
 977             case 3: return -3 * pi / 4; /*atan(-INF,-INF)*/
 978             }
 979         } else {
 980             switch (m) {
 981             case 0: return 0.0f;    /* atan(+...,+INF) */
 982             case 1: return -0.0f;   /* atan(-...,+INF) */
 983             case 2: return pi;      /* atan(+...,-INF) */
 984             case 3: return -pi;     /* atan(-...,-INF) */
 985             }
 986         }
 987     }
 988     /* |y/x| > 0x1p26 */
 989     if (ix + (26 << 23) < iy || iy == 0x7f800000)
 990         return m & 1 ? -pi / 2 : pi / 2;
 991
 992     /* z = atan(|y/x|) with correct underflow */
 993     if ((m & 2) && iy + (26 << 23) < ix)  /*|y/x| < 0x1p-26, x < 0 */
 994         z = 0.0;
 995     else
 996         z = atanf(fabsf(y / x));
 997     switch (m) {
 998     case 0: return z;                /* atan(+,+) */
 999     case 1: return -z;               /* atan(-,+) */
1000     case 2: return pi - (z - pi_lo); /* atan(+,-) */
1001     default: /* case 3 */
1002         return (z - pi_lo) - pi;     /* atan(-,-) */
1003     }
1004 }
1005
/* Copied from musl: src/math/__rem_pio2f.c
 *
 * Argument reduction for the float trig functions.  Writes the reduced
 * argument to *y and returns n, the multiple of pi/2 that was removed
 * (the callers in this file only look at n & 3 or n & 1).
 * For inf or NaN input *y is set to x - x and 0 is returned.
 */
static int __rem_pio2f(float x, double *y)
{
    /* toint:   adding and then subtracting it rounds a double to an integer
     * pio4:    bound used to re-check the reduced argument
     * invpio2: 2/pi
     * pio2_1 + pio2_1t: pi/2 split into a leading part and a tail */
    static const double toint = 1.5 / DBL_EPSILON,
        pio4 = 0x1.921fb6p-1,
        invpio2 = 6.36619772367581382433e-01,
        pio2_1 = 1.57079631090164184570e+00,
        pio2_1t = 1.58932547735281966916e-08;

    union {float f; uint32_t i;} u = {x};
    double tx[1], ty[1], fn;
    UINT32 ix;
    int n, sign, e0;

    /* bit pattern of |x| */
    ix = u.i & 0x7fffffff;
    /* 25+53 bit pi is good enough for medium size */
    if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */
        /* Use a specialized rint() to get fn. */
        fn = fp_barrier(x * invpio2 + toint) - toint;
        n  = (int)fn;
        *y = x - fn * pio2_1 - fn * pio2_1t;
        /* Matters with directed rounding. */
        /* If fn came out one off, step it back so |*y| <= pio4 again. */
        if (*y < -pio4) {
            n--;
            fn--;
            *y = x - fn * pio2_1 - fn * pio2_1t;
        } else if (*y > pio4) {
            n++;
            fn++;
            *y = x - fn * pio2_1 - fn * pio2_1t;
        }
        return n;
    }
    if(ix >= 0x7f800000) { /* x is inf or NaN */
        *y = x - x;
        return 0;
    }
    /* scale x into [2^23, 2^24-1] */
    sign = u.i >> 31;
    e0 = (ix >> 23) - (0x7f + 23); /* e0 = ilogb(|x|)-23, positive */
    u.i = ix - (e0 << 23);
    tx[0] = u.f;
    /* large arguments: reduce the scaled |x| with the generic helper */
    n = __rem_pio2_large(tx, ty, e0, 1, 0);
    /* the helper worked on |x|; mirror the result for negative x */
    if (sign) {
        *y = -ty[0];
        return -n;
    }
    *y = ty[0];
    return n;
}
1056
1057 /*********************************************************************
1058  *      cosf (MSVCRT.@)
1059  *
1060  * Copied from musl: src/math/cosf.c
1061  */
1062 float CDECL cosf( float x )
1063 {
1064     static const double c1pio2 = 1*M_PI_2,
1065         c2pio2 = 2*M_PI_2,
1066         c3pio2 = 3*M_PI_2,
1067         c4pio2 = 4*M_PI_2;
1068
1069     double y;
1070     UINT32 ix;
1071     unsigned n, sign;
1072
1073     ix = *(UINT32*)&x;
1074     sign = ix >> 31;
1075     ix &= 0x7fffffff;
1076
1077     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1078         if (ix < 0x39800000) { /* |x| < 2**-12 */
1079             /* raise inexact if x != 0 */
1080             fp_barrierf(x + 0x1p120f);
1081             return 1.0f;
1082         }
1083         return __cosdf(x);
1084     }
1085     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1086         if (ix > 0x4016cbe3) /* |x| ~> 3*pi/4 */
1087             return -__cosdf(sign ? x + c2pio2 : x - c2pio2);
1088         else {
1089             if (sign)
1090                 return __sindf(x + c1pio2);
1091             else
1092                 return __sindf(c1pio2 - x);
1093         }
1094     }
1095     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1096         if (ix > 0x40afeddf) /* |x| ~> 7*pi/4 */
1097             return __cosdf(sign ? x + c4pio2 : x - c4pio2);
1098         else {
1099             if (sign)
1100                 return __sindf(-x - c3pio2);
1101             else
1102                 return __sindf(x - c3pio2);
1103         }
1104     }
1105
1106     /* cos(Inf or NaN) is NaN */
1107     if (isinf(x)) return math_error(_DOMAIN, "cosf", x, 0, x - x);
1108     if (ix >= 0x7f800000)
1109         return x - x;
1110
1111     /* general argument reduction needed */
1112     n = __rem_pio2f(x, &y);
1113     switch (n & 3) {
1114     case 0: return __cosdf(y);
1115     case 1: return __sindf(-y);
1116     case 2: return -__cosdf(y);
1117     default: return __sindf(y);
1118     }
1119 }
1120
1121 /* Copied from musl: src/math/__expo2f.c */
1122 static float __expo2f(float x, float sign)
1123 {
1124     static const int k = 235;
1125     static const float kln2 = 0x1.45c778p+7f;
1126     float scale;
1127
1128     *(UINT32*)&scale = (UINT32)(0x7f + k/2) << 23;
1129     return expf(x - kln2) * (sign * scale) * scale;
1130 }
1131
1132 /*********************************************************************
1133  *      coshf (MSVCRT.@)
1134  *
1135  * Copied from musl: src/math/coshf.c
1136  */
1137 float CDECL coshf( float x )
1138 {
1139     UINT32 ui = *(UINT32*)&x;
1140     UINT32 sign = ui & 0x80000000;
1141     float t;
1142
1143     /* |x| */
1144     ui &= 0x7fffffff;
1145     x = *(float*)&ui;
1146
1147     /* |x| < log(2) */
1148     if (ui < 0x3f317217) {
1149         if (ui < 0x3f800000 - (12 << 23)) {
1150             fp_barrierf(x + 0x1p120f);
1151             return 1;
1152         }
1153         t = __expm1f(x);
1154         return 1 + t * t / (2 * (1 + t));
1155     }
1156
1157     /* |x| < log(FLT_MAX) */
1158     if (ui < 0x42b17217) {
1159         t = expf(x);
1160         return 0.5f * (t + 1 / t);
1161     }
1162
1163     /* |x| > log(FLT_MAX) or nan */
1164     if (ui > 0x7f800000)
1165         *(UINT32*)&t = ui | sign | 0x400000;
1166     else
1167         t = __expo2f(x, 1.0f);
1168     return t;
1169 }
1170
1171 /*********************************************************************
1172  *      expf (MSVCRT.@)
1173  */
1174 float CDECL expf( float x )
1175 {
1176     static const double C[] = {
1177         0x1.c6af84b912394p-5 / (1 << 5) / (1 << 5) / (1 << 5),
1178         0x1.ebfce50fac4f3p-3 / (1 << 5) / (1 << 5),
1179         0x1.62e42ff0c52d6p-1 / (1 << 5)
1180     };
1181     static const double invln2n = 0x1.71547652b82fep+0 * (1 << 5);
1182
1183     double kd, z, r, r2, y, s;
1184     UINT32 abstop;
1185     UINT64 ki, t;
1186
1187     abstop = (*(UINT32*)&x >> 20) & 0x7ff;
1188     if (abstop >= 0x42b) {
1189         /* |x| >= 88 or x is nan.  */
1190         if (*(UINT32*)&x == 0xff800000)
1191             return 0.0f;
1192         if (abstop >= 0x7f8)
1193             return x + x;
1194         if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
1195             return math_error(_OVERFLOW, "expf", x, 0, x * FLT_MAX);
1196         if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
1197             return math_error(_UNDERFLOW, "expf", x, 0, fp_barrierf(FLT_MIN) * FLT_MIN);
1198     }
1199
1200     /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k.  */
1201     z = invln2n * x;
1202
1203     /* Round and convert z to int, the result is in [-150*N, 128*N] and
1204        ideally ties-to-even rule is used, otherwise the magnitude of r
1205        can be bigger which gives larger approximation error.  */
1206     kd = __round(z);
1207     ki = kd;
1208     r = z - kd;
1209
1210     /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
1211     t = exp2f_T[ki % (1 << 5)];
1212     t += ki << (52 - 5);
1213     s = *(double*)&t;
1214     z = C[0] * r + C[1];
1215     r2 = r * r;
1216     y = C[2] * r + 1;
1217     y = z * r2 + y;
1218     y = y * s;
1219     return y;
1220 }
1221
1222 /*********************************************************************
1223  *      fmodf (MSVCRT.@)
1224  *
1225  * Copied from musl: src/math/fmodf.c
1226  */
1227 float CDECL fmodf( float x, float y )
1228 {
1229     UINT32 xi = *(UINT32*)&x;
1230     UINT32 yi = *(UINT32*)&y;
1231     int ex = xi>>23 & 0xff;
1232     int ey = yi>>23 & 0xff;
1233     UINT32 sx = xi & 0x80000000;
1234     UINT32 i;
1235
1236     if (isinf(x)) return math_error(_DOMAIN, "fmodf", x, y, (x * y) / (x * y));
1237     if (yi << 1 == 0 || isnan(y) || ex == 0xff)
1238         return (x * y) / (x * y);
1239     if (xi << 1 <= yi << 1) {
1240         if (xi << 1 == yi << 1)
1241             return 0 * x;
1242         return x;
1243     }
1244
1245     /* normalize x and y */
1246     if (!ex) {
1247         for (i = xi << 9; i >> 31 == 0; ex--, i <<= 1);
1248         xi <<= -ex + 1;
1249     } else {
1250         xi &= -1U >> 9;
1251         xi |= 1U << 23;
1252     }
1253     if (!ey) {
1254         for (i = yi << 9; i >> 31 == 0; ey--, i <<= 1);
1255         yi <<= -ey + 1;
1256     } else {
1257         yi &= -1U >> 9;
1258         yi |= 1U << 23;
1259     }
1260
1261     /* x mod y */
1262     for (; ex > ey; ex--) {
1263         i = xi - yi;
1264         if (i >> 31 == 0) {
1265             if (i == 0)
1266                 return 0 * x;
1267             xi = i;
1268         }
1269         xi <<= 1;
1270     }
1271     i = xi - yi;
1272     if (i >> 31 == 0) {
1273         if (i == 0)
1274             return 0 * x;
1275         xi = i;
1276     }
1277     for (; xi>>23 == 0; xi <<= 1, ex--);
1278
1279     /* scale result up */
1280     if (ex > 0) {
1281         xi -= 1U << 23;
1282         xi |= (UINT32)ex << 23;
1283     } else {
1284         xi >>= -ex + 1;
1285     }
1286     xi |= sx;
1287     return *(float*)&xi;
1288 }
1289
/*********************************************************************
 *      logf (MSVCRT.@)
 *
 * Copied from musl: src/math/logf.c src/math/logf_data.c
 *
 * Natural logarithm of a float, evaluated in double precision as
 * log(x) = log1p(z/c - 1) + log(c) + k*Ln2 with x = 2^k * z and c taken
 * from a 16-entry table.
 *
 * Special cases handled below: +-0 goes through math_error(_SING) with a
 * -inf result, negative input through math_error(_DOMAIN) with a NaN
 * result, +inf and NaN are returned unchanged, and 1.0 returns 0.
 */
float CDECL logf( float x )
{
    static const double Ln2 = 0x1.62e42fefa39efp-1;
    /* coefficients of the polynomial that approximates log1p(r) */
    static const double A[] = {
        -0x1.00ea348b88334p-2,
        0x1.5575b0be00b6ap-2,
        -0x1.ffffef20a4123p-2
    };
    /* per-subinterval constants: invc multiplies z (r = z*invc - 1),
       logc is the matching log(c) term */
    static const struct {
        double invc, logc;
    } T[] = {
        { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
        { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
        { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
        { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
        { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
        { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
        { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
        { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
        { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
        { 0x1p+0, 0x0p+0 },
        { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
        { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
        { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
        { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
        { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
        { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 }
    };

    double z, r, r2, y, y0, invc, logc;
    UINT32 ix, iz, tmp;
    int k, i;

    ix = *(UINT32*)&x;
    /* Fix sign of zero with downward rounding when x==1. */
    if (ix == 0x3f800000)
        return 0;
    /* one unsigned compare catches everything outside the normal positive range */
    if (ix - 0x00800000 >= 0x7f800000 - 0x00800000) {
        /* x < 0x1p-126 or inf or nan. */
        if (ix * 2 == 0) /* +-0: the quotient below is -inf for either sign */
            return math_error(_SING, "logf", x, 0, (ix & 0x80000000 ? 1.0 : -1.0) / x);
        if (ix == 0x7f800000) /* log(inf) == inf. */
            return x;
        if (ix * 2 > 0xff000000) /* nan of either sign */
            return x;
        if (ix & 0x80000000) /* negative, including -inf */
            return math_error(_DOMAIN, "logf", x, 0, (x - x) / (x - x));
        /* x is subnormal, normalize it. */
        x *= 0x1p23f;
        ix = *(UINT32*)&x;
        /* undo the 2^23 scaling in the exponent field */
        ix -= 23 << 23;
    }

    /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center. */
    tmp = ix - 0x3f330000;
    i = (tmp >> (23 - 4)) % (1 << 4);
    k = (INT32)tmp >> 23; /* arithmetic shift */
    iz = ix - (tmp & (0x1ffu << 23));
    invc = T[i].invc;
    logc = T[i].logc;
    z = *(float*)&iz;

    /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
    r = z * invc - 1;
    y0 = logc + (double)k * Ln2;

    /* Pipelined polynomial evaluation to approximate log1p(r). */
    r2 = r * r;
    y = A[1] * r + A[2];
    y = A[0] * r2 + y;
    y = y * r2 + (y0 + r);
    return y;
}
1370
/*********************************************************************
 *      log10f (MSVCRT.@)
 *
 * Base-10 logarithm of a float.  x is reduced to 2^k * m with m in
 * [sqrt(2)/2, sqrt(2)], log(m) is evaluated as a hi + lo pair and the
 * result is assembled from hi, lo and k with split constants for
 * 1/ln(10) and log10(2).
 *
 * Special cases handled below: +-0 goes through math_error(_SING) with a
 * -inf result, negative input through math_error(_DOMAIN) with a NaN
 * result, +inf and NaN are returned unchanged, and 1.0 returns 0.
 */
float CDECL log10f( float x )
{
    /* ivln10hi + ivln10lo ~= 1/ln(10), log10_2hi + log10_2lo ~= log10(2);
       Lg1..Lg4 are the coefficients of the polynomial R evaluated below */
    static const float ivln10hi = 4.3432617188e-01,
        ivln10lo = -3.1689971365e-05,
        log10_2hi = 3.0102920532e-01,
        log10_2lo = 7.9034151668e-07,
        Lg1 = 0xaaaaaa.0p-24,
        Lg2 = 0xccce13.0p-25,
        Lg3 = 0x91e9ee.0p-25,
        Lg4 = 0xf89e26.0p-26;

    union {float f; UINT32 i;} u = {x};
    float hfsq, f, s, z, R, w, t1, t2, dk, hi, lo;
    UINT32 ix;
    int k;

    ix = u.i;
    k = 0;
    if (ix < 0x00800000 || ix >> 31) { /* x < 2**-126 */
        if (ix << 1 == 0) /* +-0: -1 / (x * x) is -inf */
            return math_error(_SING, "log10f", x, 0, -1 / (x * x));
        if ((ix & ~(1u << 31)) > 0x7f800000) /* nan with the sign bit set */
            return x;
        if (ix >> 31) /* negative, including -inf */
            return math_error(_DOMAIN, "log10f", x, 0, (x - x) / (x - x));
        /* subnormal number, scale up x */
        k -= 25;
        x *= 0x1p25f;
        u.f = x;
        ix = u.i;
    } else if (ix >= 0x7f800000) { /* +inf or nan */
        return x;
    } else if (ix == 0x3f800000) /* x == 1.0 */
        return 0;

    /* reduce x into [sqrt(2)/2, sqrt(2)] */
    ix += 0x3f800000 - 0x3f3504f3;
    k += (int)(ix >> 23) - 0x7f;
    ix = (ix & 0x007fffff) + 0x3f3504f3;
    u.i = ix;
    x = u.f;

    f = x - 1.0f;
    s = f / (2.0f + f);
    z = s * s;
    w = z * z;
    /* R = t1 + t2: even and odd coefficient halves, evaluated separately */
    t1= w * (Lg2 + w * Lg4);
    t2= z * (Lg1 + w * Lg3);
    R = t2 + t1;
    hfsq = 0.5f * f * f;

    /* hi: f - hfsq with the low 12 bits cleared; lo collects the rest */
    hi = f - hfsq;
    u.f = hi;
    u.i &= 0xfffff000;
    hi = u.f;
    lo = f - hi - hfsq + s * (hfsq + R);
    dk = k;
    return dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi + hi * ivln10hi + dk * log10_2hi;
}
1433
/* Helper for powf(): takes the bit pattern ix of a positive float and
   returns log2 of that value multiplied by the scale factor 1 << 5
   (POWF_SCALE).  Subnormal input must already be normalized by the
   caller, so ix may carry a negative biased exponent. */
static double powf_log2(UINT32 ix)
{
    /* per-subinterval constants: invc multiplies z (r = z*invc - 1),
       logc is the matching log2(c) term, already scaled by 1 << 5 */
    static const struct {
        double invc, logc;
    } T[] = {
        { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * (1 << 5) },
        { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * (1 << 5) },
        { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * (1 << 5) },
        { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * (1 << 5) },
        { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * (1 << 5) },
        { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * (1 << 5) },
        { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * (1 << 5) },
        { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * (1 << 5) },
        { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * (1 << 5) },
        /* the factor written here differs from the other rows, but the
           value is 0 either way */
        { 0x1p+0, 0x0p+0 * (1 << 4) },
        { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * (1 << 5) },
        { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * (1 << 5) },
        { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * (1 << 5) },
        { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * (1 << 5) },
        { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * (1 << 5) },
        { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * (1 << 5) }
    };
    /* coefficients of the polynomial approximating log1p(r)/ln2, scaled by 1 << 5 */
    static const double A[] = {
        0x1.27616c9496e0bp-2 * (1 << 5), -0x1.71969a075c67ap-2 * (1 << 5),
        0x1.ec70a6ca7baddp-2 * (1 << 5), -0x1.7154748bef6c8p-1 * (1 << 5),
        0x1.71547652ab82bp0 * (1 << 5)
    };

    double z, r, r2, r4, p, q, y, y0, invc, logc;
    UINT32 iz, top, tmp;
    int k, i;

    /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center. */
    tmp = ix - 0x3f330000;
    i = (tmp >> (23 - 4)) % (1 << 4);
    top = tmp & 0xff800000;
    iz = ix - top;
    /* shifting by 23 - 5 instead of 23 leaves k multiplied by 1 << 5 */
    k = (INT32)top >> (23 - 5); /* arithmetic shift */
    invc = T[i].invc;
    logc = T[i].logc;
    z = *(float*)&iz;

    /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
    r = z * invc - 1;
    y0 = logc + (double)k;

    /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
    r2 = r * r;
    y = A[0] * r + A[1];
    p = A[2] * r + A[3];
    r4 = r2 * r2;
    q = A[4] * r + y0;
    q = p * r2 + q;
    y = y * r4 + q;
    return y;
}
1494
1495 /* The output of log2 and thus the input of exp2 is either scaled by N
1496    (in case of fast toint intrinsics) or not. The unscaled xd must be
1497    in [-1021,1023], sign_bias sets the sign of the result. */
1498 static float powf_exp2(double xd, UINT32 sign_bias)
1499 {
1500     static const double C[] = {
1501         0x1.c6af84b912394p-5 / (1 << 5) / (1 << 5) / (1 << 5),
1502         0x1.ebfce50fac4f3p-3 / (1 << 5) / (1 << 5),
1503         0x1.62e42ff0c52d6p-1 / (1 << 5)
1504     };
1505
1506     UINT64 ki, ski, t;
1507     double kd, z, r, r2, y, s;
1508
1509     /* N*x = k + r with r in [-1/2, 1/2] */
1510     kd = __round(xd); /* k */
1511     ki = kd;
1512     r = xd - kd;
1513
1514     /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
1515     t = exp2f_T[ki % (1 << 5)];
1516     ski = ki + sign_bias;
1517     t += ski << (52 - 5);
1518     s = *(double*)&t;
1519     z = C[0] * r + C[1];
1520     r2 = r * r;
1521     y = C[2] * r + 1;
1522     y = z * r2 + y;
1523     y = y * s;
1524     return y;
1525 }
1526
1527 /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
1528    the bit representation of a non-zero finite floating-point value. */
1529 static int powf_checkint(UINT32 iy)
1530 {
1531     int e = iy >> 23 & 0xff;
1532     if (e < 0x7f)
1533         return 0;
1534     if (e > 0x7f + 23)
1535         return 2;
1536     if (iy & ((1 << (0x7f + 23 - e)) - 1))
1537         return 0;
1538     if (iy & (1 << (0x7f + 23 - e)))
1539         return 1;
1540     return 2;
1541 }
1542
1543 /*********************************************************************
1544  *      powf (MSVCRT.@)
1545  *
1546  * Copied from musl: src/math/powf.c src/math/powf_data.c
1547  */
1548 float CDECL powf( float x, float y )
1549 {
1550     UINT32 sign_bias = 0;
1551     UINT32 ix, iy;
1552     double logx, ylogx;
1553
1554     ix = *(UINT32*)&x;
1555     iy = *(UINT32*)&y;
1556     if (ix - 0x00800000 >= 0x7f800000 - 0x00800000 ||
1557             2 * iy - 1 >= 2u * 0x7f800000 - 1) {
1558         /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
1559         if (2 * iy - 1 >= 2u * 0x7f800000 - 1) {
1560             if (2 * iy == 0)
1561                 return 1.0f;
1562             if (ix == 0x3f800000)
1563                 return 1.0f;
1564             if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
1565                 return x + y;
1566             if (2 * ix == 2 * 0x3f800000)
1567                 return 1.0f;
1568             if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
1569                 return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
1570             return y * y;
1571         }
1572         if (2 * ix - 1 >= 2u * 0x7f800000 - 1) {
1573             float x2 = x * x;
1574             if (ix & 0x80000000 && powf_checkint(iy) == 1)
1575                 x2 = -x2;
1576             if (iy & 0x80000000 && x2 == 0.0)
1577                 return math_error(_SING, "powf", x, y, 1 / x2);
1578             /* Without the barrier some versions of clang hoist the 1/x2 and
1579                thus division by zero exception can be signaled spuriously. */
1580             return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2;
1581         }
1582         /* x and y are non-zero finite. */
1583         if (ix & 0x80000000) {
1584             /* Finite x < 0. */
1585             int yint = powf_checkint(iy);
1586             if (yint == 0)
1587                 return math_error(_DOMAIN, "powf", x, y, 0 / (x - x));
1588             if (yint == 1)
1589                 sign_bias = 1 << (5 + 11);
1590             ix &= 0x7fffffff;
1591         }
1592         if (ix < 0x00800000) {
1593             /* Normalize subnormal x so exponent becomes negative. */
1594             x *= 0x1p23f;
1595             ix = *(UINT32*)&x;
1596             ix &= 0x7fffffff;
1597             ix -= 23 << 23;
1598         }
1599     }
1600     logx = powf_log2(ix);
1601     ylogx = y * logx; /* cannot overflow, y is single prec. */
1602     if ((*(UINT64*)&ylogx >> 47 & 0xffff) >= 0x40af800000000000llu >> 47) {
1603         /* |y*log(x)| >= 126. */
1604         if (ylogx > 0x1.fffffffd1d571p+6 * (1 << 5))
1605             return math_error(_OVERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) * 0x1p1023);
1606         if (ylogx <= -150.0 * (1 << 5))
1607             return math_error(_UNDERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) / 0x1p1023);
1608     }
1609     return powf_exp2(ylogx, sign_bias);
1610 }
1611
1612 /*********************************************************************
1613  *      sinf (MSVCRT.@)
1614  *
1615  * Copied from musl: src/math/sinf.c
1616  */
1617 float CDECL sinf( float x )
1618 {
1619     static const double s1pio2 = 1*M_PI_2,
1620         s2pio2 = 2*M_PI_2,
1621         s3pio2 = 3*M_PI_2,
1622         s4pio2 = 4*M_PI_2;
1623
1624     double y;
1625     UINT32 ix;
1626     int n, sign;
1627
1628     ix = *(UINT32*)&x;
1629     sign = ix >> 31;
1630     ix &= 0x7fffffff;
1631
1632     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1633         if (ix < 0x39800000) { /* |x| < 2**-12 */
1634             /* raise inexact if x!=0 and underflow if subnormal */
1635             fp_barrierf(ix < 0x00800000 ? x / 0x1p120f : x + 0x1p120f);
1636             return x;
1637         }
1638         return __sindf(x);
1639     }
1640     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1641         if (ix <= 0x4016cbe3) { /* |x| ~<= 3pi/4 */
1642             if (sign)
1643                 return -__cosdf(x + s1pio2);
1644             else
1645                 return __cosdf(x - s1pio2);
1646         }
1647         return __sindf(sign ? -(x + s2pio2) : -(x - s2pio2));
1648     }
1649     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1650         if (ix <= 0x40afeddf) { /* |x| ~<= 7*pi/4 */
1651             if (sign)
1652                 return __cosdf(x + s3pio2);
1653             else
1654                 return -__cosdf(x - s3pio2);
1655         }
1656         return __sindf(sign ? x + s4pio2 : x - s4pio2);
1657     }
1658
1659     /* sin(Inf or NaN) is NaN */
1660     if (isinf(x))
1661         return math_error(_DOMAIN, "sinf", x, 0, x - x);
1662     if (ix >= 0x7f800000)
1663         return x - x;
1664
1665     /* general argument reduction needed */
1666     n = __rem_pio2f(x, &y);
1667     switch (n&3) {
1668     case 0: return __sindf(y);
1669     case 1: return __cosdf(y);
1670     case 2: return __sindf(-y);
1671     default: return -__cosdf(y);
1672     }
1673 }
1674
1675 /*********************************************************************
1676  *      sinhf (MSVCRT.@)
1677  */
1678 float CDECL sinhf( float x )
1679 {
1680     UINT32 ui = *(UINT32*)&x;
1681     float t, h, absx;
1682
1683     h = 0.5;
1684     if (ui >> 31)
1685         h = -h;
1686     /* |x| */
1687     ui &= 0x7fffffff;
1688     absx = *(float*)&ui;
1689
1690     /* |x| < log(FLT_MAX) */
1691     if (ui < 0x42b17217) {
1692         t = __expm1f(absx);
1693         if (ui < 0x3f800000) {
1694             if (ui < 0x3f800000 - (12 << 23))
1695                 return x;
1696             return h * (2 * t - t * t / (t + 1));
1697         }
1698         return h * (t + t / (t + 1));
1699     }
1700
1701     /* |x| > logf(FLT_MAX) or nan */
1702     if (ui > 0x7f800000)
1703         *(DWORD*)&t = *(DWORD*)&x | 0x400000;
1704     else
1705         t = __expo2f(absx, 2 * h);
1706     return t;
1707 }
1708
1709 static BOOL sqrtf_validate( float *x )
1710 {
1711     short c = _fdclass(*x);
1712
1713     if (c == FP_ZERO) return FALSE;
1714     if (c == FP_NAN) return FALSE;
1715     if (signbit(*x))
1716     {
1717         *x = math_error(_DOMAIN, "sqrtf", *x, 0, ret_nan(TRUE));
1718         return FALSE;
1719     }
1720     if (c == FP_INFINITE) return FALSE;
1721     return TRUE;
1722 }
1723
#if defined(__x86_64__) || defined(__i386__)
/* Single-instruction square root: the routine body is just
 * "sqrtss %xmm0, %xmm0" followed by "ret".  In this part of the file it
 * is only called from the __x86_64__ branch of sqrtf().
 * NOTE(review): this presumes the float argument arrives, and the result
 * is returned, in %xmm0 - confirm before relying on it in the i386 build. */
float CDECL sse2_sqrtf(float);
__ASM_GLOBAL_FUNC( sse2_sqrtf,
        "sqrtss %xmm0, %xmm0\n\t"
        "ret" )
#endif
1730
/*********************************************************************
 *      sqrtf (MSVCRT.@)
 *
 * Copied from musl: src/math/sqrtf.c
 *
 * Square root of a float.  sqrtf_validate() settles zero, NaN, infinite
 * and negative arguments first.  On x86-64 the remaining work is done by
 * sse2_sqrtf(); elsewhere the root is generated bit by bit with integer
 * arithmetic.
 */
float CDECL sqrtf( float x )
{
#ifdef __x86_64__
    /* special values: x already holds the result */
    if (!sqrtf_validate(&x))
        return x;

    return sse2_sqrtf(x);
#else
    static const float tiny = 1.0e-30;

    float z;
    int ix,s,q,m,t,i;
    unsigned int r;

    /* grab the bit pattern up front; x is only rewritten by the call
       below when that call returns FALSE */
    ix = *(int*)&x;

    if (!sqrtf_validate(&x))
        return x;

    /* normalize x */
    m = ix >> 23;
    if (m == 0) {  /* subnormal x */
        for (i = 0; (ix & 0x00800000) == 0; i++)
            ix <<= 1;
        m -= i - 1;
    }
    m -= 127;  /* unbias exponent */
    ix = (ix & 0x007fffff) | 0x00800000;
    if (m & 1)  /* odd m, double x to make it even */
        ix += ix;
    m >>= 1;  /* m = [m/2] */

    /* generate sqrt(x) bit by bit */
    ix += ix;
    q = s = 0;       /* q = sqrt(x) */
    r = 0x01000000;  /* r = moving bit from right to left */

    /* one result bit per iteration; ix holds the running remainder */
    while (r != 0) {
        t = s + r;
        if (t <= ix) {
            s = t + r;
            ix -= t;
            q += r;
        }
        ix += ix;
        r >>= 1;
    }

    /* use floating add to find out rounding direction */
    if (ix != 0) {
        z = 1.0f - tiny; /* raise inexact flag */
        if (z >= 1.0f) {
            z = 1.0f + tiny;
            if (z > 1.0f)
                q += 2;
            else
                q += q & 1;
        }
    }
    /* drop the extra low bit of q and put the halved exponent back */
    ix = (q >> 1) + 0x3f000000;
    r = ix + ((unsigned int)m << 23);
    z = *(float*)&r;
    return z;
#endif
}
1801
/* Copied from musl: src/math/__tandf.c
 *
 * Tangent kernel for an already reduced double argument.  Evaluates an
 * odd polynomial in x; returns that value when odd is 0 and -1 divided
 * by it otherwise.
 */
static float __tandf(double x, int odd)
{
    static const double T[] = {
        0x15554d3418c99f.0p-54,
        0x1112fd38999f72.0p-55,
        0x1b54c91d865afe.0p-57,
        0x191df3908c33ce.0p-58,
        0x185dadfcecf44e.0p-61,
        0x1362b9bf971bcd.0p-59,
    };

    double x2, x3, x4, top, mid, low, tangent;

    /* the three coefficient pairs are evaluated independently in x^2 */
    x2 = x * x;
    top = T[4] + x2 * T[5];
    mid = T[2] + x2 * T[3];
    x4 = x2 * x2;
    x3 = x2 * x;
    low = T[0] + x2 * T[1];
    tangent = (x + x3 * low) + (x3 * x4) * (mid + x4 * top);

    if (odd)
        return -1.0 / tangent;
    return tangent;
}
1825
1826 /*********************************************************************
1827  *      tanf (MSVCRT.@)
1828  *
1829  * Copied from musl: src/math/tanf.c
1830  */
1831 float CDECL tanf( float x )
1832 {
1833     static const double t1pio2 = 1*M_PI_2,
1834         t2pio2 = 2*M_PI_2,
1835         t3pio2 = 3*M_PI_2,
1836         t4pio2 = 4*M_PI_2;
1837
1838     double y;
1839     UINT32 ix;
1840     unsigned n, sign;
1841
1842     ix = *(UINT32*)&x;
1843     sign = ix >> 31;
1844     ix &= 0x7fffffff;
1845
1846     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1847         if (ix < 0x39800000) { /* |x| < 2**-12 */
1848             /* raise inexact if x!=0 and underflow if subnormal */
1849             fp_barrierf(ix < 0x00800000 ? x / 0x1p120f : x + 0x1p120f);
1850             return x;
1851         }
1852         return __tandf(x, 0);
1853     }
1854     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1855         if (ix <= 0x4016cbe3) /* |x| ~<= 3pi/4 */
1856             return __tandf((sign ? x + t1pio2 : x - t1pio2), 1);
1857         else
1858             return __tandf((sign ? x + t2pio2 : x - t2pio2), 0);
1859     }
1860     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1861         if (ix <= 0x40afeddf) /* |x| ~<= 7*pi/4 */
1862             return __tandf((sign ? x + t3pio2 : x - t3pio2), 1);
1863         else
1864             return __tandf((sign ? x + t4pio2 : x - t4pio2), 0);
1865     }
1866
1867     /* tan(Inf or NaN) is NaN */
1868     if (isinf(x))
1869         return math_error(_DOMAIN, "tanf", x, 0, x - x);
1870     if (ix >= 0x7f800000)
1871         return x - x;
1872
1873     /* argument reduction */
1874     n = __rem_pio2f(x, &y);
1875     return __tandf(y, n & 1);
1876 }
1877
1878 /*********************************************************************
1879  *      tanhf (MSVCRT.@)
1880  */
1881 float CDECL tanhf( float x )
1882 {
1883     UINT32 ui = *(UINT32*)&x;
1884     UINT32 sign = ui & 0x80000000;
1885     float t;
1886
1887     /* x = |x| */
1888     ui &= 0x7fffffff;
1889     x = *(float*)&ui;
1890
1891     if (ui > 0x3f0c9f54) {
1892         /* |x| > log(3)/2 ~= 0.5493 or nan */
1893         if (ui > 0x41200000) {
1894             if (ui > 0x7f800000) {
1895                 *(UINT32*)&x = ui | sign | 0x400000;
1896 #if _MSVCR_VER < 140
1897                 return math_error(_DOMAIN, "tanhf", x, 0, x);
1898 #else
1899                 return x;
1900 #endif
1901             }
1902             /* |x| > 10 */
1903             fp_barrierf(x + 0x1p120f);
1904             t = 1 + 0 / x;
1905         } else {
1906             t = __expm1f(2 * x);
1907             t = 1 - 2 / (t + 2);
1908         }
1909     } else if (ui > 0x3e82c578) {
1910         /* |x| > log(5/3)/2 ~= 0.2554 */
1911         t = __expm1f(2 * x);
1912         t = t / (t + 2);
1913     } else if (ui >= 0x00800000) {
1914         /* |x| >= 0x1p-126 */
1915         t = __expm1f(-2 * x);
1916         t = -t / (t + 2);
1917     } else {
1918         /* |x| is subnormal */
1919         fp_barrierf(x * x);
1920         t = x;
1921     }
1922     return sign ? -t : t;
1923 }
1924
1925 /*********************************************************************
1926  *      ceilf (MSVCRT.@)
1927  *
1928  * Copied from musl: src/math/ceilf.c
1929  */
1930 float CDECL ceilf( float x )
1931 {
1932     union {float f; UINT32 i;} u = {x};
1933     int e = (int)(u.i >> 23 & 0xff) - 0x7f;
1934     UINT32 m;
1935
1936     if (e >= 23)
1937         return x;
1938     if (e >= 0) {
1939         m = 0x007fffff >> e;
1940         if ((u.i & m) == 0)
1941             return x;
1942         if (u.i >> 31 == 0)
1943             u.i += m;
1944         u.i &= ~m;
1945     } else {
1946         if (u.i >> 31)
1947             return -0.0;
1948         else if (u.i << 1)
1949             return 1.0;
1950     }
1951     return u.f;
1952 }
1953
1954 /*********************************************************************
1955  *      floorf (MSVCRT.@)
1956  *
1957  * Copied from musl: src/math/floorf.c
1958  */
1959 float CDECL floorf( float x )
1960 {
1961     union {float f; UINT32 i;} u = {x};
1962     int e = (int)(u.i >> 23 & 0xff) - 0x7f;
1963     UINT32 m;
1964
1965     if (e >= 23)
1966         return x;
1967     if (e >= 0) {
1968         m = 0x007fffff >> e;
1969         if ((u.i & m) == 0)
1970             return x;
1971         if (u.i >> 31)
1972             u.i += m;
1973         u.i &= ~m;
1974     } else {
1975         if (u.i >> 31 == 0)
1976             return 0;
1977         else if (u.i << 1)
1978             return -1;
1979     }
1980     return u.f;
1981 }
1982
1983 /*********************************************************************
1984  *      frexpf (MSVCRT.@)
1985  *
1986  * Copied from musl: src/math/frexpf.c
1987  */
1988 float CDECL frexpf( float x, int *e )
1989 {
1990     UINT32 ux = *(UINT32*)&x;
1991     int ee = ux >> 23 & 0xff;
1992
1993     if (!ee) {
1994         if (x) {
1995             x = frexpf(x * 0x1p64, e);
1996             *e -= 64;
1997         } else *e = 0;
1998         return x;
1999     } else if (ee == 0xff) {
2000         return x;
2001     }
2002
2003     *e = ee - 0x7e;
2004     ux &= 0x807ffffful;
2005     ux |= 0x3f000000ul;
2006     return *(float*)&ux;
2007 }
2008
2009 /*********************************************************************
2010  *      modff (MSVCRT.@)
2011  *
2012  * Copied from musl: src/math/modff.c
2013  */
2014 float CDECL modff( float x, float *iptr )
2015 {
2016     union {float f; UINT32 i;} u = {x};
2017     UINT32 mask;
2018     int e = (u.i >> 23 & 0xff) - 0x7f;
2019
2020     /* no fractional part */
2021     if (e >= 23) {
2022         *iptr = x;
2023         if (e == 0x80 && u.i << 9 != 0) { /* nan */
2024             return x;
2025         }
2026         u.i &= 0x80000000;
2027         return u.f;
2028     }
2029     /* no integral part */
2030     if (e < 0) {
2031         u.i &= 0x80000000;
2032         *iptr = u.f;
2033         return x;
2034     }
2035
2036     mask = 0x007fffff >> e;
2037     if ((u.i & mask) == 0) {
2038         *iptr = x;
2039         u.i &= 0x80000000;
2040         return u.f;
2041     }
2042     u.i &= ~mask;
2043     *iptr = u.f;
2044     return x - u.f;
2045 }
2046
2047 #endif
2048
2049 #if !defined(__i386__) && !defined(__x86_64__) && (_MSVCR_VER == 0 || _MSVCR_VER >= 110)
2050
2051 /*********************************************************************
2052  *      fabsf (MSVCRT.@)
2053  *
2054  * Copied from musl: src/math/fabsf.c
2055  */
2056 float CDECL fabsf( float x )
2057 {
2058     union { float f; UINT32 i; } u = { x };
2059     u.i &= 0x7fffffff;
2060     return u.f;
2061 }
2062
2063 #endif
2064
2065 /*********************************************************************
2066  *              acos (MSVCRT.@)
2067  *
2068  * Copied from musl: src/math/acos.c
2069  */
/* Rational function P(z)/Q(z) used by acos(); acos() passes either
 * x * x or a value derived from (1 +- x) / 2 as z. */
static double acos_R(double z)
{
    /* numerator coefficients */
    static const double P0 =  1.66666666666666657415e-01;
    static const double P1 = -3.25565818622400915405e-01;
    static const double P2 =  2.01212532134862925881e-01;
    static const double P3 = -4.00555345006794114027e-02;
    static const double P4 =  7.91534994289814532176e-04;
    static const double P5 =  3.47933107596021167570e-05;
    /* denominator coefficients (the constant term is 1) */
    static const double Q1 = -2.40339491173441421878e+00;
    static const double Q2 =  2.02094576023350569471e+00;
    static const double Q3 = -6.88283971605453293030e-01;
    static const double Q4 =  7.70381505559019352791e-02;

    const double numer = z * (P0 + z * (P1 + z * (P2 + z * (P3 + z * (P4 + z * P5)))));
    const double denom = 1.0 + z * (Q1 + z * (Q2 + z * (Q3 + z * Q4)));

    return numer / denom;
}
2088
2089 double CDECL acos( double x )
2090 {
2091     static const double pio2_hi = 1.57079632679489655800e+00,
2092                  pio2_lo = 6.12323399573676603587e-17;
2093
2094     double z, w, s, c, df;
2095     unsigned int hx, ix;
2096     ULONGLONG llx;
2097
2098     hx = *(ULONGLONG*)&x >> 32;
2099     ix = hx & 0x7fffffff;
2100     /* |x| >= 1 or nan */
2101     if (ix >= 0x3ff00000) {
2102         unsigned int lx;
2103
2104         lx = *(ULONGLONG*)&x;
2105         if (((ix - 0x3ff00000) | lx) == 0) {
2106             /* acos(1)=0, acos(-1)=pi */
2107             if (hx >> 31)
2108                 return 2 * pio2_hi + 7.5231638452626401e-37;
2109             return 0;
2110         }
2111         if (isnan(x)) return x;
2112         return math_error(_DOMAIN, "acos", x, 0, 0 / (x - x));
2113     }
2114     /* |x| < 0.5 */
2115     if (ix < 0x3fe00000) {
2116         if (ix <= 0x3c600000)  /* |x| < 2**-57 */
2117             return pio2_hi + 7.5231638452626401e-37;
2118         return pio2_hi - (x - (pio2_lo - x * acos_R(x * x)));
2119     }
2120     /* x < -0.5 */
2121     if (hx >> 31) {
2122         z = (1.0 + x) * 0.5;
2123         s = sqrt(z);
2124         w = acos_R(z) * s - pio2_lo;
2125         return 2 * (pio2_hi - (s + w));
2126     }
2127     /* x > 0.5 */
2128     z = (1.0 - x) * 0.5;
2129     s = sqrt(z);
2130     df = s;
2131     llx = (*(ULONGLONG*)&df >> 32) << 32;
2132     df = *(double*)&llx;
2133     c = (z - df * df) / (s + df);
2134     w = acos_R(z) * s + c;
2135     return 2 * (df + w);
2136 }
2137
2138 /*********************************************************************
2139  *              asin (MSVCRT.@)
2140  *
2141  * Copied from musl: src/math/asin.c
2142  */
/* Rational function R(z) = P(z)/Q(z) used by asin(), which evaluates it
 * at z = x^2 for small |x| and at z = (1 - |x|) / 2 otherwise. */
static double asin_R(double z)
{
    /* numerator coefficients */
    static const double num0 =  1.66666666666666657415e-01;
    static const double num1 = -3.25565818622400915405e-01;
    static const double num2 =  2.01212532134862925881e-01;
    static const double num3 = -4.00555345006794114027e-02;
    static const double num4 =  7.91534994289814532176e-04;
    static const double num5 =  3.47933107596021167570e-05;
    /* denominator coefficients (the constant term is 1) */
    static const double den1 = -2.40339491173441421878e+00;
    static const double den2 =  2.02094576023350569471e+00;
    static const double den3 = -6.88283971605453293030e-01;
    static const double den4 =  7.70381505559019352791e-02;

    const double top = z * (num0 + z * (num1 + z * (num2 + z * (num3 + z * (num4 + z * num5)))));
    const double bottom = 1.0 + z * (den1 + z * (den2 + z * (den3 + z * den4)));

    return top / bottom;
}
2162
#ifdef __i386__
/* x87 assembly fallback for asin(), only built on i386.
 *
 * asin() calls it when sse2_enabled is false or when the x87 / SSE2
 * control words read through __control87_2() do not have all exceptions
 * masked (for SSE2 also when any rounding-control bit is set).
 *
 * The argument is loaded from the stack, duplicated, combined with 1.0
 * by a subtract, an add and a multiply, square-rooted and passed to
 * fpatan together with the original x.
 * NOTE(review): this appears to evaluate atan2(x, sqrt((1 - x) * (1 + x)));
 * confirm the operand order of fsubp with the assembler in use.
 * SET_X87_CW / RESET_X87_CW are defined elsewhere; presumably they switch
 * the x87 control word for the duration of the computation - verify.
 */
double CDECL x87_asin(double);
__ASM_GLOBAL_FUNC( x87_asin,
        "fldl 4(%esp)\n\t"
        SET_X87_CW(~0x37f)
        "fld %st\n\t"
        "fld1\n\t"
        "fsubp\n\t"
        "fld1\n\t"
        "fadd %st(2)\n\t"
        "fmulp\n\t"
        "fsqrt\n\t"
        "fpatan\n\t"
        RESET_X87_CW
        "ret" )
#endif
2179
2180 double CDECL asin( double x )
2181 {
2182     static const double pio2_hi = 1.57079632679489655800e+00,
2183                  pio2_lo = 6.12323399573676603587e-17;
2184
2185     double z, r, s;
2186     unsigned int hx, ix;
2187     ULONGLONG llx;
2188 #ifdef __i386__
2189     unsigned int x87_cw, sse2_cw;
2190 #endif
2191
2192     hx = *(ULONGLONG*)&x >> 32;
2193     ix = hx & 0x7fffffff;
2194     /* |x| >= 1 or nan */
2195     if (ix >= 0x3ff00000) {
2196         unsigned int lx;
2197         lx = *(ULONGLONG*)&x;
2198         if (((ix - 0x3ff00000) | lx) == 0)
2199             /* asin(1) = +-pi/2 with inexact */
2200             return x * pio2_hi + 7.5231638452626401e-37;
2201         if (isnan(x))
2202         {
2203 #ifdef __i386__
2204             return math_error(_DOMAIN, "asin", x, 0, x);
2205 #else
2206             return x;
2207 #endif
2208         }
2209         return math_error(_DOMAIN, "asin", x, 0, 0 / (x - x));
2210     }
2211
2212 #ifdef __i386__
2213     __control87_2(0, 0, &x87_cw, &sse2_cw);
2214     if (!sse2_enabled || (x87_cw & _MCW_EM) != _MCW_EM
2215             || (sse2_cw & (_MCW_EM | _MCW_RC)) != _MCW_EM)
2216         return x87_asin(x);
2217 #endif
2218
2219     /* |x| < 0.5 */
2220     if (ix < 0x3fe00000) {
2221         /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */
2222         if (ix < 0x3e500000 && ix >= 0x00100000)
2223             return x;
2224         return x + x * asin_R(x * x);
2225     }
2226     /* 1 > |x| >= 0.5 */
2227     z = (1 - fabs(x)) * 0.5;
2228     s = sqrt(z);
2229     r = asin_R(z);
2230     if (ix >= 0x3fef3333) {  /* if |x| > 0.975 */
2231         x = pio2_hi - (2 * (s + s * r) - pio2_lo);
2232     } else {
2233         double f, c;
2234         /* f+c = sqrt(z) */
2235         f = s;
2236         llx = (*(ULONGLONG*)&f >> 32) << 32;
2237         f = *(double*)&llx;
2238         c = (z - f * f) / (s + f);
2239         x = 0.5 * pio2_hi - (2 * s * r - (pio2_lo - 2 * c) - (0.5 * pio2_hi - 2 * f));
2240     }
2241     if (hx >> 31)
2242         return -x;
2243     return x;
2244 }
2245
2246 /*********************************************************************
2247  *              atan (MSVCRT.@)
2248  *
2249  * Copied from musl: src/math/atan.c
2250  */
2251 double CDECL atan( double x )
2252 {
2253     static const double atanhi[] = {
2254         4.63647609000806093515e-01,
2255         7.85398163397448278999e-01,
2256         9.82793723247329054082e-01,
2257         1.57079632679489655800e+00,
2258     };
2259     static const double atanlo[] = {
2260         2.26987774529616870924e-17,
2261         3.06161699786838301793e-17,
2262         1.39033110312309984516e-17,
2263         6.12323399573676603587e-17,
2264     };
2265     static const double aT[] = {
2266         3.33333333333329318027e-01,
2267         -1.99999999998764832476e-01,
2268         1.42857142725034663711e-01,
2269         -1.11111104054623557880e-01,
2270         9.09088713343650656196e-02,
2271         -7.69187620504482999495e-02,
2272         6.66107313738753120669e-02,
2273         -5.83357013379057348645e-02,
2274         4.97687799461593236017e-02,
2275         -3.65315727442169155270e-02,
2276         1.62858201153657823623e-02,
2277     };
2278
2279     double w, s1, s2, z;
2280     unsigned int ix, sign;
2281     int id;
2282
2283 #if _MSVCR_VER == 0
2284     if (isnan(x)) return math_error(_DOMAIN, "atan", x, 0, x);
2285 #endif
2286
2287     ix = *(ULONGLONG*)&x >> 32;
2288     sign = ix >> 31;
2289     ix &= 0x7fffffff;
2290     if (ix >= 0x44100000) {   /* if |x| >= 2^66 */
2291         if (isnan(x))
2292             return x;
2293         z = atanhi[3] + 7.5231638452626401e-37;
2294         return sign ? -z : z;
2295     }
2296     if (ix < 0x3fdc0000) {    /* |x| < 0.4375 */
2297         if (ix < 0x3e400000) {  /* |x| < 2^-27 */
2298             if (ix < 0x00100000)
2299                 /* raise underflow for subnormal x */
2300                 fp_barrierf((float)x);
2301             return x;
2302         }
2303         id = -1;
2304     } else {
2305         x = fabs(x);
2306         if (ix < 0x3ff30000) {  /* |x| < 1.1875 */
2307             if (ix < 0x3fe60000) {  /*  7/16 <= |x| < 11/16 */
2308                 id = 0;
2309                 x = (2.0 * x - 1.0) / (2.0 + x);
2310             } else {                /* 11/16 <= |x| < 19/16 */
2311                 id = 1;
2312                 x = (x - 1.0) / (x + 1.0);
2313             }
2314         } else {
2315             if (ix < 0x40038000) {  /* |x| < 2.4375 */
2316                 id = 2;
2317                 x = (x - 1.5) / (1.0 + 1.5 * x);
2318             } else {                /* 2.4375 <= |x| < 2^66 */
2319                 id = 3;
2320                 x = -1.0 / x;
2321             }
2322         }
2323     }
2324     /* end of argument reduction */
2325     z = x * x;
2326     w = z * z;
2327     /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
2328     s1 = z * (aT[0] + w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10])))));
2329     s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9]))));
2330     if (id < 0)
2331         return x - x * (s1 + s2);
2332     z = atanhi[id] - (x * (s1 + s2) - atanlo[id] - x);
2333     return sign ? -z : z;
2334 }
2335
2336 /*********************************************************************
2337  *              atan2 (MSVCRT.@)
2338  *
2339  * Copied from musl: src/math/atan2.c
2340  */
2341 double CDECL atan2( double y, double x )
2342 {
2343     static const double pi     = 3.1415926535897931160E+00,
2344                  pi_lo  = 1.2246467991473531772E-16;
2345
2346     double z;
2347     unsigned int m, lx, ly, ix, iy;
2348
2349     if (isnan(x) || isnan(y))
2350         return x+y;
2351     ix = *(ULONGLONG*)&x >> 32;
2352     lx = *(ULONGLONG*)&x;
2353     iy = *(ULONGLONG*)&y >> 32;
2354     ly = *(ULONGLONG*)&y;
2355     if (((ix - 0x3ff00000) | lx) == 0)  /* x = 1.0 */
2356         return atan(y);
2357     m = ((iy >> 31) & 1) | ((ix >> 30) & 2);  /* 2*sign(x)+sign(y) */
2358     ix = ix & 0x7fffffff;
2359     iy = iy & 0x7fffffff;
2360
2361     /* when y = 0 */
2362     if ((iy | ly) == 0) {
2363         switch(m) {
2364         case 0:
2365         case 1: return y;   /* atan(+-0,+anything)=+-0 */
2366         case 2: return pi;  /* atan(+0,-anything) = pi */
2367         case 3: return -pi; /* atan(-0,-anything) =-pi */
2368         }
2369     }
2370     /* when x = 0 */
2371     if ((ix | lx) == 0)
2372         return m & 1 ? -pi / 2 : pi / 2;
2373     /* when x is INF */
2374     if (ix == 0x7ff00000) {
2375         if (iy == 0x7ff00000) {
2376             switch(m) {
2377             case 0: return pi / 4;      /* atan(+INF,+INF) */
2378             case 1: return -pi / 4;     /* atan(-INF,+INF) */
2379             case 2: return 3 * pi / 4;  /* atan(+INF,-INF) */
2380             case 3: return -3 * pi / 4; /* atan(-INF,-INF) */
2381             }
2382         } else {
2383             switch(m) {
2384             case 0: return 0.0;  /* atan(+...,+INF) */
2385             case 1: return -0.0; /* atan(-...,+INF) */
2386             case 2: return pi;   /* atan(+...,-INF) */
2387             case 3: return -pi;  /* atan(-...,-INF) */
2388             }
2389         }
2390     }
2391     /* |y/x| > 0x1p64 */
2392     if (ix + (64 << 20) < iy || iy == 0x7ff00000)
2393         return m & 1 ? -pi / 2 : pi / 2;
2394
2395     /* z = atan(|y/x|) without spurious underflow */
2396     if ((m & 2) && iy + (64 << 20) < ix)  /* |y/x| < 0x1p-64, x<0 */
2397         z = 0;
2398     else
2399         z = atan(fabs(y / x));
2400     switch (m) {
2401     case 0: return z;                /* atan(+,+) */
2402     case 1: return -z;               /* atan(-,+) */
2403     case 2: return pi - (z - pi_lo); /* atan(+,-) */
2404     default: /* case 3 */
2405         return (z - pi_lo) - pi;     /* atan(-,-) */
2406     }
2407 }
2408
2409 /* Copied from musl: src/math/rint.c */
2410 static double __rint(double x)
2411 {
2412     static const double toint = 1 / DBL_EPSILON;
2413
2414     ULONGLONG llx = *(ULONGLONG*)&x;
2415     int e = llx >> 52 & 0x7ff;
2416     int s = llx >> 63;
2417     unsigned cw;
2418     double y;
2419
2420     if (e >= 0x3ff+52)
2421         return x;
2422     cw = _controlfp(0, 0);
2423     if ((cw & _MCW_PC) != _PC_53)
2424         _controlfp(_PC_53, _MCW_PC);
2425     if (s)
2426         y = fp_barrier(x - toint) + toint;
2427     else
2428         y = fp_barrier(x + toint) - toint;
2429     if ((cw & _MCW_PC) != _PC_53)
2430         _controlfp(cw, _MCW_PC);
2431     if (y == 0)
2432         return s ? -0.0 : 0;
2433     return y;
2434 }
2435
/* Copied from musl: src/math/__rem_pio2.c
 *
 * Argument reduction by multiples of pi/2.
 *
 * The remainder is stored as the sum y[0] + y[1] (y[1] holding what was
 * lost when rounding y[0]) and the signed multiple of pi/2 that was
 * removed is returned; cos() only uses its two lowest bits.
 *
 * Small arguments are handled by directly subtracting 1..4 times pi/2,
 * medium ones (up to about 2^20 * pi/2) by rounding x * (2/pi) and
 * subtracting in up to three rounds of increasing precision, and larger
 * finite ones are handed to __rem_pio2_large(). For inf or NaN both
 * y[0] and y[1] are set to x - x and 0 is returned.
 */
static int __rem_pio2(double x, double *y)
{
    /* pio2_N is the N-th chunk of pi/2 and pio2_Nt the tail left over
     * after the first N chunks have been removed */
    static const double pio4    = 0x1.921fb54442d18p-1,
                 invpio2 = 6.36619772367581382433e-01,
                 pio2_1  = 1.57079632673412561417e+00,
                 pio2_1t = 6.07710050650619224932e-11,
                 pio2_2  = 6.07710050630396597660e-11,
                 pio2_2t = 2.02226624879595063154e-21,
                 pio2_3  = 2.02226624871116645580e-21,
                 pio2_3t = 8.47842766036889956997e-32;

    union {double f; UINT64 i;} u = {x};
    double z, w, t, r, fn, tx[3], ty[2];
    UINT32 ix;
    int sign, n, ex, ey, i;

    sign = u.i >> 63;
    /* high word of |x| */
    ix = u.i >> 32 & 0x7fffffff;
    if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */
        if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */
            goto medium; /* cancellation -- use medium case */
        if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */
            if (!sign) {
                z = x - pio2_1; /* one round good to 85 bits */
                y[0] = z - pio2_1t;
                y[1] = (z - y[0]) - pio2_1t;
                return 1;
            } else {
                z = x + pio2_1;
                y[0] = z + pio2_1t;
                y[1] = (z - y[0]) + pio2_1t;
                return -1;
            }
        } else {
            if (!sign) {
                z = x - 2 * pio2_1;
                y[0] = z - 2 * pio2_1t;
                y[1] = (z - y[0]) - 2 * pio2_1t;
                return 2;
            } else {
                z = x + 2 * pio2_1;
                y[0] = z + 2 * pio2_1t;
                y[1] = (z - y[0]) + 2 * pio2_1t;
                return -2;
            }
        }
    }
    if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */
        if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */
            if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */
                goto medium;
            if (!sign) {
                z = x - 3 * pio2_1;
                y[0] = z - 3 * pio2_1t;
                y[1] = (z - y[0]) - 3 * pio2_1t;
                return 3;
            } else {
                z = x + 3 * pio2_1;
                y[0] = z + 3 * pio2_1t;
                y[1] = (z - y[0]) + 3 * pio2_1t;
                return -3;
            }
        } else {
            if (ix == 0x401921fb) /* |x| ~= 4pi/2 */
                goto medium;
            if (!sign) {
                z = x - 4 * pio2_1;
                y[0] = z - 4 * pio2_1t;
                y[1] = (z - y[0]) - 4 * pio2_1t;
                return 4;
            } else {
                z = x + 4 * pio2_1;
                y[0] = z + 4 * pio2_1t;
                y[1] = (z - y[0]) + 4 * pio2_1t;
                return -4;
            }
        }
    }
    if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
medium:
        /* fn = nearest integer to x * (2/pi), rounded by __rint() */
        fn = __rint(x * invpio2);
        n = (INT32)fn;
        r = x - fn * pio2_1;
        w = fn * pio2_1t; /* 1st round, good to 85 bits */
        /* Matters with directed rounding. */
        if (r - w < -pio4) {
            n--;
            fn--;
            r = x - fn * pio2_1;
            w = fn * pio2_1t;
        } else if (r - w > pio4) {
            n++;
            fn++;
            r = x - fn * pio2_1;
            w = fn * pio2_1t;
        }
        y[0] = r - w;
        u.f = y[0];
        /* compare the biased exponents of the remainder and of x to see
         * how many leading bits were cancelled */
        ey = u.i >> 52 & 0x7ff;
        ex = ix >> 20;
        if (ex - ey > 16) { /* 2nd round, good to 118 bits */
            t = r;
            w = fn * pio2_2;
            r = t - w;
            w = fn * pio2_2t - ((t - r) - w);
            y[0] = r - w;
            u.f = y[0];
            ey = u.i >> 52 & 0x7ff;
            if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */
                t = r;
                w = fn * pio2_3;
                r = t - w;
                w = fn * pio2_3t - ((t - r) - w);
                y[0] = r - w;
            }
        }
        y[1] = (r - y[0]) - w;
        return n;
    }
    /*
     * all other (large) arguments
     */
    if (ix >= 0x7ff00000) {  /* x is inf or NaN */
        y[0] = y[1] = x - x;
        return 0;
    }
    /* set z = scalbn(|x|,-ilogb(x)+23) */
    u.f = x;
    u.i &= (UINT64)-1 >> 12;
    u.i |= (UINT64)(0x3ff + 23) << 52;
    z = u.f;
    /* split z into up to three pieces; each of the first two is an
     * integer part, the rest is rescaled by 2^24 for the next piece */
    for (i = 0; i < 2; i++) {
        tx[i] = (double)(INT32)z;
        z = (z - tx[i]) * 0x1p24;
    }
    tx[i] = z;
    /* skip zero terms, first term is non-zero */
    while (tx[i] == 0.0)
        i--;
    /* __rem_pio2_large() is defined elsewhere; it receives the pieces, the
     * exponent of the scaling applied above and the number of pieces used */
    n = __rem_pio2_large(tx, ty, (int)(ix >> 20) - (0x3ff + 23), i + 1, 1);
    /* the reduction was done on |x|: negate everything for negative x */
    if (sign) {
        y[0] = -ty[0];
        y[1] = -ty[1];
        return -n;
    }
    y[0] = ty[0];
    y[1] = ty[1];
    return n;
}
2586
/* Copied from musl: src/math/__sin.c
 *
 * Sine kernel: evaluates the odd polynomial
 * x + S1*x^3 + S2*x^5 + ... + S6*x^13.
 *
 * x is the argument and y its tail, as produced by __rem_pio2().
 * When iy is 0 the tail is ignored, otherwise it is folded into the
 * result as a correction.
 */
static double __sin(double x, double y, int iy)
{
    static const double S1 = -1.66666666666666324348e-01;
    static const double S2 =  8.33333333332248946124e-03;
    static const double S3 = -1.98412698298579493134e-04;
    static const double S4 =  2.75573137070700676789e-06;
    static const double S5 = -2.50507602534068634195e-08;
    static const double S6 =  1.58969099521155010221e-10;

    double x2, x3, x4, poly;

    x2 = x * x;
    x4 = x2 * x2;
    poly = S2 + x2 * (S3 + x2 * S4) + x2 * x4 * (S5 + x2 * S6);
    x3 = x2 * x;

    /* with a tail: apply the y correction */
    if (iy)
        return x - ((x2 * (0.5 * y - x3 * poly) - y) - x3 * S1);
    return x + x3 * (S1 + x2 * poly);
}
2608
/* Copied from musl: src/math/__cos.c
 *
 * Cosine kernel: evaluates 1 - x^2/2 + C1*x^4 + ... + C6*x^14 and
 * subtracts x * y, where x is the argument and y its tail, as produced
 * by __rem_pio2().
 */
static double __cos(double x, double y)
{
    static const double C1 =  4.16666666666666019037e-02;
    static const double C2 = -1.38888888888741095749e-03;
    static const double C3 =  2.48015872894767294178e-05;
    static const double C4 = -2.75573143513906633035e-07;
    static const double C5 =  2.08757232129817482790e-09;
    static const double C6 = -1.13596475577881948265e-11;

    double x2, x4, half_x2, poly, lead;

    x2 = x * x;
    x4 = x2 * x2;
    poly = x2 * (C1 + x2 * (C2 + x2 * C3)) + x4 * x4 * (C4 + x2 * (C5 + x2 * C6));
    half_x2 = 0.5 * x2;
    lead = 1.0 - half_x2;
    /* (1.0 - lead) - half_x2 recovers the rounding error of lead */
    return lead + (((1.0 - lead) - half_x2) + (x2 * poly - x * y));
}
2627
2628 /*********************************************************************
2629  *              cos (MSVCRT.@)
2630  *
2631  * Copied from musl: src/math/cos.c
2632  */
2633 double CDECL cos( double x )
2634 {
2635     double y[2];
2636     UINT32 ix;
2637     unsigned n;
2638
2639     ix = *(ULONGLONG*)&x >> 32;
2640     ix &= 0x7fffffff;
2641
2642     /* |x| ~< pi/4 */
2643     if (ix <= 0x3fe921fb) {
2644         if (ix < 0x3e46a09e) { /* |x| < 2**-27 * sqrt(2) */
2645             /* raise inexact if x!=0 */
2646             fp_barrier(x + 0x1p120f);
2647             return 1.0;
2648         }
2649         return __cos(x, 0);
2650     }
2651
2652     /* cos(Inf or NaN) is NaN */
2653     if (isinf(x)) return math_error(_DOMAIN, "cos", x, 0, x - x);
2654     if (ix >= 0x7ff00000)
2655         return x - x;
2656
2657     /* argument reduction */
2658     n = __rem_pio2(x, y);
2659     switch (n & 3) {
2660     case 0: return __cos(y[0], y[1]);
2661     case 1: return -__sin(y[0], y[1], 1);
2662     case 2: return -__cos(y[0], y[1]);
2663     default: return __sin(y[0], y[1], 1);
2664     }
2665 }
2666
/* Copied from musl: src/math/expm1.c
 *
 * Computes exp(x) - 1.
 *
 * NaN is returned unchanged, +inf gives +inf and -inf gives -1. Finite
 * arguments with |x| >= 56*ln2 that are negative, or that exceed
 * o_threshold, are reported through math_error() under the name "exp".
 * Otherwise x is reduced to hi - lo with k multiples of ln2 removed, a
 * rational approximation is evaluated on the reduced value and the
 * result is scaled back by 2^k.
 */
static double CDECL __expm1(double x)
{
    static const double o_threshold = 7.09782712893383973096e+02,
        ln2_hi = 6.93147180369123816490e-01,
        ln2_lo = 1.90821492927058770002e-10,
        invln2 = 1.44269504088896338700e+00,
        Q1 = -3.33333333333331316428e-02,
        Q2 = 1.58730158725481460165e-03,
        Q3 = -7.93650757867487942473e-05,
        Q4 = 4.00821782732936239552e-06,
        Q5 = -2.01099218183624371326e-07;

    double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
    union {double f; UINT64 i;} u = {x};
    /* high word of |x| */
    UINT32 hx = u.i >> 32 & 0x7fffffff;
    int k, sign = u.i >> 63;

    /* filter out huge and non-finite argument */
    if (hx >= 0x4043687A) { /* if |x|>=56*ln2 */
        if (isnan(x))
            return x;
        if (isinf(x))
            return sign ? -1 : x;
        if (sign)
            return math_error(_UNDERFLOW, "exp", x, 0, -1);
        if (x > o_threshold)
            return math_error(_OVERFLOW, "exp", x, 0, x * 0x1p1023);
    }

    /* argument reduction */
    if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
        if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
            if (!sign) {
                hi = x - ln2_hi;
                lo = ln2_lo;
                k = 1;
            } else {
                hi = x + ln2_hi;
                lo = -ln2_lo;
                k = -1;
            }
        } else {
            /* k = x / ln2 rounded to the nearest integer */
            k = invln2 * x + (sign ? -0.5 : 0.5);
            t = k;
            hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
            lo = t * ln2_lo;
        }
        x = hi - lo;
        /* c is the rounding error of the reduced x */
        c = (hi - x) - lo;
    } else if (hx < 0x3c900000) { /* |x| < 2**-54, return x */
        fp_barrier(x + 0x1p120f);
        if (hx < 0x00100000)
            fp_barrier((float)x);
        return x;
    } else
        k = 0; /* c stays unset on this path and is not read when k == 0 */

    /* x is now in primary range */
    hfx = 0.5 * x;
    hxs = x * hfx;
    r1 = 1.0 + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
    t = 3.0 - r1 * hfx;
    e = hxs * ((r1 - t) / (6.0 - x * t));
    if (k == 0) /* c is 0 */
        return x - (x * e - hxs);
    e = x * (e - c) - c;
    e -= hxs;
    /* exp(x) ~ 2^k (x_reduced - e + 1) */
    if (k == -1)
        return 0.5 * (x - e) - 0.5;
    if (k == 1) {
        if (x < -0.25)
            return -2.0 * (e - (x + 0.5));
        return 1.0 + 2.0 * (x - e);
    }
    /* build 2^k directly from its exponent field */
    u.i = (UINT64)(0x3ff + k) << 52; /* 2^k */
    twopk = u.f;
    if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
        y = x - e + 1.0;
        /* 2^1024 is not representable, so scale in two steps */
        if (k == 1024)
            y = y * 2.0 * 0x1p1023;
        else
            y = y * twopk;
        return y - 1.0;
    }
    u.i = (UINT64)(0x3ff - k) << 52; /* 2^-k */
    if (k < 20)
        y = (x - e + (1 - u.f)) * twopk;
    else
        y = (x - (e + u.f) + 1) * twopk;
    return y;
}
2760
2761 static double __expo2(double x, double sign)
2762 {
2763     static const int k = 2043;
2764     static const double kln2 = 0x1.62066151add8bp+10;
2765     double scale;
2766
2767     *(UINT64*)&scale = (UINT64)(0x3ff + k / 2) << 52;
2768     return exp(x - kln2) * (sign * scale) * scale;
2769 }
2770
2771 /*********************************************************************
2772  *              cosh (MSVCRT.@)
2773  *
2774  * Copied from musl: src/math/cosh.c
2775  */
2776 double CDECL cosh( double x )
2777 {
2778     UINT64 ux = *(UINT64*)&x;
2779     UINT64 sign = ux & 0x8000000000000000ULL;
2780     UINT32 w;
2781     double t;
2782
2783     /* |x| */
2784     ux &= (uint64_t)-1 / 2;
2785     x = *(double*)&ux;
2786     w = ux >> 32;
2787
2788     /* |x| < log(2) */
2789     if (w < 0x3fe62e42) {
2790         if (w < 0x3ff00000 - (26 << 20)) {
2791             fp_barrier(x + 0x1p120f);
2792             return 1;
2793         }
2794         t = __expm1(x);
2795         return 1 + t * t / (2 * (1 + t));
2796     }
2797
2798     /* |x| < log(DBL_MAX) */
2799     if (w < 0x40862e42) {
2800         t = exp(x);
2801         /* note: if x>log(0x1p26) then the 1/t is not needed */
2802         return 0.5 * (t + 1 / t);
2803     }
2804
2805     /* |x| > log(DBL_MAX) or nan */
2806     /* note: the result is stored to handle overflow */
2807     if (ux > 0x7ff0000000000000ULL)
2808         *(UINT64*)&t = ux | sign | 0x0008000000000000ULL;
2809     else
2810         t = __expo2(x, 1.0);
2811     return t;
2812 }
2813
2814 /* Copied from musl: src/math/exp_data.c */
2815 static const UINT64 exp_T[] = {
2816     0x0ULL, 0x3ff0000000000000ULL,
2817     0x3c9b3b4f1a88bf6eULL, 0x3feff63da9fb3335ULL,
2818     0xbc7160139cd8dc5dULL, 0x3fefec9a3e778061ULL,
2819     0xbc905e7a108766d1ULL, 0x3fefe315e86e7f85ULL,
2820     0x3c8cd2523567f613ULL, 0x3fefd9b0d3158574ULL,
2821     0xbc8bce8023f98efaULL, 0x3fefd06b29ddf6deULL,
2822     0x3c60f74e61e6c861ULL, 0x3fefc74518759bc8ULL,
2823     0x3c90a3e45b33d399ULL, 0x3fefbe3ecac6f383ULL,
2824     0x3c979aa65d837b6dULL, 0x3fefb5586cf9890fULL,
2825     0x3c8eb51a92fdeffcULL, 0x3fefac922b7247f7ULL,
2826     0x3c3ebe3d702f9cd1ULL, 0x3fefa3ec32d3d1a2ULL,
2827     0xbc6a033489906e0bULL, 0x3fef9b66affed31bULL,
2828     0xbc9556522a2fbd0eULL, 0x3fef9301d0125b51ULL,
2829     0xbc5080ef8c4eea55ULL, 0x3fef8abdc06c31ccULL,
2830     0xbc91c923b9d5f416ULL, 0x3fef829aaea92de0ULL,
2831     0x3c80d3e3e95c55afULL, 0x3fef7a98c8a58e51ULL,
2832     0xbc801b15eaa59348ULL, 0x3fef72b83c7d517bULL,
2833     0xbc8f1ff055de323dULL, 0x3fef6af9388c8deaULL,
2834     0x3c8b898c3f1353bfULL, 0x3fef635beb6fcb75ULL,
2835     0xbc96d99c7611eb26ULL, 0x3fef5be084045cd4ULL,
2836     0x3c9aecf73e3a2f60ULL, 0x3fef54873168b9aaULL,
2837     0xbc8fe782cb86389dULL, 0x3fef4d5022fcd91dULL,
2838     0x3c8a6f4144a6c38dULL, 0x3fef463b88628cd6ULL,
2839     0x3c807a05b0e4047dULL, 0x3fef3f49917ddc96ULL,
2840     0x3c968efde3a8a894ULL, 0x3fef387a6e756238ULL,
2841     0x3c875e18f274487dULL, 0x3fef31ce4fb2a63fULL,
2842     0x3c80472b981fe7f2ULL, 0x3fef2b4565e27cddULL,
2843     0xbc96b87b3f71085eULL, 0x3fef24dfe1f56381ULL,
2844     0x3c82f7e16d09ab31ULL, 0x3fef1e9df51fdee1ULL,
2845     0xbc3d219b1a6fbffaULL, 0x3fef187fd0dad990ULL,
2846     0x3c8b3782720c0ab4ULL, 0x3fef1285a6e4030bULL,
2847     0x3c6e149289cecb8fULL, 0x3fef0cafa93e2f56ULL,
2848     0x3c834d754db0abb6ULL, 0x3fef06fe0a31b715ULL,
2849     0x3c864201e2ac744cULL, 0x3fef0170fc4cd831ULL,
2850     0x3c8fdd395dd3f84aULL, 0x3feefc08b26416ffULL,
2851     0xbc86a3803b8e5b04ULL, 0x3feef6c55f929ff1ULL,
2852     0xbc924aedcc4b5068ULL, 0x3feef1a7373aa9cbULL,
2853     0xbc9907f81b512d8eULL, 0x3feeecae6d05d866ULL,
2854     0xbc71d1e83e9436d2ULL, 0x3feee7db34e59ff7ULL,
2855     0xbc991919b3ce1b15ULL, 0x3feee32dc313a8e5ULL,
2856     0x3c859f48a72a4c6dULL, 0x3feedea64c123422ULL,
2857     0xbc9312607a28698aULL, 0x3feeda4504ac801cULL,
2858     0xbc58a78f4817895bULL, 0x3feed60a21f72e2aULL,
2859     0xbc7c2c9b67499a1bULL, 0x3feed1f5d950a897ULL,
2860     0x3c4363ed60c2ac11ULL, 0x3feece086061892dULL,
2861     0x3c9666093b0664efULL, 0x3feeca41ed1d0057ULL,
2862     0x3c6ecce1daa10379ULL, 0x3feec6a2b5c13cd0ULL,
2863     0x3c93ff8e3f0f1230ULL, 0x3feec32af0d7d3deULL,
2864     0x3c7690cebb7aafb0ULL, 0x3feebfdad5362a27ULL,
2865     0x3c931dbdeb54e077ULL, 0x3feebcb299fddd0dULL,
2866     0xbc8f94340071a38eULL, 0x3feeb9b2769d2ca7ULL,
2867     0xbc87deccdc93a349ULL, 0x3feeb6daa2cf6642ULL,
2868     0xbc78dec6bd0f385fULL, 0x3feeb42b569d4f82ULL,
2869     0xbc861246ec7b5cf6ULL, 0x3feeb1a4ca5d920fULL,
2870     0x3c93350518fdd78eULL, 0x3feeaf4736b527daULL,
2871     0x3c7b98b72f8a9b05ULL, 0x3feead12d497c7fdULL,
2872     0x3c9063e1e21c5409ULL, 0x3feeab07dd485429ULL,
2873     0x3c34c7855019c6eaULL, 0x3feea9268a5946b7ULL,
2874     0x3c9432e62b64c035ULL, 0x3feea76f15ad2148ULL,
2875     0xbc8ce44a6199769fULL, 0x3feea5e1b976dc09ULL,
2876     0xbc8c33c53bef4da8ULL, 0x3feea47eb03a5585ULL,
2877     0xbc845378892be9aeULL, 0x3feea34634ccc320ULL,
2878     0xbc93cedd78565858ULL, 0x3feea23882552225ULL,
2879     0x3c5710aa807e1964ULL, 0x3feea155d44ca973ULL,
2880     0xbc93b3efbf5e2228ULL, 0x3feea09e667f3bcdULL,
2881     0xbc6a12ad8734b982ULL, 0x3feea012750bdabfULL,
2882     0xbc6367efb86da9eeULL, 0x3fee9fb23c651a2fULL,
2883     0xbc80dc3d54e08851ULL, 0x3fee9f7df9519484ULL,
2884     0xbc781f647e5a3ecfULL, 0x3fee9f75e8ec5f74ULL,
2885     0xbc86ee4ac08b7db0ULL, 0x3fee9f9a48a58174ULL,
2886     0xbc8619321e55e68aULL, 0x3fee9feb564267c9ULL,
2887     0x3c909ccb5e09d4d3ULL, 0x3feea0694fde5d3fULL,
2888     0xbc7b32dcb94da51dULL, 0x3feea11473eb0187ULL,
2889     0x3c94ecfd5467c06bULL, 0x3feea1ed0130c132ULL,
2890     0x3c65ebe1abd66c55ULL, 0x3feea2f336cf4e62ULL,
2891     0xbc88a1c52fb3cf42ULL, 0x3feea427543e1a12ULL,
2892     0xbc9369b6f13b3734ULL, 0x3feea589994cce13ULL,
2893     0xbc805e843a19ff1eULL, 0x3feea71a4623c7adULL,
2894     0xbc94d450d872576eULL, 0x3feea8d99b4492edULL,
2895     0x3c90ad675b0e8a00ULL, 0x3feeaac7d98a6699ULL,
2896     0x3c8db72fc1f0eab4ULL, 0x3feeace5422aa0dbULL,
2897     0xbc65b6609cc5e7ffULL, 0x3feeaf3216b5448cULL,
2898     0x3c7bf68359f35f44ULL, 0x3feeb1ae99157736ULL,
2899     0xbc93091fa71e3d83ULL, 0x3feeb45b0b91ffc6ULL,
2900     0xbc5da9b88b6c1e29ULL, 0x3feeb737b0cdc5e5ULL,
2901     0xbc6c23f97c90b959ULL, 0x3feeba44cbc8520fULL,
2902     0xbc92434322f4f9aaULL, 0x3feebd829fde4e50ULL,
2903     0xbc85ca6cd7668e4bULL, 0x3feec0f170ca07baULL,
2904     0x3c71affc2b91ce27ULL, 0x3feec49182a3f090ULL,
2905     0x3c6dd235e10a73bbULL, 0x3feec86319e32323ULL,
2906     0xbc87c50422622263ULL, 0x3feecc667b5de565ULL,
2907     0x3c8b1c86e3e231d5ULL, 0x3feed09bec4a2d33ULL,
2908     0xbc91bbd1d3bcbb15ULL, 0x3feed503b23e255dULL,
2909     0x3c90cc319cee31d2ULL, 0x3feed99e1330b358ULL,
2910     0x3c8469846e735ab3ULL, 0x3feede6b5579fdbfULL,
2911     0xbc82dfcd978e9db4ULL, 0x3feee36bbfd3f37aULL,
2912     0x3c8c1a7792cb3387ULL, 0x3feee89f995ad3adULL,
2913     0xbc907b8f4ad1d9faULL, 0x3feeee07298db666ULL,
2914     0xbc55c3d956dcaebaULL, 0x3feef3a2b84f15fbULL,
2915     0xbc90a40e3da6f640ULL, 0x3feef9728de5593aULL,
2916     0xbc68d6f438ad9334ULL, 0x3feeff76f2fb5e47ULL,
2917     0xbc91eee26b588a35ULL, 0x3fef05b030a1064aULL,
2918     0x3c74ffd70a5fddcdULL, 0x3fef0c1e904bc1d2ULL,
2919     0xbc91bdfbfa9298acULL, 0x3fef12c25bd71e09ULL,
2920     0x3c736eae30af0cb3ULL, 0x3fef199bdd85529cULL,
2921     0x3c8ee3325c9ffd94ULL, 0x3fef20ab5fffd07aULL,
2922     0x3c84e08fd10959acULL, 0x3fef27f12e57d14bULL,
2923     0x3c63cdaf384e1a67ULL, 0x3fef2f6d9406e7b5ULL,
2924     0x3c676b2c6c921968ULL, 0x3fef3720dcef9069ULL,
2925     0xbc808a1883ccb5d2ULL, 0x3fef3f0b555dc3faULL,
2926     0xbc8fad5d3ffffa6fULL, 0x3fef472d4a07897cULL,
2927     0xbc900dae3875a949ULL, 0x3fef4f87080d89f2ULL,
2928     0x3c74a385a63d07a7ULL, 0x3fef5818dcfba487ULL,
2929     0xbc82919e2040220fULL, 0x3fef60e316c98398ULL,
2930     0x3c8e5a50d5c192acULL, 0x3fef69e603db3285ULL,
2931     0x3c843a59ac016b4bULL, 0x3fef7321f301b460ULL,
2932     0xbc82d52107b43e1fULL, 0x3fef7c97337b9b5fULL,
2933     0xbc892ab93b470dc9ULL, 0x3fef864614f5a129ULL,
2934     0x3c74b604603a88d3ULL, 0x3fef902ee78b3ff6ULL,
2935     0x3c83c5ec519d7271ULL, 0x3fef9a51fbc74c83ULL,
2936     0xbc8ff7128fd391f0ULL, 0x3fefa4afa2a490daULL,
2937     0xbc8dae98e223747dULL, 0x3fefaf482d8e67f1ULL,
2938     0x3c8ec3bc41aa2008ULL, 0x3fefba1bee615a27ULL,
2939     0x3c842b94c3a9eb32ULL, 0x3fefc52b376bba97ULL,
2940     0x3c8a64a931d185eeULL, 0x3fefd0765b6e4540ULL,
2941     0xbc8e37bae43be3edULL, 0x3fefdbfdad9cbe14ULL,
2942     0x3c77893b4d91cd9dULL, 0x3fefe7c1819e90d8ULL,
2943     0x3c5305c14160cc89ULL, 0x3feff3c22b8f71f1ULL
2944 };
2945
2946 /*********************************************************************
2947  *              exp (MSVCRT.@)
2948  *
2949  * Copied from musl: src/math/exp.c
2950  */
2951 double CDECL exp( double x )
2952 {
2953     static const double C[] = {
2954         0x1.ffffffffffdbdp-2,
2955         0x1.555555555543cp-3,
2956         0x1.55555cf172b91p-5,
2957         0x1.1111167a4d017p-7
2958     };
2959     static const double invln2N = 0x1.71547652b82fep0 * (1 << 7),
2960         negln2hiN = -0x1.62e42fefa0000p-8,
2961         negln2loN = -0x1.cf79abc9e3b3ap-47;
2962
2963     UINT32 abstop;
2964     UINT64 ki, idx, top, sbits;
2965     double kd, z, r, r2, scale, tail, tmp;
2966
2967     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
2968     if (abstop -  0x3c9 >= 0x408 - 0x3c9) {
2969         if (abstop - 0x3c9 >= 0x80000000)
2970             /* Avoid spurious underflow for tiny x. */
2971             /* Note: 0 is common input. */
2972             return 1.0 + x;
2973         if (abstop >= 0x409) {
2974             if (*(UINT64*)&x == 0xfff0000000000000ULL)
2975                 return 0.0;
2976 #if _MSVCR_VER == 0
2977             if (*(UINT64*)&x > 0x7ff0000000000000ULL)
2978                 return math_error(_DOMAIN, "exp", x, 0, 1.0 + x);
2979 #endif
2980             if (abstop >= 0x7ff)
2981                 return 1.0 + x;
2982             if (*(UINT64*)&x >> 63)
2983                 return math_error(_UNDERFLOW, "exp", x, 0, fp_barrier(DBL_MIN) * DBL_MIN);
2984             else
2985                 return math_error(_OVERFLOW, "exp", x, 0, fp_barrier(DBL_MAX) * DBL_MAX);
2986         }
2987         /* Large x is special cased below. */
2988         abstop = 0;
2989     }
2990
2991     /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
2992     /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
2993     z = invln2N * x;
2994     kd = __round(z);
2995     ki = (INT64)kd;
2996
2997     r = x + kd * negln2hiN + kd * negln2loN;
2998     /* 2^(k/N) ~= scale * (1 + tail). */
2999     idx = 2 * (ki % (1 << 7));
3000     top = ki << (52 - 7);
3001     tail = *(double*)&exp_T[idx];
3002     /* This is only a valid scale when -1023*N < k < 1024*N. */
3003     sbits = exp_T[idx + 1] + top;
3004     /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
3005     /* Evaluation is optimized assuming superscalar pipelined execution. */
3006     r2 = r * r;
3007     /* Without fma the worst case error is 0.25/N ulp larger. */
3008     /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
3009     tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]);
3010     if (abstop == 0) {
3011         /* Handle cases that may overflow or underflow when computing the result that
3012            is scale*(1+TMP) without intermediate rounding. The bit representation of
3013            scale is in SBITS, however it has a computed exponent that may have
3014            overflown into the sign bit so that needs to be adjusted before using it as
3015            a double. (int32_t)KI is the k used in the argument reduction and exponent
3016            adjustment of scale, positive k here means the result may overflow and
3017            negative k means the result may underflow. */
3018         double scale, y;
3019
3020         if ((ki & 0x80000000) == 0) {
3021             /* k > 0, the exponent of scale might have overflowed by <= 460. */
3022             sbits -= 1009ull << 52;
3023             scale = *(double*)&sbits;
3024             y = 0x1p1009 * (scale + scale * tmp);
3025             if (isinf(y))
3026                 return math_error(_OVERFLOW, "exp", x, 0, y);
3027             return y;
3028         }
3029         /* k < 0, need special care in the subnormal range. */
3030         sbits += 1022ull << 52;
3031         scale = *(double*)&sbits;
3032         y = scale + scale * tmp;
3033         if (y < 1.0) {
3034             /* Round y to the right precision before scaling it into the subnormal
3035                range to avoid double rounding that can cause 0.5+E/2 ulp error where
3036                E is the worst-case ulp error outside the subnormal range. So this
3037                is only useful if the goal is better than 1 ulp worst-case error. */
3038             double hi, lo;
3039             lo = scale - y + scale * tmp;
3040             hi = 1.0 + y;
3041             lo = 1.0 - hi + y + lo;
3042             y = hi + lo - 1.0;
3043             /* Avoid -0.0 with downward rounding. */
3044             if (y == 0.0)
3045                 y = 0.0;
3046             /* The underflow exception needs to be signaled explicitly. */
3047             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
3048             y = 0x1p-1022 * y;
3049             return math_error(_UNDERFLOW, "exp", x, 0, y);
3050         }
3051         y = 0x1p-1022 * y;
3052         return y;
3053     }
3054     scale = *(double*)&sbits;
3055     /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
3056        is no spurious underflow here even without fma. */
3057     return scale + scale * tmp;
3058 }
3059
3060 /*********************************************************************
3061  *              fmod (MSVCRT.@)
3062  *
3063  * Copied from musl: src/math/fmod.c
3064  */
3065 double CDECL fmod( double x, double y )
3066 {
3067     UINT64 xi = *(UINT64*)&x;
3068     UINT64 yi = *(UINT64*)&y;
3069     int ex = xi >> 52 & 0x7ff;
3070     int ey = yi >> 52 & 0x7ff;
3071     int sx = xi >> 63;
3072     UINT64 i;
3073
3074     if (isinf(x)) return math_error(_DOMAIN, "fmod", x, y, (x * y) / (x * y));
3075     if (yi << 1 == 0 || isnan(y) || ex == 0x7ff)
3076         return (x * y) / (x * y);
3077     if (xi << 1 <= yi << 1) {
3078         if (xi << 1 == yi << 1)
3079             return 0 * x;
3080         return x;
3081     }
3082
3083     /* normalize x and y */
3084     if (!ex) {
3085         for (i = xi << 12; i >> 63 == 0; ex--, i <<= 1);
3086         xi <<= -ex + 1;
3087     } else {
3088         xi &= -1ULL >> 12;
3089         xi |= 1ULL << 52;
3090     }
3091     if (!ey) {
3092         for (i = yi << 12; i >> 63 == 0; ey--, i <<= 1);
3093         yi <<= -ey + 1;
3094     } else {
3095         yi &= -1ULL >> 12;
3096         yi |= 1ULL << 52;
3097     }
3098
3099     /* x mod y */
3100     for (; ex > ey; ex--) {
3101         i = xi - yi;
3102         if (i >> 63 == 0) {
3103             if (i == 0)
3104                 return 0 * x;
3105             xi = i;
3106         }
3107         xi <<= 1;
3108     }
3109     i = xi - yi;
3110     if (i >> 63 == 0) {
3111         if (i == 0)
3112             return 0 * x;
3113         xi = i;
3114     }
3115     for (; xi >> 52 == 0; xi <<= 1, ex--);
3116
3117     /* scale result */
3118     if (ex > 0) {
3119         xi -= 1ULL << 52;
3120         xi |= (UINT64)ex << 52;
3121     } else {
3122         xi >>= -ex + 1;
3123     }
3124     xi |= (UINT64)sx << 63;
3125     return *(double*)&xi;
3126 }
3127
/*********************************************************************
 *              log (MSVCRT.@)
 *
 * Copied from musl: src/math/log.c src/math/log_data.c
 *
 * Natural logarithm of x.
 *
 * Inputs whose bit pattern lies in [0x3fee000000000000,
 * 0x3ff1090000000000) (close to 1.0) are evaluated with the B[]
 * polynomial in r = x - 1.  Everything else is decomposed as
 * x = 2^k * z, the tables T[] and T2[] are indexed by the 7-bit
 * subinterval index of z, and the A[] polynomial is applied to the
 * reduced argument.
 *
 * Special cases:
 *   x == +-0       _SING error via math_error(), result -inf
 *   x == +inf      returns x
 *   x is NaN       returns x
 *   x < 0          _DOMAIN error via math_error(), result (x - x) / (x - x)
 */
double CDECL log( double x )
{
    /* ln(2) split into a high and a low part. */
    static const double Ln2hi = 0x1.62e42fefa3800p-1,
        Ln2lo = 0x1.ef35793c76730p-45;
    /* Polynomial coefficients for the general (table based) path. */
    static const double A[] = {
        -0x1.0000000000001p-1,
        0x1.555555551305bp-2,
        -0x1.fffffffeb459p-3,
        0x1.999b324f10111p-3,
        -0x1.55575e506c89fp-3
    };
    /* Polynomial coefficients for the path used close to 1.0. */
    static const double B[] = {
        -0x1p-1,
        0x1.5555555555577p-2,
        -0x1.ffffffffffdcbp-3,
        0x1.999999995dd0cp-3,
        -0x1.55555556745a7p-3,
        0x1.24924a344de3p-3,
        -0x1.fffffa4423d65p-4,
        0x1.c7184282ad6cap-4,
        -0x1.999eb43b068ffp-4,
        0x1.78182f7afd085p-4,
        -0x1.5521375d145cdp-4
    };
    /* Per-subinterval values read as invc and logc in the general path. */
    static const struct {
        double invc, logc;
    } T[] = {
        {0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
        {0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
        {0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
        {0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
        {0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
        {0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
        {0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
        {0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
        {0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
        {0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
        {0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
        {0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
        {0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
        {0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
        {0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
        {0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
        {0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
        {0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
        {0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
        {0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
        {0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
        {0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
        {0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
        {0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
        {0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
        {0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
        {0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
        {0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
        {0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
        {0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
        {0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
        {0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
        {0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
        {0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
        {0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
        {0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
        {0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
        {0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
        {0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
        {0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
        {0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
        {0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
        {0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
        {0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
        {0x1.293726014b530p+0, -0x1.31b996b490000p-3},
        {0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
        {0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
        {0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
        {0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
        {0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
        {0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
        {0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
        {0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
        {0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
        {0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
        {0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
        {0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
        {0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
        {0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
        {0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
        {0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
        {0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
        {0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
        {0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
        {0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
        {0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
        {0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
        {0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
        {0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
        {0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
        {0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
        {0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
        {0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
        {0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
        {0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
        {0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
        {0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
        {0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
        {0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
        {0x1.008040614b195p+0, -0x1.0040979240000p-9},
        {0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
        {0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
        {0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
        {0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
        {0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
        {0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
        {0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
        {0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
        {0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
        {0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
        {0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
        {0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
        {0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
        {0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
        {0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
        {0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
        {0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
        {0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
        {0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
        {0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
        {0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
        {0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
        {0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
        {0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
        {0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
        {0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
        {0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
        {0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
        {0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
        {0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
        {0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
        {0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
        {0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
        {0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
        {0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
        {0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
        {0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
        {0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
        {0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
        {0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
        {0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
        {0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
        {0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
        {0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
        {0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
        {0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
        {0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
        {0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2}
    };
    /* Per-subinterval values (chi high part, clo low part) subtracted
       from z when forming the reduced argument r. */
    static const struct {
        double chi, clo;
    } T2[] = {
        {0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
        {0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
        {0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
        {0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
        {0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
        {0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
        {0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
        {0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
        {0x1.710000e86978p-1, 0x1.bff6671097952p-56},
        {0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
        {0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
        {0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
        {0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
        {0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
        {0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
        {0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
        {0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
        {0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
        {0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
        {0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
        {0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
        {0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
        {0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
        {0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
        {0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
        {0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
        {0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
        {0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
        {0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
        {0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
        {0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
        {0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
        {0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
        {0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
        {0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
        {0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
        {0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
        {0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
        {0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
        {0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
        {0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
        {0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
        {0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
        {0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
        {0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
        {0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
        {0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
        {0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
        {0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
        {0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
        {0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
        {0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
        {0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
        {0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
        {0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
        {0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
        {0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
        {0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
        {0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
        {0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
        {0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
        {0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
        {0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
        {0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
        {0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
        {0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
        {0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
        {0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
        {0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
        {0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
        {0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
        {0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
        {0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
        {0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
        {0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
        {0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
        {0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
        {0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
        {0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
        {0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
        {0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
        {0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
        {0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
        {0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
        {0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
        {0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
        {0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
        {0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
        {0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
        {0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
        {0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
        {0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
        {0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
        {0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
        {0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
        {0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
        {0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
        {0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
        {0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
        {0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
        {0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
        {0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
        {0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
        {0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
        {0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
        {0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
        {0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
        {0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
        {0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
        {0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
        {0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
        {0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
        {0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
        {0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
        {0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
        {0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
        {0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
        {0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
        {0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
        {0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
        {0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
        {0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
        {0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
        {0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
        {0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
        {0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
        {0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
        {0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54}
    };

    double w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
    UINT64 ix, iz, tmp;
    UINT32 top;
    int k, i;

    /* ix: raw bits of x; top: the upper 16 bits (sign, exponent and the
       four leading mantissa bits). */
    ix = *(UINT64*)&x;
    top = ix >> 48;
    /* Unsigned range check: true when ix is in
       [0x3fee000000000000, 0x3ff1090000000000). */
    if (ix - 0x3fee000000000000ULL < 0x3090000000000ULL) {
        double rhi, rlo;

        /* Handle close to 1.0 inputs separately. */
        /* Fix sign of zero with downward rounding when x==1. */
        if (ix == 0x3ff0000000000000ULL)
            return 0;
        r = x - 1.0;
        r2 = r * r;
        r3 = r * r2;
        /* Terms of degree 3 and above; the r and r^2 terms are added
           below with extra care. */
        y = r3 * (B[1] + r * B[2] + r2 * B[3] + r3 * (B[4] + r * B[5] + r2 * B[6] +
                    r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
        /* Worst-case error is around 0.507 ULP. */
        /* Split r into rhi + rlo, then form hi + lo = r + B[0]*r*r with
           the rounding error of each step folded into lo. */
        w = r * 0x1p27;
        rhi = r + w - w;
        rlo = r - rhi;
        w = rhi * rhi * B[0]; /* B[0] == -0.5. */
        hi = r + w;
        lo = r - hi + w;
        lo += B[0] * rlo * (rhi + r);
        y += lo;
        y += hi;
        return y;
    }
    /* Unsigned range check: true when top < 0x0010 or top >= 0x7ff0
       (the latter includes every value with the sign bit set). */
    if (top - 0x0010 >= 0x7ff0 - 0x0010) {
        /* x < 0x1p-1022 or inf or nan. */
        if (ix * 2 == 0)
            /* +-0: the divisor sign is chosen so the quotient is -inf
               for both +0 and -0. */
            return math_error(_SING, "log", x, 0, (top & 0x8000 ? 1.0 : -1.0) / x);
        if (ix == 0x7ff0000000000000ULL) /* log(inf) == inf. */
            return x;
        /* NaN of either sign: exponent all ones, mantissa non-zero. */
        if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL))
            return x;
        /* Any remaining value with the sign bit set is negative. */
        if (top & 0x8000)
            return math_error(_DOMAIN, "log", x, 0, (x - x) / (x - x));
        /* x is subnormal, normalize it. */
        x *= 0x1p52;
        ix = *(UINT64*)&x;
        /* Compensate for the scaling by 2^52 in the exponent field. */
        ix -= 52ULL << 52;
    }

    /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center. */
    tmp = ix - 0x3fe6000000000000ULL;
    /* Table index: the 7 bits just below the exponent field of tmp. */
    i = (tmp >> (52 - 7)) % (1 << 7);
    k = (INT64)tmp >> 52; /* arithmetic shift */
    /* Remove k from the exponent field of ix to obtain the bits of z. */
    iz = ix - (tmp & 0xfffULL << 52);
    invc = T[i].invc;
    logc = T[i].logc;
    z = *(double*)&iz;

    /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
    /* r ~= z/c - 1, |r| < 1/(2*N). */
    r = (z - T2[i].chi - T2[i].clo) * invc;
    kd = (double)k;

    /* hi + lo = r + log(c) + k*Ln2. */
    w = kd * Ln2hi + logc;
    hi = w + r;
    lo = w - hi + r + kd * Ln2lo;

    /* log(x) = lo + (log1p(r) - r) + hi. */
    r2 = r * r; /* rounding error: 0x1p-54/N^2. */
    /* Worst case error if |y| > 0x1p-5:
       0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
       Worst case error if |y| > 0x1p-4:
       0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
    y = lo + r2 * A[0] +
        r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
    return y;
}
3500
3501 /*********************************************************************
3502  *              log10 (MSVCRT.@)
3503  */
3504 double CDECL log10( double x )
3505 {
3506     static const double ivln10hi = 4.34294481878168880939e-01,
3507         ivln10lo = 2.50829467116452752298e-11,
3508         log10_2hi = 3.01029995663611771306e-01,
3509         log10_2lo = 3.69423907715893078616e-13,
3510         Lg1 = 6.666666666666735130e-01,
3511         Lg2 = 3.999999999940941908e-01,
3512         Lg3 = 2.857142874366239149e-01,
3513         Lg4 = 2.222219843214978396e-01,
3514         Lg5 = 1.818357216161805012e-01,
3515         Lg6 = 1.531383769920937332e-01,
3516         Lg7 = 1.479819860511658591e-01;
3517
3518     union {double f; UINT64 i;} u = {x};
3519     double hfsq, f, s, z, R, w, t1, t2, dk, y, hi, lo, val_hi, val_lo;
3520     UINT32 hx;
3521     int k;
3522
3523     hx = u.i >> 32;
3524     k = 0;
3525     if (hx < 0x00100000 || hx >> 31) {
3526         if (u.i << 1 == 0)
3527             return math_error(_SING, "log10", x, 0, -1 / (x * x));
3528         if ((u.i & ~(1ULL << 63)) > 0x7ff0000000000000ULL)
3529             return x;
3530         if (hx >> 31)
3531             return math_error(_DOMAIN, "log10", x, 0, (x - x) / (x - x));
3532         /* subnormal number, scale x up */
3533         k -= 54;
3534         x *= 0x1p54;
3535         u.f = x;
3536         hx = u.i >> 32;
3537     } else if (hx >= 0x7ff00000) {
3538         return x;
3539     } else if (hx == 0x3ff00000 && u.i<<32 == 0)
3540         return 0;
3541
3542     /* reduce x into [sqrt(2)/2, sqrt(2)] */
3543     hx += 0x3ff00000 - 0x3fe6a09e;
3544     k += (int)(hx >> 20) - 0x3ff;
3545     hx = (hx & 0x000fffff) + 0x3fe6a09e;
3546     u.i = (UINT64)hx << 32 | (u.i & 0xffffffff);
3547     x = u.f;
3548
3549     f = x - 1.0;
3550     hfsq = 0.5 * f * f;
3551     s = f / (2.0 + f);
3552     z = s * s;
3553     w = z * z;
3554     t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
3555     t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
3556     R = t2 + t1;
3557
3558     /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */
3559     hi = f - hfsq;
3560     u.f = hi;
3561     u.i &= (UINT64)-1 << 32;
3562     hi = u.f;
3563     lo = f - hi - hfsq + s * (hfsq + R);
3564
3565     /* val_hi+val_lo ~ log10(1+f) + k*log10(2) */
3566     val_hi = hi * ivln10hi;
3567     dk = k;
3568     y = dk * log10_2hi;
3569     val_lo = dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi;
3570
3571     /*
3572      * Extra precision in for adding y is not strictly needed
3573      * since there is no very large cancellation near x = sqrt(2) or
3574      * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs
3575      * with some parallelism and it reduces the error for many args.
3576      */
3577     w = y + val_hi;
3578     val_lo += (y - w) + val_hi;
3579     val_hi = w;
3580
3581     return val_lo + val_hi;
3582 }
3583
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
   additional 15 bits precision. IX is the bit representation of x, but
   normalized in the subnormal range using the sign bit for the exponent.

   The rounded logarithm is the return value; the correction term is
   stored through *tail. */
static double pow_log(UINT64 ix, double *tail)
{
    /* Lookup table with 1 << 7 = 128 entries, selected by the index i
       computed below. invc is the 1/c factor applied to z; logc and
       logctail are added to the high and low partial sums respectively
       (they appear to be the high and low parts of log(c)). */
    static const struct {
        double invc, logc, logctail;
    } T[] = {
        {0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48},
        {0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46},
        {0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45},
        {0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49},
        {0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47},
        {0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46},
        {0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50},
        {0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45},
        {0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45},
        {0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45},
        {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46},
        {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46},
        {0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46},
        {0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46},
        {0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46},
        {0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45},
        {0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47},
        {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48},
        {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48},
        {0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47},
        {0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45},
        {0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46},
        {0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45},
        {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45},
        {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45},
        {0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46},
        {0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52},
        {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45},
        {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45},
        {0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45},
        {0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45},
        {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45},
        {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45},
        {0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46},
        {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46},
        {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46},
        {0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45},
        {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46},
        {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46},
        {0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48},
        {0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45},
        {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45},
        {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45},
        {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47},
        {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47},
        {0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45},
        {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45},
        {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45},
        {0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46},
        {0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45},
        {0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45},
        {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46},
        {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46},
        {0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45},
        {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46},
        {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46},
        {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45},
        {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45},
        {0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46},
        {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45},
        {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45},
        {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46},
        {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46},
        {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45},
        {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45},
        {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48},
        {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48},
        {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45},
        {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45},
        {0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45},
        {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50},
        {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50},
        {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46},
        {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46},
        {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0},
        {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0},
        {0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46},
        {0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45},
        {0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45},
        {0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47},
        {0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45},
        {0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46},
        {0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46},
        {0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47},
        {0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45},
        {0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45},
        {0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45},
        {0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49},
        {0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45},
        {0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46},
        {0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45},
        {0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45},
        {0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45},
        {0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45},
        {0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45},
        {0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47},
        {0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51},
        {0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45},
        {0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45},
        {0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46},
        {0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45},
        {0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46},
        {0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47},
        {0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47},
        {0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45},
        {0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47},
        {0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45},
        {0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48},
        {0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45},
        {0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51},
        {0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51},
        {0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46},
        {0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48},
        {0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45},
        {0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45},
        {0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45},
        {0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45},
        {0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47},
        {0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45},
        {0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45},
        {0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46},
        {0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46},
        {0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47},
        {0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45},
        {0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45},
        {0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45},
        {0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46},
        {0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47},
    };
    /* Polynomial coefficients for the log1p(r) tail. The * -2, * 4 and * -8
       factors compensate for the -0.5 (A[0]) already folded into the
       ar2/ar3 products the coefficients are multiplied with below. */
    static const double A[] = {
        -0x1p-1,
        0x1.555555555556p-2 * -2,
        -0x1.0000000000006p-2 * -2,
        0x1.999999959554ep-3 * 4,
        -0x1.555555529a47ap-3 * 4,
        0x1.2495b9b4845e9p-3 * -8,
        -0x1.0002b8b263fc3p-3 * -8
    };
    /* ln(2) split into a high part and a low correction; both are
       multiplied by the exponent k below. */
    static const double ln2hi = 0x1.62e42fefa3800p-1,
        ln2lo = 0x1.ef35793c76730p-45;

    double z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
    double zhi, zlo, rhi, rlo, ar, ar2, ar3, lo3, lo4, arhi, arhi2;
    UINT64 iz, tmp;
    int k, i;

    /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center. */
    /* 0x3fe6955500000000 is the bit pattern of OFF. After subtracting it,
       the top 12 bits of tmp hold k and the next 7 bits the table index. */
    tmp = ix - 0x3fe6955500000000ULL;
    i = (tmp >> (52 - 7)) % (1 << 7);
    k = (INT64)tmp >> 52; /* arithmetic shift */
    /* remove k from the exponent field so that z lands in [OFF,2*OFF) */
    iz = ix - (tmp & 0xfffULL << 52);
    z = *(double*)&iz;
    kd = k;

    /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
    invc = T[i].invc;
    logc = T[i].logc;
    logctail = T[i].logctail;

    /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
     |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */
    /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
    /* zhi is z rounded to its upper 32 bits (add half of the dropped
       range, then mask); zlo is the exact remainder. */
    iz = (iz + (1ULL << 31)) & (-1ULL << 32);
    zhi = *(double*)&iz;
    zlo = z - zhi;
    rhi = zhi * invc - 1.0;
    rlo = zlo * invc;
    r = rhi + rlo;

    /* k*Ln2 + log(c) + r. */
    t1 = kd * ln2hi + logc;
    t2 = t1 + r;
    lo1 = kd * ln2lo + logctail;
    /* lo2 recovers the rounding error of the t1 + r addition */
    lo2 = t1 - t2 + r;

    /* Evaluation is optimized assuming superscalar pipelined execution. */
    ar = A[0] * r; /* A[0] = -0.5. */
    ar2 = r * ar;
    ar3 = r * ar2;
    /* k*Ln2 + log(c) + r + A[0]*r*r. */
    arhi = A[0] * rhi;
    arhi2 = rhi * arhi;
    hi = t2 + arhi2;
    /* lo3: part of A[0]*r*r not covered by arhi2; lo4: rounding error of hi */
    lo3 = rlo * (ar + arhi);
    lo4 = t2 - hi + arhi2;
    /* p = log1p(r) - r - A[0]*r*r. */
    p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
    lo = lo1 + lo2 + lo3 + lo4 + p;
    y = hi + lo;
    /* whatever of hi + lo did not fit into y is handed back as the tail */
    *tail = hi - y + lo;
    return y;
}
3786
3787 /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
3788    The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
3789 static double pow_exp(double argx, double argy, double x, double xtail, UINT32 sign_bias)
3790 {
3791     static const double C[] = {
3792         0x1.ffffffffffdbdp-2,
3793         0x1.555555555543cp-3,
3794         0x1.55555cf172b91p-5,
3795         0x1.1111167a4d017p-7
3796     };
3797     static const double invln2N = 0x1.71547652b82fep0 * (1 << 7),
3798         negln2hiN = -0x1.62e42fefa0000p-8,
3799         negln2loN = -0x1.cf79abc9e3b3ap-47;
3800
3801     UINT32 abstop;
3802     UINT64 ki, idx, top, sbits;
3803     double kd, z, r, r2, scale, tail, tmp;
3804
3805     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
3806     if (abstop - 0x3c9 >= 0x408 - 0x3c9) {
3807         if (abstop - 0x3c9 >= 0x80000000) {
3808             /* Avoid spurious underflow for tiny x. */
3809             /* Note: 0 is common input. */
3810             double one = 1.0 + x;
3811             return sign_bias ? -one : one;
3812         }
3813         if (abstop >= 0x409) {
3814             /* Note: inf and nan are already handled. */
3815             if (*(UINT64*)&x >> 63)
3816                 return math_error(_UNDERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MIN : DBL_MIN) * DBL_MIN);
3817             return math_error(_OVERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MAX : DBL_MAX) * DBL_MAX);
3818         }
3819         /* Large x is special cased below. */
3820         abstop = 0;
3821     }
3822
3823     /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
3824     /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
3825     z = invln2N * x;
3826     kd = __round(z);
3827     ki = kd;
3828     r = x + kd * negln2hiN + kd * negln2loN;
3829     /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
3830     r += xtail;
3831     /* 2^(k/N) ~= scale * (1 + tail). */
3832     idx = 2 * (ki % (1 << 7));
3833     top = (ki + sign_bias) << (52 - 7);
3834     tail = *(double*)&exp_T[idx];
3835     /* This is only a valid scale when -1023*N < k < 1024*N. */
3836     sbits = exp_T[idx + 1] + top;
3837     /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
3838     /* Evaluation is optimized assuming superscalar pipelined execution. */
3839     r2 = r * r;
3840     /* Without fma the worst case error is 0.25/N ulp larger. */
3841     /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
3842     tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]);
3843     if (abstop == 0) {
3844         /* Handle cases that may overflow or underflow when computing the result that
3845            is scale*(1+TMP) without intermediate rounding. The bit representation of
3846            scale is in SBITS, however it has a computed exponent that may have
3847            overflown into the sign bit so that needs to be adjusted before using it as
3848            a double. (int32_t)KI is the k used in the argument reduction and exponent
3849            adjustment of scale, positive k here means the result may overflow and
3850            negative k means the result may underflow. */
3851         double scale, y;
3852
3853         if ((ki & 0x80000000) == 0) {
3854             /* k > 0, the exponent of scale might have overflowed by <= 460. */
3855             sbits -= 1009ull << 52;
3856             scale = *(double*)&sbits;
3857             y = 0x1p1009 * (scale + scale * tmp);
3858             if (isinf(y))
3859                 return math_error(_OVERFLOW, "pow", argx, argy, y);
3860             return y;
3861         }
3862         /* k < 0, need special care in the subnormal range. */
3863         sbits += 1022ull << 52;
3864         /* Note: sbits is signed scale. */
3865         scale = *(double*)&sbits;
3866         y = scale + scale * tmp;
3867         if (fabs(y) < 1.0) {
3868             /* Round y to the right precision before scaling it into the subnormal
3869                range to avoid double rounding that can cause 0.5+E/2 ulp error where
3870                E is the worst-case ulp error outside the subnormal range. So this
3871                is only useful if the goal is better than 1 ulp worst-case error. */
3872             double hi, lo, one = 1.0;
3873             if (y < 0.0)
3874                 one = -1.0;
3875             lo = scale - y + scale * tmp;
3876             hi = one + y;
3877             lo = one - hi + y + lo;
3878             y = hi + lo - one;
3879             /* Fix the sign of 0. */
3880             if (y == 0.0) {
3881                 sbits &= 0x8000000000000000ULL;
3882                 y = *(double*)&sbits;
3883             }
3884             /* The underflow exception needs to be signaled explicitly. */
3885             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
3886             y = 0x1p-1022 * y;
3887             return math_error(_UNDERFLOW, "pow", argx, argy, y);
3888         }
3889         y = 0x1p-1022 * y;
3890         return y;
3891     }
3892     scale = *(double*)&sbits;
3893     /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
3894        is no spurious underflow here even without fma. */
3895     return scale + scale * tmp;
3896 }
3897
3898 /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
3899    the bit representation of a non-zero finite floating-point value. */
3900 static inline int pow_checkint(UINT64 iy)
3901 {
3902     int e = iy >> 52 & 0x7ff;
3903     if (e < 0x3ff)
3904         return 0;
3905     if (e > 0x3ff + 52)
3906         return 2;
3907     if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
3908         return 0;
3909     if (iy & (1ULL << (0x3ff + 52 - e)))
3910         return 1;
3911     return 2;
3912 }
3913
3914 /*********************************************************************
3915  *              pow (MSVCRT.@)
3916  *
3917  * Copied from musl: src/math/pow.c
3918  */
3919 double CDECL pow( double x, double y )
3920 {
3921     UINT32 sign_bias = 0;
3922     UINT64 ix, iy;
3923     UINT32 topx, topy;
3924     double lo, hi, ehi, elo, yhi, ylo, lhi, llo;
3925
3926     ix = *(UINT64*)&x;
3927     iy = *(UINT64*)&y;
3928     topx = ix >> 52;
3929     topy = iy >> 52;
3930     if (topx - 0x001 >= 0x7ff - 0x001 ||
3931             (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
3932         /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
3933            and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
3934         /* Special cases: (x < 0x1p-126 or inf or nan) or
3935            (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
3936         if (2 * iy - 1 >= 2 * 0x7ff0000000000000ULL - 1) {
3937             if (2 * iy == 0)
3938                 return 1.0;
3939             if (ix == 0x3ff0000000000000ULL)
3940                 return 1.0;
3941             if (2 * ix > 2 * 0x7ff0000000000000ULL ||
3942                     2 * iy > 2 * 0x7ff0000000000000ULL)
3943                 return x + y;
3944             if (2 * ix == 2 * 0x3ff0000000000000ULL)
3945                 return 1.0;
3946             if ((2 * ix < 2 * 0x3ff0000000000000ULL) == !(iy >> 63))
3947                 return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
3948             return y * y;
3949         }
3950         if (2 * ix - 1 >= 2 * 0x7ff0000000000000ULL - 1) {
3951             double x2 = x * x;
3952             if (ix >> 63 && pow_checkint(iy) == 1)
3953                 x2 = -x2;
3954             if (iy & 0x8000000000000000ULL && x2 == 0.0)
3955                 return math_error(_SING, "pow", x, y, 1 / x2);
3956             /* Without the barrier some versions of clang hoist the 1/x2 and
3957                thus division by zero exception can be signaled spuriously. */
3958             return iy >> 63 ? fp_barrier(1 / x2) : x2;
3959         }
3960         /* Here x and y are non-zero finite. */
3961         if (ix >> 63) {
3962             /* Finite x < 0. */
3963             int yint = pow_checkint(iy);
3964             if (yint == 0)
3965                 return math_error(_DOMAIN, "pow", x, y, 0 / (x - x));
3966             if (yint == 1)
3967                 sign_bias = 0x800 << 7;
3968             ix &= 0x7fffffffffffffff;
3969             topx &= 0x7ff;
3970         }
3971         if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
3972             /* Note: sign_bias == 0 here because y is not odd. */
3973             if (ix == 0x3ff0000000000000ULL)
3974                 return 1.0;
3975             if ((topy & 0x7ff) < 0x3be) {
3976                 /* |y| < 2^-65, x^y ~= 1 + y*log(x). */
3977                 return ix > 0x3ff0000000000000ULL ? 1.0 + y : 1.0 - y;
3978             }
3979             if ((ix > 0x3ff0000000000000ULL) == (topy < 0x800))
3980                 return math_error(_OVERFLOW, "pow", x, y, fp_barrier(DBL_MAX) * DBL_MAX);
3981             return math_error(_UNDERFLOW, "pow", x, y, fp_barrier(DBL_MIN) * DBL_MIN);
3982         }
3983         if (topx == 0) {
3984             /* Normalize subnormal x so exponent becomes negative. */
3985             x *= 0x1p52;
3986             ix = *(UINT64*)&x;
3987             ix &= 0x7fffffffffffffff;
3988             ix -= 52ULL << 52;
3989         }
3990     }
3991
3992     hi = pow_log(ix, &lo);
3993     iy &= -1ULL << 27;
3994     yhi = *(double*)&iy;
3995     ylo = y - yhi;
3996     *(UINT64*)&lhi = *(UINT64*)&hi & -1ULL << 27;
3997     llo = fp_barrier(hi - lhi + lo);
3998     ehi = yhi * lhi;
3999     elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
4000     return pow_exp(x, y, ehi, elo, sign_bias);
4001 }
4002
4003 /*********************************************************************
4004  *              sin (MSVCRT.@)
4005  *
4006  * Copied from musl: src/math/sin.c
4007  */
4008 double CDECL sin( double x )
4009 {
4010     double y[2];
4011     UINT32 ix;
4012     unsigned n;
4013
4014     ix = *(ULONGLONG*)&x >> 32;
4015     ix &= 0x7fffffff;
4016
4017     /* |x| ~< pi/4 */
4018     if (ix <= 0x3fe921fb) {
4019         if (ix < 0x3e500000) { /* |x| < 2**-26 */
4020             /* raise inexact if x != 0 and underflow if subnormal*/
4021             fp_barrier(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
4022             return x;
4023         }
4024         return __sin(x, 0.0, 0);
4025     }
4026
4027     /* sin(Inf or NaN) is NaN */
4028     if (isinf(x))
4029         return math_error(_DOMAIN, "sin", x, 0, x - x);
4030     if (ix >= 0x7ff00000)
4031         return x - x;
4032
4033     /* argument reduction needed */
4034     n = __rem_pio2(x, y);
4035     switch (n&3) {
4036     case 0: return  __sin(y[0], y[1], 1);
4037     case 1: return  __cos(y[0], y[1]);
4038     case 2: return -__sin(y[0], y[1], 1);
4039     default: return -__cos(y[0], y[1]);
4040     }
4041 }
4042
4043 /*********************************************************************
4044  *              sinh (MSVCRT.@)
4045  */
4046 double CDECL sinh( double x )
4047 {
4048     UINT64 ux = *(UINT64*)&x;
4049     UINT32 w;
4050     double t, h, absx;
4051
4052     h = 0.5;
4053     if (ux >> 63)
4054         h = -h;
4055     /* |x| */
4056     ux &= (UINT64)-1 / 2;
4057     absx = *(double*)&ux;
4058     w = ux >> 32;
4059
4060     /* |x| < log(DBL_MAX) */
4061     if (w < 0x40862e42) {
4062         t = __expm1(absx);
4063         if (w < 0x3ff00000) {
4064             if (w < 0x3ff00000 - (26 << 20))
4065                 return x;
4066             return h * (2 * t - t * t / (t + 1));
4067         }
4068         return h * (t + t / (t + 1));
4069     }
4070
4071     /* |x| > log(DBL_MAX) or nan */
4072     /* note: the result is stored to handle overflow */
4073     t = __expo2(absx, 2 * h);
4074     return t;
4075 }
4076
4077 static BOOL sqrt_validate( double *x, BOOL update_sw )
4078 {
4079     short c = _dclass(*x);
4080
4081     if (c == FP_ZERO) return FALSE;
4082     if (c == FP_NAN)
4083     {
4084 #ifdef __i386__
4085         if (update_sw)
4086             *x = math_error(_DOMAIN, "sqrt", *x, 0, *x);
4087 #else
4088         /* set signaling bit */
4089         *(ULONGLONG*)x |= 0x8000000000000ULL;
4090 #endif
4091         return FALSE;
4092     }
4093     if (signbit(*x))
4094     {
4095         *x = math_error(_DOMAIN, "sqrt", *x, 0, ret_nan(update_sw));
4096         return FALSE;
4097     }
4098     if (c == FP_INFINITE) return FALSE;
4099     return TRUE;
4100 }
4101
#if defined(__x86_64__) || defined(__i386__)
/* Assembly helper, only built for x86 and x86-64: one SSE2 sqrtsd
   instruction that replaces the low double of xmm0 with its square root,
   followed by ret. The prototype lets C code call it as double -> double.
   NOTE(review): the routine takes its operand from xmm0, not from the
   stack - confirm how i386 callers are expected to provide it. */
double CDECL sse2_sqrt(double);
__ASM_GLOBAL_FUNC( sse2_sqrt,
        "sqrtsd %xmm0, %xmm0\n\t"
        "ret" )
#endif
4108
#ifdef __i386__
/* Assembly helper, only built for i386: loads the double argument from
   the stack (4(%esp)) onto the x87 register stack, executes fsqrt and
   returns with the result left in st(0).
   The fsqrt is bracketed by SET_X87_CW(0xc00) / RESET_X87_CW, macros that
   are defined elsewhere; presumably they temporarily adjust the x87
   control word bits selected by the 0xc00 mask and restore them
   afterwards - TODO confirm against the macro definitions. */
double CDECL x87_sqrt(double);
__ASM_GLOBAL_FUNC( x87_sqrt,
        "fldl 4(%esp)\n\t"
        SET_X87_CW(0xc00)
        "fsqrt\n\t"
        RESET_X87_CW
        "ret" )
#endif
4118
4119 /*********************************************************************
4120  *              sqrt (MSVCRT.@)
4121  *
4122  * Copied from musl: src/math/sqrt.c
4123  */
4124 double CDECL sqrt( double x )
4125 {
4126 #ifdef __x86_64__
4127     if (!sqrt_validate(&x, TRUE))
4128         return x;
4129
4130     return sse2_sqrt(x);
4131 #elif defined( __i386__ )
4132     if (!sqrt_validate(&x, TRUE))
4133         return x;
4134
4135     return x87_sqrt(x);
4136 #else
4137     static const double tiny = 1.0e-300;
4138
4139     double z;
4140     int sign = 0x80000000;
4141     int ix0,s0,q,m,t,i;
4142     unsigned int r,t1,s1,ix1,q1;
4143     ULONGLONG ix;
4144
4145     if (!sqrt_validate(&x, TRUE))
4146         return x;
4147
4148     ix = *(ULONGLONG*)&x;
4149     ix0 = ix >> 32;
4150     ix1 = ix;
4151
4152     /* normalize x */
4153     m = ix0 >> 20;
4154     if (m == 0) {  /* subnormal x */
4155         while (ix0 == 0) {
4156             m -= 21;
4157             ix0 |= (ix1 >> 11);
4158             ix1 <<= 21;
4159         }
4160         for (i=0; (ix0 & 0x00100000) == 0; i++)
4161             ix0 <<= 1;
4162         m -= i - 1;
4163         ix0 |= ix1 >> (32 - i);
4164         ix1 <<= i;
4165     }
4166     m -= 1023;    /* unbias exponent */
4167     ix0 = (ix0 & 0x000fffff) | 0x00100000;
4168     if (m & 1) {  /* odd m, double x to make it even */
4169         ix0 += ix0 + ((ix1 & sign) >> 31);
4170         ix1 += ix1;
4171     }
4172     m >>= 1;      /* m = [m/2] */
4173
4174     /* generate sqrt(x) bit by bit */
4175     ix0 += ix0 + ((ix1 & sign) >> 31);
4176     ix1 += ix1;
4177     q = q1 = s0 = s1 = 0;  /* [q,q1] = sqrt(x) */
4178     r = 0x00200000;        /* r = moving bit from right to left */
4179
4180     while (r != 0) {
4181         t = s0 + r;
4182         if (t <= ix0) {
4183             s0   = t + r;
4184             ix0 -= t;
4185             q   += r;
4186         }
4187         ix0 += ix0 + ((ix1 & sign) >> 31);
4188         ix1 += ix1;
4189         r >>= 1;
4190     }
4191
4192     r = sign;
4193     while (r != 0) {
4194         t1 = s1 + r;
4195         t  = s0;
4196         if (t < ix0 || (t == ix0 && t1 <= ix1)) {
4197             s1 = t1 + r;
4198             if ((t1&sign) == sign && (s1 & sign) == 0)
4199                 s0++;
4200             ix0 -= t;
4201             if (ix1 < t1)
4202                 ix0--;
4203             ix1 -= t1;
4204             q1 += r;
4205         }
4206         ix0 += ix0 + ((ix1 & sign) >> 31);
4207         ix1 += ix1;
4208         r >>= 1;
4209     }
4210
4211     /* use floating add to find out rounding direction */
4212     if ((ix0 | ix1) != 0) {
4213         z = 1.0 - tiny; /* raise inexact flag */
4214         if (z >= 1.0) {
4215             z = 1.0 + tiny;
4216             if (q1 == (unsigned int)0xffffffff) {
4217                 q1 = 0;
4218                 q++;
4219             } else if (z > 1.0) {
4220                 if (q1 == (unsigned int)0xfffffffe)
4221                     q++;
4222                 q1 += 2;
4223             } else
4224                 q1 += q1 & 1;
4225         }
4226     }
4227     ix0 = (q >> 1) + 0x3fe00000;
4228     ix1 = q1 >> 1;
4229     if (q & 1)
4230         ix1 |= sign;
4231     ix = ix0 + ((unsigned int)m << 20);
4232     ix <<= 32;
4233     ix |= ix1;
4234     return *(double*)&ix;
4235 #endif
4236 }
4237
4238 /* Copied from musl: src/math/__tan.c */
4239 static double __tan(double x, double y, int odd)
4240 {
4241     static const double T[] = {
4242         3.33333333333334091986e-01,
4243         1.33333333333201242699e-01,
4244         5.39682539762260521377e-02,
4245         2.18694882948595424599e-02,
4246         8.86323982359930005737e-03,
4247         3.59207910759131235356e-03,
4248         1.45620945432529025516e-03,
4249         5.88041240820264096874e-04,
4250         2.46463134818469906812e-04,
4251         7.81794442939557092300e-05,
4252         7.14072491382608190305e-05,
4253         -1.85586374855275456654e-05,
4254         2.59073051863633712884e-05,
4255     };
4256     static const double pio4 = 7.85398163397448278999e-01;
4257     static const double pio4lo = 3.06161699786838301793e-17;
4258
4259     double z, r, v, w, s, a, w0, a0;
4260     UINT32 hx;
4261     int big, sign;
4262
4263     hx = *(ULONGLONG*)&x >> 32;
4264     big = (hx & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */
4265     if (big) {
4266         sign = hx >> 31;
4267         if (sign) {
4268             x = -x;
4269             y = -y;
4270         }
4271         x = (pio4 - x) + (pio4lo - y);
4272         y = 0.0;
4273     }
4274     z = x * x;
4275     w = z * z;
4276     r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11]))));
4277     v = z * (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12])))));
4278     s = z * x;
4279     r = y + z * (s * (r + v) + y) + s * T[0];
4280     w = x + r;
4281     if (big) {
4282         s = 1 - 2 * odd;
4283         v = s - 2.0 * (x + (r - w * w / (w + s)));
4284         return sign ? -v : v;
4285     }
4286     if (!odd)
4287         return w;
4288     /* -1.0/(x+r) has up to 2ulp error, so compute it accurately */
4289     w0 = w;
4290     *(LONGLONG*)&w0 = *(LONGLONG*)&w0 & 0xffffffff00000000ULL;
4291     v = r - (w0 - x);       /* w0+v = r+x */
4292     a0 = a = -1.0 / w;
4293     *(LONGLONG*)&a0 = *(LONGLONG*)&a0 & 0xffffffff00000000ULL;
4294     return a0 + a * (1.0 + a0 * w0 + a0 * v);
4295 }
4296
4297 /*********************************************************************
4298  *              tan (MSVCRT.@)
4299  *
4300  * Copied from musl: src/math/tan.c
4301  */
4302 double CDECL tan( double x )
4303 {
4304     double y[2];
4305     UINT32 ix;
4306     unsigned n;
4307
4308     ix = *(ULONGLONG*)&x >> 32;
4309     ix &= 0x7fffffff;
4310
4311     if (ix <= 0x3fe921fb) { /* |x| ~< pi/4 */
4312         if (ix < 0x3e400000) { /* |x| < 2**-27 */
4313             /* raise inexact if x!=0 and underflow if subnormal */
4314             fp_barrier(ix < 0x00100000 ? x / 0x1p120f : x + 0x1p120f);
4315             return x;
4316         }
4317         return __tan(x, 0.0, 0);
4318     }
4319
4320     if (isinf(x))
4321         return math_error(_DOMAIN, "tan", x, 0, x - x);
4322     if (ix >= 0x7ff00000)
4323         return x - x;
4324
4325     n = __rem_pio2(x, y);
4326     return __tan(y[0], y[1], n & 1);
4327 }
4328
4329 /*********************************************************************
4330  *              tanh (MSVCRT.@)
4331  */
4332 double CDECL tanh( double x )
4333 {
4334     UINT64 ui = *(UINT64*)&x;
4335     UINT32 w;
4336     int sign;
4337     double t;
4338
4339     /* x = |x| */
4340     sign = ui >> 63;
4341     ui &= (UINT64)-1 / 2;
4342     x = *(double*)&ui;
4343     w = ui >> 32;
4344
4345     if (w > 0x3fe193ea) {
4346         /* |x| > log(3)/2 ~= 0.5493 or nan */
4347         if (w > 0x40340000) {
4348 #if _MSVCR_VER < 140
4349             if (isnan(x))
4350                 return math_error(_DOMAIN, "tanh", x, 0, x);
4351 #endif
4352             /* |x| > 20 or nan */
4353             /* note: this branch avoids raising overflow */
4354             fp_barrier(x + 0x1p120f);
4355             t = 1 - 0 / x;
4356         } else {
4357             t = __expm1(2 * x);
4358             t = 1 - 2 / (t + 2);
4359         }
4360     } else if (w > 0x3fd058ae) {
4361         /* |x| > log(5/3)/2 ~= 0.2554 */
4362         t = __expm1(2 * x);
4363         t = t / (t + 2);
4364     } else if (w >= 0x00100000) {
4365         /* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */
4366         t = __expm1(-2 * x);
4367         t = -t / (t + 2);
4368     } else {
4369         /* |x| is subnormal */
4370         /* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */
4371         fp_barrier((float)x);
4372         t = x;
4373     }
4374     return sign ? -t : t;
4375 }
4376
4377
4378 #if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
4379
/* Generates the i386 entry point `name` for a one-operand routine whose
 * operand is taken from st(0) rather than from the stack.
 *
 * The emitted code reserves 68 bytes, pops st(0) into the outgoing argument
 * slot at (%esp), and then keeps popping x87 registers into successive 8-byte
 * slots for as long as fxam does not report an empty st(0) (status word
 * masked with 0x4500 equal to 0x4100).  %ecx counts the slots in use and is
 * parked at -4(%ebp) across the call to `call`.  Afterwards the value left in
 * st(0) by `call` is stored in slot 0 and the slots are reloaded from the
 * highest index down to 0, so the result is the last value pushed.
 * Registers are spilled with fstpl, i.e. as 64-bit doubles. */
4380 #define CREATE_FPU_FUNC1(name, call) \
4381     __ASM_GLOBAL_FUNC(name, \
4382             "pushl   %ebp\n\t" \
4383             __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
4384             __ASM_CFI(".cfi_rel_offset %ebp,0\n\t") \
4385             "movl    %esp, %ebp\n\t" \
4386             __ASM_CFI(".cfi_def_cfa_register %ebp\n\t") \
4387             "subl    $68, %esp\n\t" /* sizeof(double)*8 + sizeof(int) */ \
4388             "fstpl   (%esp)\n\t"    /* store function argument */ \
4389             "fwait\n\t" \
4390             "movl    $1, %ecx\n\t"  /* empty FPU stack */ \
4391             "1:\n\t" \
4392             "fxam\n\t" \
4393             "fstsw   %ax\n\t" \
4394             "and     $0x4500, %ax\n\t" \
4395             "cmp     $0x4100, %ax\n\t" \
4396             "je      2f\n\t" \
4397             "fstpl    (%esp,%ecx,8)\n\t" \
4398             "fwait\n\t" \
4399             "incl    %ecx\n\t" \
4400             "jmp     1b\n\t" \
4401             "2:\n\t" \
4402             "movl    %ecx, -4(%ebp)\n\t" \
4403             "call    " __ASM_NAME( #call ) "\n\t" \
4404             "movl    -4(%ebp), %ecx\n\t" \
4405             "fstpl   (%esp)\n\t"    /* save result */ \
4406             "3:\n\t"                /* restore FPU stack */ \
4407             "decl    %ecx\n\t" \
4408             "fldl    (%esp,%ecx,8)\n\t" \
4409             "cmpl    $0, %ecx\n\t" \
4410             "jne     3b\n\t" \
4411             "leave\n\t" \
4412             __ASM_CFI(".cfi_def_cfa %esp,4\n\t") \
4413             __ASM_CFI(".cfi_same_value %ebp\n\t") \
4414             "ret")
4415
/* Two-operand counterpart of the one-operand generator: the entry point
 * `name` takes both operands from the x87 stack.
 *
 * st(0) is popped into the second argument slot at 8(%esp) and the next
 * register into the first slot at (%esp).  Any further non-empty registers
 * (detected with fxam, status word masked with 0x4500 not equal to 0x4100)
 * are spilled into 8-byte slots starting at index 2, with the slot count kept
 * in %ecx and saved at -4(%ebp) across the call to `call`.  The value returned
 * in st(0) is stored in slot 1 and the slots are reloaded from the highest
 * index down to 1, which leaves the result as the last value pushed. */
4416 #define CREATE_FPU_FUNC2(name, call) \
4417     __ASM_GLOBAL_FUNC(name, \
4418             "pushl   %ebp\n\t" \
4419             __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
4420             __ASM_CFI(".cfi_rel_offset %ebp,0\n\t") \
4421             "movl    %esp, %ebp\n\t" \
4422             __ASM_CFI(".cfi_def_cfa_register %ebp\n\t") \
4423             "subl    $68, %esp\n\t" /* sizeof(double)*8 + sizeof(int) */ \
4424             "fstpl   8(%esp)\n\t"   /* store function argument */ \
4425             "fwait\n\t" \
4426             "fstpl   (%esp)\n\t" \
4427             "fwait\n\t" \
4428             "movl    $2, %ecx\n\t"  /* empty FPU stack */ \
4429             "1:\n\t" \
4430             "fxam\n\t" \
4431             "fstsw   %ax\n\t" \
4432             "and     $0x4500, %ax\n\t" \
4433             "cmp     $0x4100, %ax\n\t" \
4434             "je      2f\n\t" \
4435             "fstpl    (%esp,%ecx,8)\n\t" \
4436             "fwait\n\t" \
4437             "incl    %ecx\n\t" \
4438             "jmp     1b\n\t" \
4439             "2:\n\t" \
4440             "movl    %ecx, -4(%ebp)\n\t" \
4441             "call    " __ASM_NAME( #call ) "\n\t" \
4442             "movl    -4(%ebp), %ecx\n\t" \
4443             "fstpl   8(%esp)\n\t"   /* save result */ \
4444             "3:\n\t"                /* restore FPU stack */ \
4445             "decl    %ecx\n\t" \
4446             "fldl    (%esp,%ecx,8)\n\t" \
4447             "cmpl    $1, %ecx\n\t" \
4448             "jne     3b\n\t" \
4449             "leave\n\t" \
4450             __ASM_CFI(".cfi_def_cfa %esp,4\n\t") \
4451             __ASM_CFI(".cfi_same_value %ebp\n\t") \
4452             "ret")
4453
/* Instantiate the _CI* entry points.  Each one forwards to the C function
 * named as the second macro argument; atan2, fmod and pow take two operands
 * and therefore use the two-operand generator. */
4454 CREATE_FPU_FUNC1(_CIacos, acos)
4455 CREATE_FPU_FUNC1(_CIasin, asin)
4456 CREATE_FPU_FUNC1(_CIatan, atan)
4457 CREATE_FPU_FUNC2(_CIatan2, atan2)
4458 CREATE_FPU_FUNC1(_CIcos, cos)
4459 CREATE_FPU_FUNC1(_CIcosh, cosh)
4460 CREATE_FPU_FUNC1(_CIexp, exp)
4461 CREATE_FPU_FUNC2(_CIfmod, fmod)
4462 CREATE_FPU_FUNC1(_CIlog, log)
4463 CREATE_FPU_FUNC1(_CIlog10, log10)
4464 CREATE_FPU_FUNC2(_CIpow, pow)
4465 CREATE_FPU_FUNC1(_CIsin, sin)
4466 CREATE_FPU_FUNC1(_CIsinh, sinh)
4467 CREATE_FPU_FUNC1(_CIsqrt, sqrt)
4468 CREATE_FPU_FUNC1(_CItan, tan)
4469 CREATE_FPU_FUNC1(_CItanh, tanh)
4470
/* _ftol: pops st(0), converts it to a 64-bit integer and returns that in
 * %edx:%eax.  The current x87 control word is saved at (%esp); a copy with
 * the 0xc00 bits (both rounding-control bits) set is written to 2(%esp) and
 * loaded for the fistpq store, after which the saved control word is
 * restored. */
4471 __ASM_GLOBAL_FUNC(_ftol,
4472         "pushl   %ebp\n\t"
4473         __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
4474         __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
4475         "movl    %esp, %ebp\n\t"
4476         __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
4477         "subl    $12, %esp\n\t"     /* sizeof(LONGLONG) + 2*sizeof(WORD) */
4478         "fnstcw  (%esp)\n\t"
4479         "mov     (%esp), %ax\n\t"
4480         "or      $0xc00, %ax\n\t"
4481         "mov     %ax, 2(%esp)\n\t"
4482         "fldcw   2(%esp)\n\t"
4483         "fistpq  4(%esp)\n\t"
4484         "fldcw   (%esp)\n\t"
4485         "movl    4(%esp), %eax\n\t"
4486         "movl    8(%esp), %edx\n\t"
4487         "leave\n\t"
4488         __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
4489         __ASM_CFI(".cfi_same_value %ebp\n\t")
4490         "ret")
4491
4492 #endif /* (defined(__GNUC__) || defined(__clang__)) && defined(__i386__) */
4493
4494 /*********************************************************************
4495  *              _fpclass (MSVCRT.@)
4496  */
4497 int CDECL _fpclass(double num)
4498 {
4499     union { double f; UINT64 i; } u = { num };
4500     int e = u.i >> 52 & 0x7ff;
4501     int s = u.i >> 63;
4502
4503     switch (e)
4504     {
4505     case 0:
4506         if (u.i << 1) return s ? _FPCLASS_ND : _FPCLASS_PD;
4507         return s ? _FPCLASS_NZ : _FPCLASS_PZ;
4508     case 0x7ff:
4509         if (u.i << 12) return ((u.i >> 51) & 1) ? _FPCLASS_QNAN : _FPCLASS_SNAN;
4510         return s ? _FPCLASS_NINF : _FPCLASS_PINF;
4511     default:
4512         return s ? _FPCLASS_NN : _FPCLASS_PN;
4513     }
4514 }
4515
4516 /*********************************************************************
4517  *              _rotl (MSVCRT.@)
4518  */
4519 unsigned int CDECL MSVCRT__rotl(unsigned int num, int shift)
4520 {
4521   shift &= 31;
4522   return (num << shift) | (num >> (32-shift));
4523 }
4524
4525 /*********************************************************************
4526  *              _lrotl (MSVCRT.@)
4527  */
4528 __msvcrt_ulong CDECL MSVCRT__lrotl(__msvcrt_ulong num, int shift)
4529 {
4530   shift &= 0x1f;
4531   return (num << shift) | (num >> (32-shift));
4532 }
4533
4534 /*********************************************************************
4535  *              _lrotr (MSVCRT.@)
4536  */
4537 __msvcrt_ulong CDECL MSVCRT__lrotr(__msvcrt_ulong num, int shift)
4538 {
4539   shift &= 0x1f;
4540   return (num >> shift) | (num << (32-shift));
4541 }
4542
4543 /*********************************************************************
4544  *              _rotr (MSVCRT.@)
4545  */
4546 unsigned int CDECL MSVCRT__rotr(unsigned int num, int shift)
4547 {
4548     shift &= 0x1f;
4549     return (num >> shift) | (num << (32-shift));
4550 }
4551
4552 /*********************************************************************
4553  *              _rotl64 (MSVCRT.@)
4554  */
4555 unsigned __int64 CDECL MSVCRT__rotl64(unsigned __int64 num, int shift)
4556 {
4557   shift &= 63;
4558   return (num << shift) | (num >> (64-shift));
4559 }
4560
4561 /*********************************************************************
4562  *              _rotr64 (MSVCRT.@)
4563  */
4564 unsigned __int64 CDECL MSVCRT__rotr64(unsigned __int64 num, int shift)
4565 {
4566     shift &= 63;
4567     return (num >> shift) | (num << (64-shift));
4568 }
4569
4570 /*********************************************************************
4571  *              abs (MSVCRT.@)
4572  */
4573 int CDECL abs( int n )
4574 {
4575     return n >= 0 ? n : -n;
4576 }
4577
4578 /*********************************************************************
4579  *              labs (MSVCRT.@)
4580  */
4581 __msvcrt_long CDECL labs( __msvcrt_long n )
4582 {
4583     return n >= 0 ? n : -n;
4584 }
4585
4586 #if _MSVCR_VER>=100
4587 /*********************************************************************
4588  *              llabs (MSVCR100.@)
4589  */
4590 __int64 CDECL llabs( __int64 n )
4591 {
4592     return n >= 0 ? n : -n;
4593 }
4594 #endif
4595
4596 #if _MSVCR_VER>=120
4597 /*********************************************************************
4598  *              imaxabs (MSVCR120.@)
4599  */
4600 intmax_t CDECL imaxabs( intmax_t n )
4601 {
4602     return n >= 0 ? n : -n;
4603 }
4604 #endif
4605
4606 /*********************************************************************
4607  *              _abs64 (MSVCRT.@)
4608  */
4609 __int64 CDECL _abs64( __int64 n )
4610 {
4611     return n >= 0 ? n : -n;
4612 }
4613
4614 /* Copied from musl: src/math/ilogb.c */
4615 static int __ilogb(double x)
4616 {
4617     union { double f; UINT64 i; } u = { x };
4618     int e = u.i >> 52 & 0x7ff;
4619
4620     if (!e)
4621     {
4622         u.i <<= 12;
4623         if (u.i == 0) return FP_ILOGB0;
4624         /* subnormal x */
4625         for (e = -0x3ff; u.i >> 63 == 0; e--, u.i <<= 1);
4626         return e;
4627     }
4628     if (e == 0x7ff) return u.i << 12 ? FP_ILOGBNAN : INT_MAX;
4629     return e - 0x3ff;
4630 }
4631
4632 /*********************************************************************
4633  *              _logb (MSVCRT.@)
4634  *
4635  * Copied from musl: src/math/logb.c
4636  */
4637 double CDECL _logb(double x)
4638 {
4639     if (!isfinite(x))
4640         return x * x;
4641     if (x == 0)
4642         return math_error(_SING, "_logb", x, 0, -1 / (x * x));
4643     return __ilogb(x);
4644 }
4645
/* Squares x into a pair of doubles: *hi receives the rounded product x * x
 * and *lo the correction computed from the two halves of x. */
static void sq(double *hi, double *lo, double x)
{
    const double splitter = 0x1p27 + 1;
    double scaled, head, tail;

    /* split x into a high part and the remainder */
    scaled = x * splitter;
    head = x - scaled + scaled;
    tail = x - head;

    *hi = x * x;
    *lo = head * head - *hi + 2 * head * tail + tail * tail;
}
4656
4657 /*********************************************************************
4658  *              _hypot (MSVCRT.@)
4659  *
4660  * Copied from musl: src/math/hypot.c
4661  */
4662 double CDECL _hypot(double x, double y)
4663 {
4664     UINT64 ux = *(UINT64*)&x, uy = *(UINT64*)&y, ut;
4665     double hx, lx, hy, ly, z;
4666     int ex, ey;
4667
4668     /* arrange |x| >= |y| */
4669     ux &= -1ULL >> 1;
4670     uy &= -1ULL >> 1;
4671     if (ux < uy) {
4672         ut = ux;
4673         ux = uy;
4674         uy = ut;
4675     }
4676
4677     /* special cases */
4678     ex = ux >> 52;
4679     ey = uy >> 52;
4680     x = *(double*)&ux;
4681     y = *(double*)&uy;
4682     /* note: hypot(inf,nan) == inf */
4683     if (ey == 0x7ff)
4684         return y;
4685     if (ex == 0x7ff || uy == 0)
4686         return x;
4687     /* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */
4688     /* 64 difference is enough for ld80 double_t */
4689     if (ex - ey > 64)
4690         return x + y;
4691
4692     /* precise sqrt argument in nearest rounding mode without overflow */
4693     /* xh*xh must not overflow and xl*xl must not underflow in sq */
4694     z = 1;
4695     if (ex > 0x3ff + 510) {
4696         z = 0x1p700;
4697         x *= 0x1p-700;
4698         y *= 0x1p-700;
4699     } else if (ey < 0x3ff - 450) {
4700         z = 0x1p-700;
4701         x *= 0x1p700;
4702         y *= 0x1p700;
4703     }
4704     sq(&hx, &lx, x);
4705     sq(&hy, &ly, y);
4706     return z * sqrt(ly + lx + hy + hx);
4707 }
4708
4709 /*********************************************************************
4710  *      _hypotf (MSVCRT.@)
4711  *
4712  * Copied from musl: src/math/hypotf.c
4713  */
4714 float CDECL _hypotf(float x, float y)
4715 {
4716     UINT32 ux = *(UINT32*)&x, uy = *(UINT32*)&y, ut;
4717     float z;
4718
4719     ux &= -1U >> 1;
4720     uy &= -1U >> 1;
4721     if (ux < uy) {
4722         ut = ux;
4723         ux = uy;
4724         uy = ut;
4725     }
4726
4727     x = *(float*)&ux;
4728     y = *(float*)&uy;
4729     if (uy == 0xff << 23)
4730         return y;
4731     if (ux >= 0xff << 23 || uy == 0 || ux - uy >= 25 << 23)
4732         return x + y;
4733
4734     z = 1;
4735     if (ux >= (0x7f + 60) << 23) {
4736         z = 0x1p90f;
4737         x *= 0x1p-90f;
4738         y *= 0x1p-90f;
4739     } else if (uy < (0x7f - 60) << 23) {
4740         z = 0x1p-90f;
4741         x *= 0x1p90f;
4742         y *= 0x1p90f;
4743     }
4744     return z * sqrtf((double)x * x + (double)y * y);
4745 }
4746
4747 /*********************************************************************
4748  *              ceil (MSVCRT.@)
4749  *
4750  * Based on musl: src/math/ceilf.c
4751  */
4752 double CDECL ceil( double x )
4753 {
4754     union {double f; UINT64 i;} u = {x};
4755     int e = (u.i >> 52 & 0x7ff) - 0x3ff;
4756     UINT64 m;
4757
4758     if (e >= 52)
4759         return x;
4760     if (e >= 0) {
4761         m = 0x000fffffffffffffULL >> e;
4762         if ((u.i & m) == 0)
4763             return x;
4764         if (u.i >> 63 == 0)
4765             u.i += m;
4766         u.i &= ~m;
4767     } else {
4768         if (u.i >> 63)
4769             return -0.0;
4770         else if (u.i << 1)
4771             return 1.0;
4772     }
4773     return u.f;
4774 }
4775
4776 /*********************************************************************
4777  *              floor (MSVCRT.@)
4778  *
4779  * Based on musl: src/math/floorf.c
4780  */
4781 double CDECL floor( double x )
4782 {
4783     union {double f; UINT64 i;} u = {x};
4784     int e = (int)(u.i >> 52 & 0x7ff) - 0x3ff;
4785     UINT64 m;
4786
4787     if (e >= 52)
4788         return x;
4789     if (e >= 0) {
4790         m = 0x000fffffffffffffULL >> e;
4791         if ((u.i & m) == 0)
4792             return x;
4793         if (u.i >> 63)
4794             u.i += m;
4795         u.i &= ~m;
4796     } else {
4797         if (u.i >> 63 == 0)
4798             return 0;
4799         else if (u.i << 1)
4800             return -1;
4801     }
4802     return u.f;
4803 }
4804
4805 /*********************************************************************
4806  *      fma (MSVCRT.@)
4807  *
4808  * Copied from musl: src/math/fma.c
4809  */
/* Operand unpacked by normalize() for the integer arithmetic in fma(). */
4810 struct fma_num
4811 {
4812     UINT64 m;    /* 52 fraction bits with bit 52 forced on, shifted left by one */
4813     int e;       /* exponent field rebased by 0x3ff + 52 + 1 */
4814     int sign;    /* 0x800 when the sign bit of the input is set, 0 otherwise */
4815 };
4816
4817 static struct fma_num normalize(double x)
4818 {
4819     UINT64 ix = *(UINT64*)&x;
4820     int e = ix >> 52;
4821     int sign = e & 0x800;
4822     struct fma_num ret;
4823
4824     e &= 0x7ff;
4825     if (!e) {
4826         x *= 0x1p63;
4827         ix = *(UINT64*)&x;
4828         e = ix >> 52 & 0x7ff;
4829         e = e ? e - 63 : 0x800;
4830     }
4831     ix &= (1ull << 52) - 1;
4832     ix |= 1ull << 52;
4833     ix <<= 1;
4834     e -= 0x3ff + 52 + 1;
4835
4836     ret.m = ix;
4837     ret.e = e;
4838     ret.sign = sign;
4839     return ret;
4840 }
4841
4842 static void mul(UINT64 *hi, UINT64 *lo, UINT64 x, UINT64 y)
4843 {
4844     UINT64 t1, t2, t3;
4845     UINT64 xlo = (UINT32)x, xhi = x >> 32;
4846     UINT64 ylo = (UINT32)y, yhi = y >> 32;
4847
4848     t1 = xlo * ylo;
4849     t2 = xlo * yhi + xhi * ylo;
4850     t3 = xhi * yhi;
4851     *lo = t1 + (t2 << 32);
4852     *hi = t3 + (t2 >> 32) + (t1 > *lo);
4853 }
4854
/* Computes x * y + z.
 *
 * The three operands are unpacked with normalize(), the product of the two
 * significands is formed as a 128-bit integer by mul(), z is shifted into
 * alignment and added or subtracted, and the sum is renormalized into rhi
 * with the last bit acting as a sticky bit.  That value is converted to a
 * double and scaled by __scalbn().
 *
 * When normalize() reports an exponent at or above 0x7ff - 0x3ff - 52 - 1
 * for x or y, or above it for z, the plain expression x * y + z is returned
 * instead, and errno is set to EDOM if it produced a NaN although none of
 * the operands checked was a NaN. */
4855 double CDECL fma( double x, double y, double z )
4856 {
4857     int e, d, sign, samesign, nonzero;
4858     UINT64 rhi, rlo, zhi, zlo;
4859     struct fma_num nx, ny, nz;
4860     double r;
4861     INT64 i;
4862
4863     /* normalize so top 10bits and last bit are 0 */
4864     nx = normalize(x);
4865     ny = normalize(y);
4866     nz = normalize(z);
4867
4868     if (nx.e >= 0x7ff - 0x3ff - 52 - 1 || ny.e >= 0x7ff - 0x3ff - 52 - 1) {
4869         r = x * y + z;
4870         if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(r)) *_errno() = EDOM;
4871         return r;
4872     }
4873     if (nz.e >= 0x7ff - 0x3ff - 52 - 1) {
4874         if (nz.e > 0x7ff - 0x3ff - 52 - 1) {/* z==0 */
4875             r = x * y + z;
4876             if (!isnan(x) && !isnan(y) && isnan(r)) *_errno() = EDOM;
4877             return r;
4878         }
4879         return z;
4880     }
4881
4882     /* mul: r = x*y */
4883     mul(&rhi, &rlo, nx.m, ny.m);
4884     /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */
4885
4886     /* align exponents */
4887     e = nx.e + ny.e;
4888     d = nz.e - e;
4889     /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
4890     if (d > 0) {
4891         if (d < 64) {
4892             zlo = nz.m << d;
4893             zhi = nz.m >> (64 - d);
4894         } else {
4895             zlo = 0;
4896             zhi = nz.m;
4897             e = nz.e - 64;
4898             d -= 64;
             /* bits shifted out of rlo are folded into its lowest bit */
4899             if (d < 64 && d) {
4900                 rlo = rhi << (64 - d) | rlo >> d | !!(rlo << (64 - d));
4901                 rhi = rhi >> d;
4902             } else if (d) {
4903                 rlo = 1;
4904                 rhi = 0;
4905             }
4906         }
4907     } else {
4908         zhi = 0;
4909         d = -d;
4910         if (d == 0) {
4911             zlo = nz.m;
4912         } else if (d < 64) {
4913             zlo = nz.m >> d | !!(nz.m << (64 - d));
4914         } else {
4915             zlo = 1;
4916         }
4917     }
4918
4919     /* add */
4920     sign = nx.sign ^ ny.sign;
4921     samesign = !(sign ^ nz.sign);
4922     nonzero = 1;
4923     if (samesign) {
4924         /* r += z */
4925         rlo += zlo;
4926         rhi += zhi + (rlo < zlo);
4927     } else {
4928         /* r -= z */
4929         UINT64 t = rlo;
4930         rlo -= zlo;
4931         rhi = rhi - zhi - (t < rlo);
         /* a set top bit means the difference went negative: negate the
            128-bit value and flip the result sign */
4932         if (rhi >> 63) {
4933             rlo = -rlo;
4934             rhi = -rhi - !!rlo;
4935             sign = !sign;
4936         }
4937         nonzero = !!rhi;
4938     }
4939
4940     /* set rhi to top 63bit of the result (last bit is sticky) */
4941     if (nonzero) {
4942         e += 64;
         /* d = number of leading zero bits of rhi, minus one */
4943         if (rhi >> 32) {
4944             BitScanReverse((DWORD*)&d, rhi >> 32);
4945             d = 31 - d - 1;
4946         } else {
4947             BitScanReverse((DWORD*)&d, rhi);
4948             d = 63 - d - 1;
4949         }
4950         /* note: d > 0 */
4951         rhi = rhi << d | rlo >> (64 - d) | !!(rlo << d);
4952     } else if (rlo) {
         /* d = number of leading zero bits of rlo, minus one */
4953         if (rlo >> 32) {
4954             BitScanReverse((DWORD*)&d, rlo >> 32);
4955             d = 31 - d - 1;
4956         } else {
4957             BitScanReverse((DWORD*)&d, rlo);
4958             d = 63 - d - 1;
4959         }
4960         if (d < 0)
4961             rhi = rlo >> 1 | (rlo & 1);
4962         else
4963             rhi = rlo << d;
4964     } else {
4965         /* exact +-0 */
4966         return x * y + z;
4967     }
4968     e -= d;
4969
4970     /* convert to double */
4971     i = rhi; /* i is in [1<<62,(1<<63)-1] */
4972     if (sign)
4973         i = -i;
4974     r = i; /* |r| is in [0x1p62,0x1p63] */
4975
4976     if (e < -1022 - 62) {
4977         /* result is subnormal before rounding */
4978         if (e == -1022 - 63) {
4979             double c = 0x1p63;
4980             if (sign)
4981                 c = -c;
4982             if (r == c) {
4983                 /* min normal after rounding, underflow depends
4984                    on arch behaviour which can be imitated by
4985                    a double to float conversion */
4986                 float fltmin = 0x0.ffffff8p-63 * FLT_MIN * r;
4987                 return DBL_MIN / FLT_MIN * fltmin;
4988             }
4989             /* one bit is lost when scaled, add another top bit to
4990                only round once at conversion if it is inexact */
4991             if (rhi << 53) {
4992                 double tiny;
4993
4994                 i = rhi >> 1 | (rhi & 1) | 1ull << 62;
4995                 if (sign)
4996                     i = -i;
4997                 r = i;
4998                 r = 2 * r - c; /* remove top bit */
4999
5000                 /* raise underflow portably, such that it
5001                    cannot be optimized away */
5002                 tiny = DBL_MIN / FLT_MIN * r;
5003                 r += (double)(tiny * tiny) * (r - r);
5004             }
5005         } else {
5006             /* only round once when scaled */
5007             d = 10;
5008             i = (rhi >> d | !!(rhi << (64 - d))) << d;
5009             if (sign)
5010                 i = -i;
5011             r = i;
5012         }
5013     }
5014     return __scalbn(r, e);
5015 }
5016
5017 /*********************************************************************
5018  *      fmaf (MSVCRT.@)
5019  *
5020  * Copied from musl: src/math/fmaf.c
5021  */
5022 float CDECL fmaf( float x, float y, float z )
5023 {
5024     union { double f; UINT64 i; } u;
5025     double xy, adjust;
5026     int e;
5027
5028     xy = (double)x * y;
5029     u.f = xy + z;
5030     e = u.i>>52 & 0x7ff;
5031     /* Common case: The double precision result is fine. */
5032     if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */
5033             e == 0x7ff || /* NaN */
5034             (u.f - xy == z && u.f - z == xy) || /* exact */
5035             (_controlfp(0, 0) & _MCW_RC) != _RC_NEAR) /* not round-to-nearest */
5036     {
5037         if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(u.f)) *_errno() = EDOM;
5038
5039         /* underflow may not be raised correctly, example:
5040            fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) */
5041         if (e < 0x3ff-126 && e >= 0x3ff-149 && _statusfp() & _SW_INEXACT)
5042             fp_barrierf((float)u.f * (float)u.f);
5043         return u.f;
5044     }
5045
5046     /*
5047      * If result is inexact, and exactly halfway between two float values,
5048      * we need to adjust the low-order bit in the direction of the error.
5049      */
5050     _controlfp(_RC_CHOP, _MCW_RC);
5051     adjust = fp_barrier(xy + z);
5052     _controlfp(_RC_NEAR, _MCW_RC);
5053     if (u.f == adjust)
5054         u.i++;
5055     return u.f;
5056 }
5057
5058 /*********************************************************************
5059  *              fabs (MSVCRT.@)
5060  *
5061  * Copied from musl: src/math/fabsf.c
5062  */
5063 double CDECL fabs( double x )
5064 {
5065     union { double f; UINT64 i; } u = { x };
5066     u.i &= ~0ull >> 1;
5067     return u.f;
5068 }
5069
5070 /*********************************************************************
5071  *              frexp (MSVCRT.@)
5072  *
5073  * Copied from musl: src/math/frexp.c
5074  */
5075 double CDECL frexp( double x, int *e )
5076 {
5077     UINT64 ux = *(UINT64*)&x;
5078     int ee = ux >> 52 & 0x7ff;
5079
5080     if (!ee) {
5081         if (x) {
5082             x = frexp(x * 0x1p64, e);
5083             *e -= 64;
5084         } else *e = 0;
5085         return x;
5086     } else if (ee == 0x7ff) {
5087         return x;
5088     }
5089
5090     *e = ee - 0x3fe;
5091     ux &= 0x800fffffffffffffull;
5092     ux |= 0x3fe0000000000000ull;
5093     return *(double*)&ux;
5094 }
5095
5096 /*********************************************************************
5097  *              modf (MSVCRT.@)
5098  *
5099  * Copied from musl: src/math/modf.c
5100  */
5101 double CDECL modf( double x, double *iptr )
5102 {
5103     union {double f; UINT64 i;} u = {x};
5104     UINT64 mask;
5105     int e = (u.i >> 52 & 0x7ff) - 0x3ff;
5106
5107     /* no fractional part */
5108     if (e >= 52) {
5109         *iptr = x;
5110         if (e == 0x400 && u.i << 12 != 0) /* nan */
5111             return x;
5112         u.i &= 1ULL << 63;
5113         return u.f;
5114     }
5115
5116     /* no integral part*/
5117     if (e < 0) {
5118         u.i &= 1ULL << 63;
5119         *iptr = u.f;
5120         return x;
5121     }
5122
5123     mask = -1ULL >> 12 >> e;
5124     if ((u.i & mask) == 0) {
5125         *iptr = x;
5126         u.i &= 1ULL << 63;
5127         return u.f;
5128     }
5129     u.i &= ~mask;
5130     *iptr = u.f;
5131     return x - u.f;
5132 }
5133
5134 /**********************************************************************
5135  *              _statusfp2 (MSVCRT.@)
5136  *
5137  * Not exported by native msvcrt, added in msvcr80.
5138  */
5139 #if defined(__i386__) || defined(__x86_64__)
5140 void CDECL _statusfp2( unsigned int *x86_sw, unsigned int *sse2_sw )
5141 {
5142 #if defined(__GNUC__) || defined(__clang__)
5143     unsigned int flags;
5144     unsigned long fpword;
5145
5146     if (x86_sw)
5147     {
5148         __asm__ __volatile__( "fstsw %0" : "=m" (fpword) );
5149         flags = 0;
5150         if (fpword & 0x1)  flags |= _SW_INVALID;
5151         if (fpword & 0x2)  flags |= _SW_DENORMAL;
5152         if (fpword & 0x4)  flags |= _SW_ZERODIVIDE;
5153         if (fpword & 0x8)  flags |= _SW_OVERFLOW;
5154         if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5155         if (fpword & 0x20) flags |= _SW_INEXACT;
5156         *x86_sw = flags;
5157     }
5158
5159     if (!sse2_sw) return;
5160
5161     if (sse2_supported)
5162     {
5163         __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );
5164         flags = 0;
5165         if (fpword & 0x1)  flags |= _SW_INVALID;
5166         if (fpword & 0x2)  flags |= _SW_DENORMAL;
5167         if (fpword & 0x4)  flags |= _SW_ZERODIVIDE;
5168         if (fpword & 0x8)  flags |= _SW_OVERFLOW;
5169         if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5170         if (fpword & 0x20) flags |= _SW_INEXACT;
5171         *sse2_sw = flags;
5172     }
5173     else *sse2_sw = 0;
5174 #else
5175     FIXME( "not implemented\n" );
5176 #endif
5177 }
5178 #endif
5179
5180 /**********************************************************************
5181  *              _statusfp (MSVCRT.@)
5182  */
5183 unsigned int CDECL _statusfp(void)
5184 {
5185     unsigned int flags = 0;
5186 #if defined(__i386__) || defined(__x86_64__)
5187     unsigned int x86_sw, sse2_sw;
5188
5189     _statusfp2( &x86_sw, &sse2_sw );
5190     /* FIXME: there's no definition for ambiguous status, just return all status bits for now */
5191     flags = x86_sw | sse2_sw;
5192 #elif defined(__aarch64__)
5193     ULONG_PTR fpsr;
5194
5195     __asm__ __volatile__( "mrs %0, fpsr" : "=r" (fpsr) );
5196     if (fpsr & 0x1)  flags |= _SW_INVALID;
5197     if (fpsr & 0x2)  flags |= _SW_ZERODIVIDE;
5198     if (fpsr & 0x4)  flags |= _SW_OVERFLOW;
5199     if (fpsr & 0x8)  flags |= _SW_UNDERFLOW;
5200     if (fpsr & 0x10) flags |= _SW_INEXACT;
5201     if (fpsr & 0x80) flags |= _SW_DENORMAL;
5202 #else
5203     FIXME( "not implemented\n" );
5204 #endif
5205     return flags;
5206 }
5207
5208 /*********************************************************************
5209  *              _clearfp (MSVCRT.@)
5210  */
5211 unsigned int CDECL _clearfp(void)
5212 {
5213     unsigned int flags = 0;
5214 #if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
5215     unsigned long fpword;
5216
5217     __asm__ __volatile__( "fnstsw %0; fnclex" : "=m" (fpword) );
5218     if (fpword & 0x1)  flags |= _SW_INVALID;
5219     if (fpword & 0x2)  flags |= _SW_DENORMAL;
5220     if (fpword & 0x4)  flags |= _SW_ZERODIVIDE;
5221     if (fpword & 0x8)  flags |= _SW_OVERFLOW;
5222     if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5223     if (fpword & 0x20) flags |= _SW_INEXACT;
5224
5225     if (sse2_supported)
5226     {
5227         __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );
5228         if (fpword & 0x1)  flags |= _SW_INVALID;
5229         if (fpword & 0x2)  flags |= _SW_DENORMAL;
5230         if (fpword & 0x4)  flags |= _SW_ZERODIVIDE;
5231         if (fpword & 0x8)  flags |= _SW_OVERFLOW;
5232         if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5233         if (fpword & 0x20) flags |= _SW_INEXACT;
5234         fpword &= ~0x3f;
5235         __asm__ __volatile__( "ldmxcsr %0" : : "m" (fpword) );
5236     }
5237 #elif defined(__aarch64__)
5238     ULONG_PTR fpsr;
5239
5240     __asm__ __volatile__( "mrs %0, fpsr" : "=r" (fpsr) );
5241     if (fpsr & 0x1)  flags |= _SW_INVALID;
5242     if (fpsr & 0x2)  flags |= _SW_ZERODIVIDE;
5243     if (fpsr & 0x4)  flags |= _SW_OVERFLOW;
5244     if (fpsr & 0x8)  flags |= _SW_UNDERFLOW;
5245     if (fpsr & 0x10) flags |= _SW_INEXACT;
5246     if (fpsr & 0x80) flags |= _SW_DENORMAL;
5247     fpsr &= ~0x9f;
5248     __asm__ __volatile__( "msr fpsr, %0" :: "r" (fpsr) );
5249 #else
5250     FIXME( "not implemented\n" );
5251 #endif
5252     return flags;
5253 }
5254
5255 /*********************************************************************
5256  *              __fpecode (MSVCRT.@)
5257  */
5258 int * CDECL __fpecode(void)
5259 {
5260     return &msvcrt_get_thread_data()->fpecode;
5261 }
5262
5263 /*********************************************************************
5264  *              ldexp (MSVCRT.@)
5265  */
5266 double CDECL ldexp(double num, int exp)
5267 {
5268   double z = __scalbn(num, exp);
5269
5270   if (isfinite(num) && !isfinite(z))
5271     return math_error(_OVERFLOW, "ldexp", num, exp, z);
5272   if (num && isfinite(num) && !z)
5273     return math_error(_UNDERFLOW, "ldexp", num, exp, z);
5274   return z;
5275 }
5276
5277 /*********************************************************************
5278  *              _cabs (MSVCRT.@)
5279  */
5280 double CDECL _cabs(struct _complex num)
5281 {
5282   return sqrt(num.x * num.x + num.y * num.y);
5283 }
5284
5285 /*********************************************************************
5286  *              _chgsign (MSVCRT.@)
5287  */
5288 double CDECL _chgsign(double num)
5289 {
5290     union { double f; UINT64 i; } u = { num };
5291     u.i ^= 1ull << 63;
5292     return u.f;
5293 }
5294
/*********************************************************************
 *              __control87_2 (MSVCR80.@)
 *
 * Not exported by native msvcrt, added in msvcr80.
 *
 * Reads, and optionally updates, the x87 control word and the SSE2
 * MXCSR control bits separately, expressed as _EM_xxx, _RC_xxx, _PC_xxx,
 * _IC_xxx and _DN_xxx flags.
 *
 * PARAMS
 *  newval   [I] new flag values, only the bits selected by mask are used
 *  mask     [I] flags to change; 0 only queries the current state
 *  x86_cw   [O] receives the resulting x87 flags, may be NULL to skip x87
 *  sse2_cw  [O] receives the resulting SSE2 flags, may be NULL to skip SSE2
 *
 * RETURNS
 *  1 when built with GCC/clang, 0 (after a FIXME) otherwise.
 */
#ifdef __i386__
int CDECL __control87_2( unsigned int newval, unsigned int mask,
                         unsigned int *x86_cw, unsigned int *sse2_cw )
{
#if defined(__GNUC__) || defined(__clang__)
    unsigned long fpword;
    unsigned int flags;
    unsigned int old_flags;

    if (x86_cw)
    {
        __asm__ __volatile__( "fstcw %0" : "=m" (fpword) );

        /* Convert into mask constants */
        flags = 0;
        if (fpword & 0x1)  flags |= _EM_INVALID;
        if (fpword & 0x2)  flags |= _EM_DENORMAL;
        if (fpword & 0x4)  flags |= _EM_ZERODIVIDE;
        if (fpword & 0x8)  flags |= _EM_OVERFLOW;
        if (fpword & 0x10) flags |= _EM_UNDERFLOW;
        if (fpword & 0x20) flags |= _EM_INEXACT;
        /* rounding control field (0xc00) */
        switch (fpword & 0xc00)
        {
        case 0xc00: flags |= _RC_UP|_RC_DOWN; break;
        case 0x800: flags |= _RC_UP; break;
        case 0x400: flags |= _RC_DOWN; break;
        }
        /* precision control field (0x300); value 0x100 is not translated */
        switch (fpword & 0x300)
        {
        case 0x0:   flags |= _PC_24; break;
        case 0x200: flags |= _PC_53; break;
        case 0x300: flags |= _PC_64; break;
        }
        if (fpword & 0x1000) flags |= _IC_AFFINE;

        TRACE( "x86 flags=%08x newval=%08x mask=%08x\n", flags, newval, mask );
        if (mask)
        {
            flags = (flags & ~mask) | (newval & mask);

            /* Convert (masked) value back to fp word */
            fpword = 0;
            if (flags & _EM_INVALID)    fpword |= 0x1;
            if (flags & _EM_DENORMAL)   fpword |= 0x2;
            if (flags & _EM_ZERODIVIDE) fpword |= 0x4;
            if (flags & _EM_OVERFLOW)   fpword |= 0x8;
            if (flags & _EM_UNDERFLOW)  fpword |= 0x10;
            if (flags & _EM_INEXACT)    fpword |= 0x20;
            switch (flags & _MCW_RC)
            {
            case _RC_UP|_RC_DOWN:   fpword |= 0xc00; break;
            case _RC_UP:            fpword |= 0x800; break;
            case _RC_DOWN:          fpword |= 0x400; break;
            }
            switch (flags & _MCW_PC)
            {
            case _PC_64: fpword |= 0x300; break;
            case _PC_53: fpword |= 0x200; break;
            case _PC_24: fpword |= 0x0; break;
            }
            if (flags & _IC_AFFINE) fpword |= 0x1000;

            /* the x87 word is always reloaded when mask is non-zero, even if nothing changed */
            __asm__ __volatile__( "fldcw %0" : : "m" (fpword) );
        }
        *x86_cw = flags;
    }

    /* caller is not interested in the SSE2 state */
    if (!sse2_cw) return 1;

    if (sse2_supported)
    {
        __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );

        /* Convert into mask constants */
        flags = 0;
        if (fpword & 0x80)   flags |= _EM_INVALID;
        if (fpword & 0x100)  flags |= _EM_DENORMAL;
        if (fpword & 0x200)  flags |= _EM_ZERODIVIDE;
        if (fpword & 0x400)  flags |= _EM_OVERFLOW;
        if (fpword & 0x800)  flags |= _EM_UNDERFLOW;
        if (fpword & 0x1000) flags |= _EM_INEXACT;
        /* rounding control field (0x6000) */
        switch (fpword & 0x6000)
        {
        case 0x6000: flags |= _RC_UP|_RC_DOWN; break;
        case 0x4000: flags |= _RC_UP; break;
        case 0x2000: flags |= _RC_DOWN; break;
        }
        /* denormal handling bits (0x0040 and 0x8000) */
        switch (fpword & 0x8040)
        {
        case 0x0040: flags |= _DN_FLUSH_OPERANDS_SAVE_RESULTS; break;
        case 0x8000: flags |= _DN_SAVE_OPERANDS_FLUSH_RESULTS; break;
        case 0x8040: flags |= _DN_FLUSH; break;
        }

        TRACE( "sse2 flags=%08x newval=%08x mask=%08x\n", flags, newval, mask );
        if (mask)
        {
            old_flags = flags;
            /* only exception masks, rounding and denormal control apply to SSE2 */
            mask &= _MCW_EM | _MCW_RC | _MCW_DN;
            flags = (flags & ~mask) | (newval & mask);

            /* unlike the x87 path, MXCSR is only rewritten when something changed */
            if (flags != old_flags)
            {
                /* Convert (masked) value back to fp word */
                /* the word is rebuilt from 0, so MXCSR bits outside the
                 * fields set below are written as zero */
                fpword = 0;
                if (flags & _EM_INVALID)    fpword |= 0x80;
                if (flags & _EM_DENORMAL)   fpword |= 0x100;
                if (flags & _EM_ZERODIVIDE) fpword |= 0x200;
                if (flags & _EM_OVERFLOW)   fpword |= 0x400;
                if (flags & _EM_UNDERFLOW)  fpword |= 0x800;
                if (flags & _EM_INEXACT)    fpword |= 0x1000;
                switch (flags & _MCW_RC)
                {
                case _RC_UP|_RC_DOWN:   fpword |= 0x6000; break;
                case _RC_UP:            fpword |= 0x4000; break;
                case _RC_DOWN:          fpword |= 0x2000; break;
                }
                switch (flags & _MCW_DN)
                {
                case _DN_FLUSH_OPERANDS_SAVE_RESULTS: fpword |= 0x0040; break;
                case _DN_SAVE_OPERANDS_FLUSH_RESULTS: fpword |= 0x8000; break;
                case _DN_FLUSH:                       fpword |= 0x8040; break;
                }
                __asm__ __volatile__( "ldmxcsr %0" : : "m" (fpword) );
            }
        }
        *sse2_cw = flags;
    }
    else *sse2_cw = 0; /* no SSE2: report an empty control word */

    return 1;
#else
    FIXME( "not implemented\n" );
    return 0;
#endif
}
#endif
5437
/*********************************************************************
 *              _control87 (MSVCRT.@)
 *
 * Reads, and optionally updates, the floating-point control state,
 * expressed as _EM_xxx, _RC_xxx, _PC_xxx, _IC_xxx and _DN_xxx flags.
 *
 * PARAMS
 *  newval [I] new flag values, only the bits selected by mask are used
 *  mask   [I] flags to change; 0 only queries the current state
 *
 * RETURNS
 *  The resulting flags. On architectures without an implementation a
 *  FIXME is printed and 0 is returned.
 */
unsigned int CDECL _control87(unsigned int newval, unsigned int mask)
{
    unsigned int flags = 0;
#ifdef __i386__
    unsigned int sse2_cw;

    /* i386: delegate to __control87_2 for both the x87 and SSE2 units */
    __control87_2( newval, mask, &flags, &sse2_cw );

    /* flag the result as ambiguous when the two units disagree on
     * exception masks or rounding mode, then merge the SSE2 flags in */
    if ((flags ^ sse2_cw) & (_MCW_EM | _MCW_RC)) flags |= _EM_AMBIGUOUS;
    flags |= sse2_cw;
#elif defined(__x86_64__)
    unsigned long fpword;
    unsigned int old_flags;

    /* x86_64: only MXCSR is consulted */
    __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );
    if (fpword & 0x80)   flags |= _EM_INVALID;
    if (fpword & 0x100)  flags |= _EM_DENORMAL;
    if (fpword & 0x200)  flags |= _EM_ZERODIVIDE;
    if (fpword & 0x400)  flags |= _EM_OVERFLOW;
    if (fpword & 0x800)  flags |= _EM_UNDERFLOW;
    if (fpword & 0x1000) flags |= _EM_INEXACT;
    switch (fpword & 0x6000)
    {
    case 0x6000: flags |= _RC_CHOP; break;
    case 0x4000: flags |= _RC_UP; break;
    case 0x2000: flags |= _RC_DOWN; break;
    }
    switch (fpword & 0x8040)
    {
    case 0x0040: flags |= _DN_FLUSH_OPERANDS_SAVE_RESULTS; break;
    case 0x8000: flags |= _DN_SAVE_OPERANDS_FLUSH_RESULTS; break;
    case 0x8040: flags |= _DN_FLUSH; break;
    }
    old_flags = flags;
    /* only exception masks, rounding and denormal control can be changed */
    mask &= _MCW_EM | _MCW_RC | _MCW_DN;
    flags = (flags & ~mask) | (newval & mask);
    /* MXCSR is only rewritten when something changed; the word is rebuilt
     * from 0, so bits outside the fields set below are written as zero */
    if (flags != old_flags)
    {
        fpword = 0;
        if (flags & _EM_INVALID)    fpword |= 0x80;
        if (flags & _EM_DENORMAL)   fpword |= 0x100;
        if (flags & _EM_ZERODIVIDE) fpword |= 0x200;
        if (flags & _EM_OVERFLOW)   fpword |= 0x400;
        if (flags & _EM_UNDERFLOW)  fpword |= 0x800;
        if (flags & _EM_INEXACT)    fpword |= 0x1000;
        switch (flags & _MCW_RC)
        {
        case _RC_CHOP: fpword |= 0x6000; break;
        case _RC_UP:   fpword |= 0x4000; break;
        case _RC_DOWN: fpword |= 0x2000; break;
        }
        switch (flags & _MCW_DN)
        {
        case _DN_FLUSH_OPERANDS_SAVE_RESULTS: fpword |= 0x0040; break;
        case _DN_SAVE_OPERANDS_FLUSH_RESULTS: fpword |= 0x8000; break;
        case _DN_FLUSH:                       fpword |= 0x8040; break;
        }
        __asm__ __volatile__( "ldmxcsr %0" :: "m" (fpword) );
    }
#elif defined(__aarch64__)
    ULONG_PTR fpcr;

    /* aarch64: the sense is inverted, an _EM_xxx flag is reported when the
     * corresponding FPCR bit is clear */
    __asm__ __volatile__( "mrs %0, fpcr" : "=r" (fpcr) );
    if (!(fpcr & 0x100))  flags |= _EM_INVALID;
    if (!(fpcr & 0x200))  flags |= _EM_ZERODIVIDE;
    if (!(fpcr & 0x400))  flags |= _EM_OVERFLOW;
    if (!(fpcr & 0x800))  flags |= _EM_UNDERFLOW;
    if (!(fpcr & 0x1000)) flags |= _EM_INEXACT;
    if (!(fpcr & 0x8000)) flags |= _EM_DENORMAL;
    switch (fpcr & 0xc00000)
    {
    case 0x400000: flags |= _RC_UP; break;
    case 0x800000: flags |= _RC_DOWN; break;
    case 0xc00000: flags |= _RC_CHOP; break;
    }
    /* mask is applied unrestricted here, and FPCR is always written back */
    flags = (flags & ~mask) | (newval & mask);
    /* clear the exception and rounding fields, keep every other FPCR bit */
    fpcr &= ~0xc09f00ul;
    if (!(flags & _EM_INVALID)) fpcr |= 0x100;
    if (!(flags & _EM_ZERODIVIDE)) fpcr |= 0x200;
    if (!(flags & _EM_OVERFLOW)) fpcr |= 0x400;
    if (!(flags & _EM_UNDERFLOW)) fpcr |= 0x800;
    if (!(flags & _EM_INEXACT)) fpcr |= 0x1000;
    if (!(flags & _EM_DENORMAL)) fpcr |= 0x8000;
    switch (flags & _MCW_RC)
    {
    case _RC_CHOP: fpcr |= 0xc00000; break;
    case _RC_UP:   fpcr |= 0x400000; break;
    case _RC_DOWN: fpcr |= 0x800000; break;
    }
    __asm__ __volatile__( "msr fpcr, %0" :: "r" (fpcr) );
#else
    FIXME( "not implemented\n" );
#endif
    return flags;
}
5536
5537 /*********************************************************************
5538  *              _controlfp (MSVCRT.@)
5539  */
5540 unsigned int CDECL _controlfp(unsigned int newval, unsigned int mask)
5541 {
5542   return _control87( newval, mask & ~_EM_DENORMAL );
5543 }
5544
5545 /*********************************************************************
5546  *              _set_controlfp (MSVCRT.@)
5547  */
5548 void CDECL _set_controlfp( unsigned int newval, unsigned int mask )
5549 {
5550     _controlfp( newval, mask );
5551 }
5552
5553 /*********************************************************************
5554  *              _controlfp_s (MSVCRT.@)
5555  */
5556 int CDECL _controlfp_s(unsigned int *cur, unsigned int newval, unsigned int mask)
5557 {
5558     static const unsigned int all_flags = (_MCW_EM | _MCW_IC | _MCW_RC |
5559                                            _MCW_PC | _MCW_DN);
5560     unsigned int val;
5561
5562     if (!MSVCRT_CHECK_PMT( !(newval & mask & ~all_flags) ))
5563     {
5564         if (cur) *cur = _controlfp( 0, 0 );  /* retrieve it anyway */
5565         return EINVAL;
5566     }
5567     val = _controlfp( newval, mask );
5568     if (cur) *cur = val;
5569     return 0;
5570 }
5571
5572 #if _MSVCR_VER >= 140
/* Bit patterns used by fenv_encode()/fenv_decode(). Most entries pair a
 * bit in the low 16 bits, shared between the matching X and Y entry,
 * with a bit that is specific to the X or to the Y entry. */
enum fenv_masks
{
    /* X entries */
    FENV_X_INEXACT    = 0x00010000 | 0x00000001,
    FENV_X_UNDERFLOW  = 0x00020000 | 0x00000002,
    FENV_X_OVERFLOW   = 0x00040000 | 0x00000004,
    FENV_X_ZERODIVIDE = 0x00080000 | 0x00000008,
    FENV_X_INVALID    = 0x00100000 | 0x00000010,
    FENV_X_DENORMAL   = 0x00200000 | 0x00000020,
    FENV_X_DOWN       = 0x00400000 | 0x00000100,
    FENV_X_UP         = 0x00800000 | 0x00000200,
    FENV_X_53         = 0x00001000,
    FENV_X_24         = 0x00002000,
    FENV_X_AFFINE     = 0x00004000,

    /* Y entries */
    FENV_Y_INEXACT    = 0x01000000 | 0x00000001,
    FENV_Y_UNDERFLOW  = 0x02000000 | 0x00000002,
    FENV_Y_OVERFLOW   = 0x04000000 | 0x00000004,
    FENV_Y_ZERODIVIDE = 0x08000000 | 0x00000008,
    FENV_Y_INVALID    = 0x10000000 | 0x00000010,
    FENV_Y_DENORMAL   = 0x20000000 | 0x00000020,
    FENV_Y_DOWN       = 0x40000000 | 0x00000100,
    FENV_Y_UP         = 0x80000000 | 0x00000200,
    FENV_Y_FLUSH      = 0x00000400,
    FENV_Y_FLUSH_SAVE = 0x00000800
};
5597
5598 /* encodes x87/sse control/status word in ulong */
5599 static __msvcrt_ulong fenv_encode(unsigned int x, unsigned int y)
5600 {
5601     __msvcrt_ulong ret = 0;
5602
5603 #ifdef __i386__
5604     if (x & _EM_INVALID) ret |= FENV_X_INVALID;
5605     if (x & _EM_DENORMAL) ret |= FENV_X_DENORMAL;
5606     if (x & _EM_ZERODIVIDE) ret |= FENV_X_ZERODIVIDE;
5607     if (x & _EM_OVERFLOW) ret |= FENV_X_OVERFLOW;
5608     if (x & _EM_UNDERFLOW) ret |= FENV_X_UNDERFLOW;
5609     if (x & _EM_INEXACT) ret |= FENV_X_INEXACT;
5610     if (x & _IC_AFFINE) ret |= FENV_X_AFFINE;
5611     if (x & _RC_UP) ret |= FENV_X_UP;
5612     if (x & _RC_DOWN) ret |= FENV_X_DOWN;
5613     if (x & _PC_24) ret |= FENV_X_24;
5614     if (x & _PC_53) ret |= FENV_X_53;
5615 #endif
5616     x &= ~(_MCW_EM | _MCW_IC | _MCW_RC | _MCW_PC);
5617
5618     if (y & _EM_INVALID) ret |= FENV_Y_INVALID;
5619     if (y & _EM_DENORMAL) ret |= FENV_Y_DENORMAL;
5620     if (y & _EM_ZERODIVIDE) ret |= FENV_Y_ZERODIVIDE;
5621     if (y & _EM_OVERFLOW) ret |= FENV_Y_OVERFLOW;
5622     if (y & _EM_UNDERFLOW) ret |= FENV_Y_UNDERFLOW;
5623     if (y & _EM_INEXACT) ret |= FENV_Y_INEXACT;
5624     if (y & _RC_UP) ret |= FENV_Y_UP;
5625     if (y & _RC_DOWN) ret |= FENV_Y_DOWN;
5626     if (y & _DN_FLUSH) ret |= FENV_Y_FLUSH;
5627     if (y & _DN_FLUSH_OPERANDS_SAVE_RESULTS) ret |= FENV_Y_FLUSH_SAVE;
5628     y &= ~(_MCW_EM | _MCW_IC | _MCW_RC | _MCW_DN);
5629
5630     if(x || y) FIXME("unsupported flags: %x, %x\n", x, y);
5631     return ret;
5632 }
5633
5634 /* decodes x87/sse control/status word, returns FALSE on error */
5635 #if (defined(__i386__) || defined(__x86_64__))
5636 static BOOL fenv_decode(__msvcrt_ulong enc, unsigned int *x, unsigned int *y)
5637 {
5638     *x = *y = 0;
5639     if ((enc & FENV_X_INVALID) == FENV_X_INVALID) *x |= _EM_INVALID;
5640     if ((enc & FENV_X_DENORMAL) == FENV_X_DENORMAL) *x |= _EM_DENORMAL;
5641     if ((enc & FENV_X_ZERODIVIDE) == FENV_X_ZERODIVIDE) *x |= _EM_ZERODIVIDE;
5642     if ((enc & FENV_X_OVERFLOW) == FENV_X_OVERFLOW) *x |= _EM_OVERFLOW;
5643     if ((enc & FENV_X_UNDERFLOW) == FENV_X_UNDERFLOW) *x |= _EM_UNDERFLOW;
5644     if ((enc & FENV_X_INEXACT) == FENV_X_INEXACT) *x |= _EM_INEXACT;
5645     if ((enc & FENV_X_AFFINE) == FENV_X_AFFINE) *x |= _IC_AFFINE;
5646     if ((enc & FENV_X_UP) == FENV_X_UP) *x |= _RC_UP;
5647     if ((enc & FENV_X_DOWN) == FENV_X_DOWN) *x |= _RC_DOWN;
5648     if ((enc & FENV_X_24) == FENV_X_24) *x |= _PC_24;
5649     if ((enc & FENV_X_53) == FENV_X_53) *x |= _PC_53;
5650
5651     if ((enc & FENV_Y_INVALID) == FENV_Y_INVALID) *y |= _EM_INVALID;
5652     if ((enc & FENV_Y_DENORMAL) == FENV_Y_DENORMAL) *y |= _EM_DENORMAL;
5653     if ((enc & FENV_Y_ZERODIVIDE) == FENV_Y_ZERODIVIDE) *y |= _EM_ZERODIVIDE;
5654     if ((enc & FENV_Y_OVERFLOW) == FENV_Y_OVERFLOW) *y |= _EM_OVERFLOW;
5655     if ((enc & FENV_Y_UNDERFLOW) == FENV_Y_UNDERFLOW) *y |= _EM_UNDERFLOW;
5656     if ((enc & FENV_Y_INEXACT) == FENV_Y_INEXACT) *y |= _EM_INEXACT;
5657     if ((enc & FENV_Y_UP) == FENV_Y_UP) *y |= _RC_UP;
5658     if ((enc & FENV_Y_DOWN) == FENV_Y_DOWN) *y |= _RC_DOWN;
5659     if ((enc & FENV_Y_FLUSH) == FENV_Y_FLUSH) *y |= _DN_FLUSH;
5660     if ((enc & FENV_Y_FLUSH_SAVE) == FENV_Y_FLUSH_SAVE) *y |= _DN_FLUSH_OPERANDS_SAVE_RESULTS;
5661
5662     if (fenv_encode(*x, *y) != enc)
5663     {
5664         WARN("can't decode: %lx\n", enc);
5665         return FALSE;
5666     }
5667     return TRUE;
5668 }
5669 #endif
5670 #elif _MSVCR_VER >= 120
5671 static __msvcrt_ulong fenv_encode(unsigned int x, unsigned int y)
5672 {
5673     return x | y;
5674 }
5675
5676 #if (defined(__i386__) || defined(__x86_64__))
5677 static BOOL fenv_decode(__msvcrt_ulong enc, unsigned int *x, unsigned int *y)
5678 {
5679     *x = *y = enc;
5680     return TRUE;
5681 }
5682 #endif
5683 #endif
5684
5685 #if _MSVCR_VER>=120
5686 /*********************************************************************
5687  *              fegetenv (MSVCR120.@)
5688  */
5689 int CDECL fegetenv(fenv_t *env)
5690 {
5691 #if _MSVCR_VER>=140 && defined(__i386__)
5692     unsigned int x87, sse;
5693     __control87_2(0, 0, &x87, &sse);
5694     env->_Fe_ctl = fenv_encode(x87, sse);
5695     _statusfp2(&x87, &sse);
5696     env->_Fe_stat = fenv_encode(x87, sse);
5697 #elif _MSVCR_VER>=140
5698     env->_Fe_ctl = fenv_encode(0, _control87(0, 0));
5699     env->_Fe_stat = fenv_encode(0, _statusfp());
5700 #else
5701     env->_Fe_ctl = _controlfp(0, 0) & (_EM_INEXACT | _EM_UNDERFLOW |
5702             _EM_OVERFLOW | _EM_ZERODIVIDE | _EM_INVALID | _RC_CHOP);
5703     env->_Fe_stat = _statusfp();
5704 #endif
5705     return 0;
5706 }
5707
5708 /*********************************************************************
5709  *              feupdateenv (MSVCR120.@)
5710  */
5711 int CDECL feupdateenv(const fenv_t *env)
5712 {
5713     fenv_t set;
5714     fegetenv(&set);
5715     set._Fe_ctl = env->_Fe_ctl;
5716     set._Fe_stat |= env->_Fe_stat;
5717     return fesetenv(&set);
5718 }
5719
5720 /*********************************************************************
5721  *      fetestexcept (MSVCR120.@)
5722  */
5723 int CDECL fetestexcept(int flags)
5724 {
5725     return _statusfp() & flags;
5726 }
5727
5728 /*********************************************************************
5729  *      fesetexceptflag (MSVCR120.@)
5730  */
5731 int CDECL fesetexceptflag(const fexcept_t *status, int excepts)
5732 {
5733     fenv_t env;
5734
5735     excepts &= FE_ALL_EXCEPT;
5736     if(!excepts)
5737         return 0;
5738
5739     fegetenv(&env);
5740     env._Fe_stat &= ~fenv_encode(excepts, excepts);
5741     env._Fe_stat |= *status & fenv_encode(excepts, excepts);
5742     return fesetenv(&env);
5743 }
5744
5745 /*********************************************************************
5746  *      feraiseexcept (MSVCR120.@)
5747  */
5748 int CDECL feraiseexcept(int flags)
5749 {
5750     fenv_t env;
5751
5752     flags &= FE_ALL_EXCEPT;
5753     fegetenv(&env);
5754     env._Fe_stat |= fenv_encode(flags, flags);
5755     return fesetenv(&env);
5756 }
5757
5758 /*********************************************************************
5759  *      feclearexcept (MSVCR120.@)
5760  */
5761 int CDECL feclearexcept(int flags)
5762 {
5763     fenv_t env;
5764
5765     fegetenv(&env);
5766     flags &= FE_ALL_EXCEPT;
5767     env._Fe_stat &= ~fenv_encode(flags, flags);
5768     return fesetenv(&env);
5769 }
5770
5771 /*********************************************************************
5772  *      fegetexceptflag (MSVCR120.@)
5773  */
5774 int CDECL fegetexceptflag(fexcept_t *status, int excepts)
5775 {
5776 #if _MSVCR_VER>=140 && defined(__i386__)
5777     unsigned int x87, sse;
5778     _statusfp2(&x87, &sse);
5779     *status = fenv_encode(x87 & excepts, sse & excepts);
5780 #else
5781     *status = fenv_encode(0, _statusfp() & excepts);
5782 #endif
5783     return 0;
5784 }
5785 #endif
5786
#if _MSVCR_VER>=140
/*********************************************************************
 *              __fpe_flt_rounds (UCRTBASE.@)
 *
 * Maps the rounding field reported by _controlfp() to a small integer:
 * _RC_CHOP gives 0, _RC_NEAR gives 1, _RC_UP gives 2 and anything else
 * gives 3.
 */
int CDECL __fpe_flt_rounds(void)
{
    const unsigned int rounding = _controlfp(0, 0) & _RC_CHOP;

    TRACE("()\n");

    if (rounding == _RC_CHOP) return 0;
    if (rounding == _RC_NEAR) return 1;
    if (rounding == _RC_UP) return 2;
    return 3;
}
#endif
5805
5806 #if _MSVCR_VER>=120
5807
5808 /*********************************************************************
5809  *              fegetround (MSVCR120.@)
5810  */
5811 int CDECL fegetround(void)
5812 {
5813     return _controlfp(0, 0) & _MCW_RC;
5814 }
5815
5816 /*********************************************************************
5817  *              fesetround (MSVCR120.@)
5818  */
5819 int CDECL fesetround(int round_mode)
5820 {
5821     if (round_mode & (~_MCW_RC))
5822         return 1;
5823     _controlfp(round_mode, _MCW_RC);
5824     return 0;
5825 }
5826
5827 #endif /* _MSVCR_VER>=120 */
5828
5829 /*********************************************************************
5830  *              _copysign (MSVCRT.@)
5831  *
5832  * Copied from musl: src/math/copysign.c
5833  */
5834 double CDECL _copysign( double x, double y )
5835 {
5836     union { double f; UINT64 i; } ux = { x }, uy = { y };
5837     ux.i &= ~0ull >> 1;
5838     ux.i |= uy.i & 1ull << 63;
5839     return ux.f;
5840 }
5841
5842 /*********************************************************************
5843  *              _finite (MSVCRT.@)
5844  */
5845 int CDECL _finite(double num)
5846 {
5847     union { double f; UINT64 i; } u = { num };
5848     return (u.i & ~0ull >> 1) < 0x7ffull << 52;
5849 }
5850
/*********************************************************************
 *              _fpreset (MSVCRT.@)
 *
 * Reinitializes the floating-point unit(s) on x86/x86_64 when built
 * with GCC/clang; elsewhere only a FIXME is printed.
 */
void CDECL _fpreset(void)
{
#if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
    const unsigned int x86_cw = 0x27f;
    /* reset the x87 unit, then load 0x27f as its control word */
    __asm__ __volatile__( "fninit; fldcw %0" : : "m" (x86_cw) );
    if (sse2_supported)
    {
        /* load 0x1f80 into MXCSR */
        const unsigned long sse2_cw = 0x1f80;
        __asm__ __volatile__( "ldmxcsr %0" : : "m" (sse2_cw) );
    }
#else
    FIXME( "not implemented\n" );
#endif
}
5868
5869 #if _MSVCR_VER>=120
/*********************************************************************
 *              fesetenv (MSVCR120.@)
 *
 * Installs the floating-point environment described by env.
 *
 * An environment whose _Fe_ctl and _Fe_stat are both zero resets the
 * FPU through _fpreset(). Otherwise both fields are decoded with
 * fenv_decode() and written to the x87 environment (i386 only) and,
 * when SSE2 is available, to MXCSR.
 *
 * RETURNS
 *  0 on success, 1 when a field cannot be decoded or when the function
 *  is not implemented for the current compiler/architecture.
 */
int CDECL fesetenv(const fenv_t *env)
{
#if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
    unsigned int x87_cw, sse_cw, x87_stat, sse_stat;
#ifdef __i386__
    /* memory image stored by fnstenv and loaded by fldenv below */
    struct {
        WORD control_word;
        WORD unused1;
        WORD status_word;
        WORD unused2;
        WORD tag_word;
        WORD unused3;
        DWORD instruction_pointer;
        WORD code_segment;
        WORD unused4;
        DWORD operand_addr;
        WORD data_segment;
        WORD unused5;
    } fenv;
#endif

    TRACE( "(%p)\n", env );

    /* an all-zero environment means "reset to defaults" */
    if (!env->_Fe_ctl && !env->_Fe_stat) {
        _fpreset();
        return 0;
    }

    if (!fenv_decode(env->_Fe_ctl, &x87_cw, &sse_cw))
        return 1;
    if (!fenv_decode(env->_Fe_stat, &x87_stat, &sse_stat))
        return 1;

#ifdef __i386__
    /* read the current x87 environment so that only selected fields change */
    __asm__ __volatile__( "fnstenv %0" : "=m" (fenv) );

    /* clear the exception masks (except denormal) and rounding control */
    fenv.control_word &= ~0xc3d;
#if _MSVCR_VER>=140
    /* additionally clear the denormal mask, precision and infinity control */
    fenv.control_word &= ~0x1302;
#endif
    if (x87_cw & _EM_INVALID) fenv.control_word |= 0x1;
    if (x87_cw & _EM_ZERODIVIDE) fenv.control_word |= 0x4;
    if (x87_cw & _EM_OVERFLOW) fenv.control_word |= 0x8;
    if (x87_cw & _EM_UNDERFLOW) fenv.control_word |= 0x10;
    if (x87_cw & _EM_INEXACT) fenv.control_word |= 0x20;
    switch (x87_cw & _MCW_RC)
    {
        case _RC_UP|_RC_DOWN:   fenv.control_word |= 0xc00; break;
        case _RC_UP:            fenv.control_word |= 0x800; break;
        case _RC_DOWN:          fenv.control_word |= 0x400; break;
    }
#if _MSVCR_VER>=140
    if (x87_cw & _EM_DENORMAL) fenv.control_word |= 0x2;
    switch (x87_cw & _MCW_PC)
    {
        case _PC_64: fenv.control_word |= 0x300; break;
        case _PC_53: fenv.control_word |= 0x200; break;
        case _PC_24: fenv.control_word |= 0x0; break;
    }
    if (x87_cw & _IC_AFFINE) fenv.control_word |= 0x1000;
#endif

    /* replace the six exception flags of the status word */
    fenv.status_word &= ~0x3f;
    if (x87_stat & _SW_INVALID) fenv.status_word |= 0x1;
    if (x87_stat & _SW_DENORMAL) fenv.status_word |= 0x2;
    if (x87_stat & _SW_ZERODIVIDE) fenv.status_word |= 0x4;
    if (x87_stat & _SW_OVERFLOW) fenv.status_word |= 0x8;
    if (x87_stat & _SW_UNDERFLOW) fenv.status_word |= 0x10;
    if (x87_stat & _SW_INEXACT) fenv.status_word |= 0x20;

    /* load the modified environment; all x87 registers are declared clobbered */
    __asm__ __volatile__( "fldenv %0" : : "m" (fenv) : "st", "st(1)",
            "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" );
#endif

    if (sse2_supported)
    {
        DWORD fpword;
        __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );
        /* clear status flags, exception masks (except denormal) and rounding control */
        fpword &= ~0x7ebf;
#if _MSVCR_VER>=140
        /* additionally clear the denormal mask and the two denormal handling bits */
        fpword &= ~0x8140;
#endif
        if (sse_cw & _EM_INVALID) fpword |= 0x80;
        if (sse_cw & _EM_ZERODIVIDE) fpword |= 0x200;
        if (sse_cw & _EM_OVERFLOW) fpword |= 0x400;
        if (sse_cw & _EM_UNDERFLOW) fpword |= 0x800;
        if (sse_cw & _EM_INEXACT) fpword |= 0x1000;
        switch (sse_cw & _MCW_RC)
        {
            case _RC_CHOP: fpword |= 0x6000; break;
            case _RC_UP:   fpword |= 0x4000; break;
            case _RC_DOWN: fpword |= 0x2000; break;
        }
        /* status flags live in the low six bits of the same register */
        if (sse_stat & _SW_INVALID) fpword |= 0x1;
        if (sse_stat & _SW_DENORMAL) fpword |= 0x2;
        if (sse_stat & _SW_ZERODIVIDE) fpword |= 0x4;
        if (sse_stat & _SW_OVERFLOW) fpword |= 0x8;
        if (sse_stat & _SW_UNDERFLOW) fpword |= 0x10;
        if (sse_stat & _SW_INEXACT) fpword |= 0x20;
#if _MSVCR_VER>=140
        if (sse_cw & _EM_DENORMAL) fpword |= 0x100;
        switch (sse_cw & _MCW_DN)
        {
            case _DN_FLUSH_OPERANDS_SAVE_RESULTS: fpword |= 0x0040; break;
            case _DN_SAVE_OPERANDS_FLUSH_RESULTS: fpword |= 0x8000; break;
            case _DN_FLUSH:                       fpword |= 0x8040; break;
        }
#endif
        __asm__ __volatile__( "ldmxcsr %0" : : "m" (fpword) );
    }

    return 0;
#else
    FIXME( "not implemented\n" );
#endif
    /* only reached on the unimplemented path above */
    return 1;
}
5990 #endif
5991
5992 /*********************************************************************
5993  *              _isnan (MSVCRT.@)
5994  */
5995 int CDECL _isnan(double num)
5996 {
5997     union { double f; UINT64 i; } u = { num };
5998     return (u.i & ~0ull >> 1) > 0x7ffull << 52;
5999 }
6000
/* Helper for j0_y0_approx(): evaluates 1 + R(z)/S(z) with z = 1/x^2,
 * where R and S are polynomials whose coefficients are selected by the
 * magnitude of x (see the range noted on each table). Callers are
 * expected to pass |x| >= 2.
 *
 * The high word of x is read through a union, like the other bit-level
 * helpers in this file, rather than through a pointer cast, which would
 * access a double through an incompatible integer lvalue. */
static double pzero(double x)
{
    static const double pR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
        0.00000000000000000000e+00,
        -7.03124999999900357484e-02,
        -8.08167041275349795626e+00,
        -2.57063105679704847262e+02,
        -2.48521641009428822144e+03,
        -5.25304380490729545272e+03,
    }, pS8[5] = {
        1.16534364619668181717e+02,
        3.83374475364121826715e+03,
        4.05978572648472545552e+04,
        1.16752972564375915681e+05,
        4.76277284146730962675e+04,
    }, pR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
        -1.14125464691894502584e-11,
        -7.03124940873599280078e-02,
        -4.15961064470587782438e+00,
        -6.76747652265167261021e+01,
        -3.31231299649172967747e+02,
        -3.46433388365604912451e+02,
    }, pS5[5] = {
        6.07539382692300335975e+01,
        1.05125230595704579173e+03,
        5.97897094333855784498e+03,
        9.62544514357774460223e+03,
        2.40605815922939109441e+03,
    }, pR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
        -2.54704601771951915620e-09,
        -7.03119616381481654654e-02,
        -2.40903221549529611423e+00,
        -2.19659774734883086467e+01,
        -5.80791704701737572236e+01,
        -3.14479470594888503854e+01,
    }, pS3[5] = {
        3.58560338055209726349e+01,
        3.61513983050303863820e+02,
        1.19360783792111533330e+03,
        1.12799679856907414432e+03,
        1.73580930813335754692e+02,
    }, pR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
        -8.87534333032526411254e-08,
        -7.03030995483624743247e-02,
        -1.45073846780952986357e+00,
        -7.63569613823527770791e+00,
        -1.11931668860356747786e+01,
        -3.23364579351335335033e+00,
    }, pS2[5] = {
        2.22202997532088808441e+01,
        1.36206794218215208048e+02,
        2.70470278658083486789e+02,
        1.53875394208320329881e+02,
        1.46576176948256193810e+01,
    };

    const double *p, *q;
    double z, r, s;
    union { double f; unsigned long long i; } u = { x };
    unsigned int ix;

    /* high 32 bits of x with the sign bit removed */
    ix = u.i >> 32;
    ix &= 0x7fffffff;
    if (ix >= 0x40200000) {
        p = pR8;
        q = pS8;
    } else if (ix >= 0x40122E8B) {
        p = pR5;
        q = pS5;
    } else if (ix >= 0x4006DB6D) {
        p = pR3;
        q = pS3;
    } else /*ix >= 0x40000000*/ {
        p = pR2;
        q = pS2;
    }

    /* Horner evaluation of both polynomials in z = 1/x^2 */
    z = 1.0 / (x * x);
    r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
    s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4]))));
    return 1.0 + r / s;
}
6082
/* Helper for j0_y0_approx(): evaluates (-0.125 + R(z)/S(z)) / x with
 * z = 1/x^2, where R and S are polynomials whose coefficients are
 * selected by the magnitude of x (see the range noted on each table).
 * Callers are expected to pass |x| >= 2.
 *
 * The high word of x is read through a union, like the other bit-level
 * helpers in this file, rather than through a pointer cast, which would
 * access a double through an incompatible integer lvalue. */
static double qzero(double x)
{
    static const double qR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
        0.00000000000000000000e+00,
        7.32421874999935051953e-02,
        1.17682064682252693899e+01,
        5.57673380256401856059e+02,
        8.85919720756468632317e+03,
        3.70146267776887834771e+04,
    }, qS8[6] = {
        1.63776026895689824414e+02,
        8.09834494656449805916e+03,
        1.42538291419120476348e+05,
        8.03309257119514397345e+05,
        8.40501579819060512818e+05,
        -3.43899293537866615225e+05,
    }, qR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
        1.84085963594515531381e-11,
        7.32421766612684765896e-02,
        5.83563508962056953777e+00,
        1.35111577286449829671e+02,
        1.02724376596164097464e+03,
        1.98997785864605384631e+03,
    }, qS5[6] = {
        8.27766102236537761883e+01,
        2.07781416421392987104e+03,
        1.88472887785718085070e+04,
        5.67511122894947329769e+04,
        3.59767538425114471465e+04,
        -5.35434275601944773371e+03,
    }, qR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
        4.37741014089738620906e-09,
        7.32411180042911447163e-02,
        3.34423137516170720929e+00,
        4.26218440745412650017e+01,
        1.70808091340565596283e+02,
        1.66733948696651168575e+02,
    }, qS3[6] = {
        4.87588729724587182091e+01,
        7.09689221056606015736e+02,
        3.70414822620111362994e+03,
        6.46042516752568917582e+03,
        2.51633368920368957333e+03,
        -1.49247451836156386662e+02,
    }, qR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
        1.50444444886983272379e-07,
        7.32234265963079278272e-02,
        1.99819174093815998816e+00,
        1.44956029347885735348e+01,
        3.16662317504781540833e+01,
        1.62527075710929267416e+01,
    }, qS2[6] = {
        3.03655848355219184498e+01,
        2.69348118608049844624e+02,
        8.44783757595320139444e+02,
        8.82935845112488550512e+02,
        2.12666388511798828631e+02,
        -5.31095493882666946917e+00,
    };

    const double *p, *q;
    double s, r, z;
    union { double f; unsigned long long i; } u = { x };
    unsigned int ix;

    /* high 32 bits of x with the sign bit removed */
    ix = u.i >> 32;
    ix &= 0x7fffffff;
    if (ix >= 0x40200000) {
        p = qR8;
        q = qS8;
    } else if (ix >= 0x40122E8B) {
        p = qR5;
        q = qS5;
    } else if (ix >= 0x4006DB6D) {
        p = qR3;
        q = qS3;
    } else /*ix >= 0x40000000*/ {
        p = qR2;
        q = qS2;
    }

    /* Horner evaluation of both polynomials in z = 1/x^2 */
    z = 1.0 / (x * x);
    r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
    s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5])))));
    return (-0.125 + r / s) / x;
}
6168
6169 /* j0 and y0 approximation for |x|>=2 */
6170 static double j0_y0_approx(unsigned int ix, double x, BOOL y0)
6171 {
6172     static const double invsqrtpi = 5.64189583547756279280e-01;
6173
6174     double s, c, ss, cc, z;
6175
6176     s = sin(x);
6177     c = cos(x);
6178     if (y0) c = -c;
6179     cc = s + c;
6180     /* avoid overflow in 2*x, big ulp error when x>=0x1p1023 */
6181     if (ix < 0x7fe00000) {
6182         ss = s - c;
6183         z = -cos(2 * x);
6184         if (s * c < 0) cc = z / ss;
6185         else ss = z / cc;
6186         if (ix < 0x48000000) {
6187             if (y0) ss = -ss;
6188             cc = pzero(x) * cc - qzero(x) * ss;
6189         }
6190     }
6191     return invsqrtpi * cc / sqrt(x);
6192 }
6193
6194 /*********************************************************************
6195  *              _j0 (MSVCRT.@)
6196  *
6197  * Copied from musl: src/math/j0.c
6198  */
6199 double CDECL _j0(double x)
6200 {
6201     static const double R02 =  1.56249999999999947958e-02,
6202             R03 = -1.89979294238854721751e-04,
6203             R04 =  1.82954049532700665670e-06,
6204             R05 = -4.61832688532103189199e-09,
6205             S01 =  1.56191029464890010492e-02,
6206             S02 =  1.16926784663337450260e-04,
6207             S03 =  5.13546550207318111446e-07,
6208             S04 =  1.16614003333790000205e-09;
6209
6210     double z, r, s;
6211     unsigned int ix;
6212
6213     ix = *(ULONGLONG*)&x >> 32;
6214     ix &= 0x7fffffff;
6215
6216     /* j0(+-inf)=0, j0(nan)=nan */
6217     if (ix >= 0x7ff00000)
6218         return math_error(_DOMAIN, "_j0", x, 0, 1 / (x * x));
6219     x = fabs(x);
6220
6221     if (ix >= 0x40000000) {  /* |x| >= 2 */
6222         /* large ulp error near zeros: 2.4, 5.52, 8.6537,.. */
6223         return j0_y0_approx(ix, x, FALSE);
6224     }
6225
6226     if (ix >= 0x3f200000) {  /* |x| >= 2**-13 */
6227         /* up to 4ulp error close to 2 */
6228         z = x * x;
6229         r = z * (R02 + z * (R03 + z * (R04 + z * R05)));
6230         s = 1 + z * (S01 + z * (S02 + z * (S03 + z * S04)));
6231         return (1 + x / 2) * (1 - x / 2) + z * (r / s);
6232     }
6233
6234     /* 1 - x*x/4 */
6235     /* prevent underflow */
6236     /* inexact should be raised when x!=0, this is not done correctly */
6237     if (ix >= 0x38000000)  /* |x| >= 2**-127 */
6238         x = 0.25 * x * x;
6239     return 1 - x;
6240 }
6241
6242 static double pone(double x)
6243 {
6244     static const double pr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6245         0.00000000000000000000e+00,
6246         1.17187499999988647970e-01,
6247         1.32394806593073575129e+01,
6248         4.12051854307378562225e+02,
6249         3.87474538913960532227e+03,
6250         7.91447954031891731574e+03,
6251     }, ps8[5] = {
6252         1.14207370375678408436e+02,
6253         3.65093083420853463394e+03,
6254         3.69562060269033463555e+04,
6255         9.76027935934950801311e+04,
6256         3.08042720627888811578e+04,
6257     }, pr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6258         1.31990519556243522749e-11,
6259         1.17187493190614097638e-01,
6260         6.80275127868432871736e+00,
6261         1.08308182990189109773e+02,
6262         5.17636139533199752805e+02,
6263         5.28715201363337541807e+02,
6264     }, ps5[5] = {
6265         5.92805987221131331921e+01,
6266         9.91401418733614377743e+02,
6267         5.35326695291487976647e+03,
6268         7.84469031749551231769e+03,
6269         1.50404688810361062679e+03,
6270     }, pr3[6] = {
6271         3.02503916137373618024e-09,
6272         1.17186865567253592491e-01,
6273         3.93297750033315640650e+00,
6274         3.51194035591636932736e+01,
6275         9.10550110750781271918e+01,
6276         4.85590685197364919645e+01,
6277     }, ps3[5] = {
6278         3.47913095001251519989e+01,
6279         3.36762458747825746741e+02,
6280         1.04687139975775130551e+03,
6281         8.90811346398256432622e+02,
6282         1.03787932439639277504e+02,
6283     }, pr2[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
6284         1.07710830106873743082e-07,
6285         1.17176219462683348094e-01,
6286         2.36851496667608785174e+00,
6287         1.22426109148261232917e+01,
6288         1.76939711271687727390e+01,
6289         5.07352312588818499250e+00,
6290     }, ps2[5] = {
6291         2.14364859363821409488e+01,
6292         1.25290227168402751090e+02,
6293         2.32276469057162813669e+02,
6294         1.17679373287147100768e+02,
6295         8.36463893371618283368e+00,
6296     };
6297
6298     const double *p, *q;
6299     double z, r, s;
6300     unsigned int ix;
6301
6302     ix = *(ULONGLONG*)&x >> 32;
6303     ix &= 0x7fffffff;
6304     if (ix >= 0x40200000) {
6305         p = pr8;
6306         q = ps8;
6307     } else if (ix >= 0x40122E8B) {
6308         p = pr5;
6309         q = ps5;
6310     } else if (ix >= 0x4006DB6D) {
6311         p = pr3;
6312         q = ps3;
6313     } else /*ix >= 0x40000000*/ {
6314         p = pr2;
6315         q = ps2;
6316     }
6317     z = 1.0 / (x * x);
6318     r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
6319     s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4]))));
6320     return 1.0 + r / s;
6321 }
6322
6323 static double qone(double x)
6324 {
6325     static const double qr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6326         0.00000000000000000000e+00,
6327         -1.02539062499992714161e-01,
6328         -1.62717534544589987888e+01,
6329         -7.59601722513950107896e+02,
6330         -1.18498066702429587167e+04,
6331         -4.84385124285750353010e+04,
6332     }, qs8[6] = {
6333         1.61395369700722909556e+02,
6334         7.82538599923348465381e+03,
6335         1.33875336287249578163e+05,
6336         7.19657723683240939863e+05,
6337         6.66601232617776375264e+05,
6338         -2.94490264303834643215e+05,
6339     }, qr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6340         -2.08979931141764104297e-11,
6341         -1.02539050241375426231e-01,
6342         -8.05644828123936029840e+00,
6343         -1.83669607474888380239e+02,
6344         -1.37319376065508163265e+03,
6345         -2.61244440453215656817e+03,
6346     }, qs5[6] = {
6347         8.12765501384335777857e+01,
6348         1.99179873460485964642e+03,
6349         1.74684851924908907677e+04,
6350         4.98514270910352279316e+04,
6351         2.79480751638918118260e+04,
6352         -4.71918354795128470869e+03,
6353     }, qr3[6] = {
6354         -5.07831226461766561369e-09,
6355         -1.02537829820837089745e-01,
6356         -4.61011581139473403113e+00,
6357         -5.78472216562783643212e+01,
6358         -2.28244540737631695038e+02,
6359         -2.19210128478909325622e+02,
6360     }, qs3[6] = {
6361         4.76651550323729509273e+01,
6362         6.73865112676699709482e+02,
6363         3.38015286679526343505e+03,
6364         5.54772909720722782367e+03,
6365         1.90311919338810798763e+03,
6366         -1.35201191444307340817e+02,
6367     }, qr2[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
6368         -1.78381727510958865572e-07,
6369         -1.02517042607985553460e-01,
6370         -2.75220568278187460720e+00,
6371         -1.96636162643703720221e+01,
6372         -4.23253133372830490089e+01,
6373         -2.13719211703704061733e+01,
6374     }, qs2[6] = {
6375         2.95333629060523854548e+01,
6376         2.52981549982190529136e+02,
6377         7.57502834868645436472e+02,
6378         7.39393205320467245656e+02,
6379         1.55949003336666123687e+02,
6380         -4.95949898822628210127e+00,
6381     };
6382
6383     const double *p, *q;
6384     double s, r, z;
6385     unsigned int ix;
6386
6387     ix = *(ULONGLONG*)&x >> 32;
6388     ix &= 0x7fffffff;
6389     if (ix >= 0x40200000) {
6390         p = qr8;
6391         q = qs8;
6392     } else if (ix >= 0x40122E8B) {
6393         p = qr5;
6394         q = qs5;
6395     } else if (ix >= 0x4006DB6D) {
6396         p = qr3;
6397         q = qs3;
6398     } else /*ix >= 0x40000000*/ {
6399         p = qr2;
6400         q = qs2;
6401     }
6402     z = 1.0 / (x * x);
6403     r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
6404     s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5])))));
6405     return (0.375 + r / s) / x;
6406 }
6407
6408 static double j1_y1_approx(unsigned int ix, double x, BOOL y1, int sign)
6409 {
6410     static const double invsqrtpi = 5.64189583547756279280e-01;
6411
6412     double z, s, c, ss, cc;
6413
6414     s = sin(x);
6415     if (y1) s = -s;
6416     c = cos(x);
6417     cc = s - c;
6418     if (ix < 0x7fe00000) {
6419         ss = -s - c;
6420         z = cos(2 * x);
6421         if (s * c > 0) cc = z / ss;
6422         else ss = z / cc;
6423         if (ix < 0x48000000) {
6424             if (y1)
6425                 ss = -ss;
6426             cc = pone(x) * cc - qone(x) * ss;
6427         }
6428     }
6429     if (sign)
6430         cc = -cc;
6431     return invsqrtpi * cc / sqrt(x);
6432 }
6433
6434 /*********************************************************************
6435  *              _j1 (MSVCRT.@)
6436  *
6437  * Copied from musl: src/math/j1.c
6438  */
6439 double CDECL _j1(double x)
6440 {
6441     static const double r00 = -6.25000000000000000000e-02,
6442         r01 =  1.40705666955189706048e-03,
6443         r02 = -1.59955631084035597520e-05,
6444         r03 =  4.96727999609584448412e-08,
6445         s01 =  1.91537599538363460805e-02,
6446         s02 =  1.85946785588630915560e-04,
6447         s03 =  1.17718464042623683263e-06,
6448         s04 =  5.04636257076217042715e-09,
6449         s05 =  1.23542274426137913908e-11;
6450
6451     double z, r, s;
6452     unsigned int ix;
6453     int sign;
6454
6455     ix = *(ULONGLONG*)&x >> 32;
6456     sign = ix >> 31;
6457     ix &= 0x7fffffff;
6458     if (ix >= 0x7ff00000)
6459         return math_error(isnan(x) ? 0 : _DOMAIN, "_j1", x, 0, 1 / (x * x));
6460     if (ix >= 0x40000000)  /* |x| >= 2 */
6461         return j1_y1_approx(ix, fabs(x), FALSE, sign);
6462     if (ix >= 0x38000000) {  /* |x| >= 2**-127 */
6463         z = x * x;
6464         r = z * (r00 + z * (r01 + z * (r02 + z * r03)));
6465         s = 1 + z * (s01 + z * (s02 + z * (s03 + z * (s04 + z * s05))));
6466         z = r / s;
6467     } else {
6468         /* avoid underflow, raise inexact if x!=0 */
6469         z = x;
6470     }
6471     return (0.5 + z) * x;
6472 }
6473
6474 /*********************************************************************
6475  *              _jn (MSVCRT.@)
6476  *
6477  * Copied from musl: src/math/jn.c
6478  */
6479 double CDECL _jn(int n, double x)
6480 {
6481     static const double invsqrtpi = 5.64189583547756279280e-01;
6482
6483     unsigned int ix, lx;
6484     int nm1, i, sign;
6485     double a, b, temp;
6486
6487     ix = *(ULONGLONG*)&x >> 32;
6488     lx = *(ULONGLONG*)&x;
6489     sign = ix >> 31;
6490     ix &= 0x7fffffff;
6491
6492     if ((ix | (lx | -lx) >> 31) > 0x7ff00000) /* nan */
6493         return x;
6494
6495     if (n == 0)
6496         return _j0(x);
6497     if (n < 0) {
6498         nm1 = -(n + 1);
6499         x = -x;
6500         sign ^= 1;
6501     } else {
6502         nm1 = n-1;
6503     }
6504     if (nm1 == 0)
6505         return j1(x);
6506
6507     sign &= n;  /* even n: 0, odd n: signbit(x) */
6508     x = fabs(x);
6509     if ((ix | lx) == 0 || ix == 0x7ff00000)  /* if x is 0 or inf */
6510         b = 0.0;
6511     else if (nm1 < x) {
6512         if (ix >= 0x52d00000) { /* x > 2**302 */
6513             switch(nm1 & 3) {
6514             case 0:
6515                 temp = -cos(x) + sin(x);
6516                 break;
6517             case 1:
6518                 temp = -cos(x) - sin(x);
6519                 break;
6520             case 2:
6521                 temp =  cos(x) - sin(x);
6522                 break;
6523             default:
6524                 temp =  cos(x) + sin(x);
6525                 break;
6526             }
6527             b = invsqrtpi * temp / sqrt(x);
6528         } else {
6529             a = _j0(x);
6530             b = _j1(x);
6531             for (i = 0; i < nm1; ) {
6532                 i++;
6533                 temp = b;
6534                 b = b * (2.0 * i / x) - a; /* avoid underflow */
6535                 a = temp;
6536             }
6537         }
6538     } else {
6539         if (ix < 0x3e100000) { /* x < 2**-29 */
6540             if (nm1 > 32)  /* underflow */
6541                 b = 0.0;
6542             else {
6543                 temp = x * 0.5;
6544                 b = temp;
6545                 a = 1.0;
6546                 for (i = 2; i <= nm1 + 1; i++) {
6547                     a *= (double)i; /* a = n! */
6548                     b *= temp;      /* b = (x/2)^n */
6549                 }
6550                 b = b / a;
6551             }
6552         } else {
6553             double t, q0, q1, w, h, z, tmp, nf;
6554             int k;
6555
6556             nf = nm1 + 1.0;
6557             w = 2 * nf / x;
6558             h = 2 / x;
6559             z = w + h;
6560             q0 = w;
6561             q1 = w * z - 1.0;
6562             k = 1;
6563             while (q1 < 1.0e9) {
6564                 k += 1;
6565                 z += h;
6566                 tmp = z * q1 - q0;
6567                 q0 = q1;
6568                 q1 = tmp;
6569             }
6570             for (t = 0.0, i = k; i >= 0; i--)
6571                 t = 1 / (2 * (i + nf) / x - t);
6572             a = t;
6573             b = 1.0;
6574             tmp = nf * log(fabs(w));
6575             if (tmp < 7.09782712893383973096e+02) {
6576                 for (i = nm1; i > 0; i--) {
6577                     temp = b;
6578                     b = b * (2.0 * i) / x - a;
6579                     a = temp;
6580                 }
6581             } else {
6582                 for (i = nm1; i > 0; i--) {
6583                     temp = b;
6584                     b = b * (2.0 * i) / x - a;
6585                     a = temp;
6586                     /* scale b to avoid spurious overflow */
6587                     if (b > 0x1p500) {
6588                         a /= b;
6589                         t /= b;
6590                         b  = 1.0;
6591                     }
6592                 }
6593             }
6594             z = j0(x);
6595             w = j1(x);
6596             if (fabs(z) >= fabs(w))
6597                 b = t * z / b;
6598             else
6599                 b = t * w / a;
6600         }
6601     }
6602     return sign ? -b : b;
6603 }
6604
6605 /*********************************************************************
6606  *              _y0 (MSVCRT.@)
6607  */
6608 double CDECL _y0(double x)
6609 {
6610     static const double tpi = 6.36619772367581382433e-01,
6611         u00  = -7.38042951086872317523e-02,
6612         u01  =  1.76666452509181115538e-01,
6613         u02  = -1.38185671945596898896e-02,
6614         u03  =  3.47453432093683650238e-04,
6615         u04  = -3.81407053724364161125e-06,
6616         u05  =  1.95590137035022920206e-08,
6617         u06  = -3.98205194132103398453e-11,
6618         v01  =  1.27304834834123699328e-02,
6619         v02  =  7.60068627350353253702e-05,
6620         v03  =  2.59150851840457805467e-07,
6621         v04  =  4.41110311332675467403e-10;
6622
6623     double z, u, v;
6624     unsigned int ix, lx;
6625
6626     ix = *(ULONGLONG*)&x >> 32;
6627     lx = *(ULONGLONG*)&x;
6628
6629     /* y0(nan)=nan, y0(<0)=nan, y0(0)=-inf, y0(inf)=0 */
6630     if ((ix << 1 | lx) == 0)
6631         return math_error(_OVERFLOW, "_y0", x, 0, -INFINITY);
6632     if (isnan(x))
6633         return x;
6634     if (ix >> 31)
6635         return math_error(_DOMAIN, "_y0", x, 0, 0 / (x - x));
6636     if (ix >= 0x7ff00000)
6637         return 1 / x;
6638
6639     if (ix >= 0x40000000) {  /* x >= 2 */
6640         /* large ulp errors near zeros: 3.958, 7.086,.. */
6641         return j0_y0_approx(ix, x, TRUE);
6642     }
6643
6644     if (ix >= 0x3e400000) {  /* x >= 2**-27 */
6645         /* large ulp error near the first zero, x ~= 0.89 */
6646         z = x * x;
6647         u = u00 + z * (u01 + z * (u02 + z * (u03 + z * (u04 + z * (u05 + z * u06)))));
6648         v = 1.0 + z * (v01 + z * (v02 + z * (v03 + z * v04)));
6649         return u / v + tpi * (j0(x) * log(x));
6650     }
6651     return u00 + tpi * log(x);
6652 }
6653
6654 /*********************************************************************
6655  *              _y1 (MSVCRT.@)
6656  */
6657 double CDECL _y1(double x)
6658 {
6659     static const double tpi = 6.36619772367581382433e-01,
6660         u00 =  -1.96057090646238940668e-01,
6661         u01 = 5.04438716639811282616e-02,
6662         u02 = -1.91256895875763547298e-03,
6663         u03 = 2.35252600561610495928e-05,
6664         u04 = -9.19099158039878874504e-08,
6665         v00 = 1.99167318236649903973e-02,
6666         v01 = 2.02552581025135171496e-04,
6667         v02 = 1.35608801097516229404e-06,
6668         v03 = 6.22741452364621501295e-09,
6669         v04 = 1.66559246207992079114e-11;
6670
6671     double z, u, v;
6672     unsigned int ix, lx;
6673
6674     ix = *(ULONGLONG*)&x >> 32;
6675     lx = *(ULONGLONG*)&x;
6676
6677     /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */
6678     if ((ix << 1 | lx) == 0)
6679         return math_error(_OVERFLOW, "_y1", x, 0, -INFINITY);
6680     if (isnan(x))
6681         return x;
6682     if (ix >> 31)
6683         return math_error(_DOMAIN, "_y1", x, 0, 0 / (x - x));
6684     if (ix >= 0x7ff00000)
6685         return 1 / x;
6686
6687     if (ix >= 0x40000000)  /* x >= 2 */
6688         return j1_y1_approx(ix, x, TRUE, 0);
6689     if (ix < 0x3c900000)  /* x < 2**-54 */
6690         return -tpi / x;
6691     z = x * x;
6692     u = u00 + z * (u01 + z * (u02 + z * (u03 + z * u04)));
6693     v = 1 + z * (v00 + z * (v01 + z * (v02 + z * (v03 + z * v04))));
6694     return x * (u / v) + tpi * (j1(x) * log(x) - 1 / x);
6695 }
6696
6697 /*********************************************************************
6698  *              _yn (MSVCRT.@)
6699  *
6700  * Copied from musl: src/math/jn.c
6701  */
6702 double CDECL _yn(int n, double x)
6703 {
6704     static const double invsqrtpi = 5.64189583547756279280e-01;
6705
6706     unsigned int ix, lx, ib;
6707     int nm1, sign, i;
6708     double a, b, temp;
6709
6710     ix = *(ULONGLONG*)&x >> 32;
6711     lx = *(ULONGLONG*)&x;
6712     sign = ix >> 31;
6713     ix &= 0x7fffffff;
6714
6715     if ((ix | (lx | -lx) >> 31) > 0x7ff00000) /* nan */
6716         return x;
6717     if (sign && (ix | lx) != 0) /* x < 0 */
6718         return math_error(_DOMAIN, "_y1", x, 0, 0 / (x - x));
6719     if (ix == 0x7ff00000)
6720         return 0.0;
6721
6722     if (n == 0)
6723         return y0(x);
6724     if (n < 0) {
6725         nm1 = -(n + 1);
6726         sign = n & 1;
6727     } else {
6728         nm1 = n - 1;
6729         sign = 0;
6730     }
6731     if (nm1 == 0)
6732         return sign ? -y1(x) : y1(x);
6733
6734     if (ix >= 0x52d00000) { /* x > 2**302 */
6735         switch(nm1 & 3) {
6736         case 0:
6737             temp = -sin(x) - cos(x);
6738             break;
6739         case 1:
6740             temp = -sin(x) + cos(x);
6741             break;
6742         case 2:
6743             temp = sin(x) + cos(x);
6744             break;
6745         default:
6746             temp = sin(x) - cos(x);
6747             break;
6748         }
6749         b = invsqrtpi * temp / sqrt(x);
6750     } else {
6751         a = y0(x);
6752         b = y1(x);
6753         /* quit if b is -inf */
6754         ib = *(ULONGLONG*)&b >> 32;
6755         for (i = 0; i < nm1 && ib != 0xfff00000;) {
6756             i++;
6757             temp = b;
6758             b = (2.0 * i / x) * b - a;
6759             ib = *(ULONGLONG*)&b >> 32;
6760             a = temp;
6761         }
6762     }
6763     return sign ? -b : b;
6764 }
6765
6766 #if _MSVCR_VER>=120
6767
6768 /*********************************************************************
6769  *              _nearbyint (MSVCR120.@)
6770  *
6771  * Based on musl: src/math/nearbyteint.c
6772  */
6773 double CDECL nearbyint(double x)
6774 {
6775     fenv_t env;
6776
6777     fegetenv(&env);
6778     _control87(_MCW_EM, _MCW_EM);
6779     x = rint(x);
6780     feclearexcept(FE_INEXACT);
6781     feupdateenv(&env);
6782     return x;
6783 }
6784
6785 /*********************************************************************
6786  *              _nearbyintf (MSVCR120.@)
6787  *
6788  * Based on musl: src/math/nearbyteintf.c
6789  */
6790 float CDECL nearbyintf(float x)
6791 {
6792     fenv_t env;
6793
6794     fegetenv(&env);
6795     _control87(_MCW_EM, _MCW_EM);
6796     x = rintf(x);
6797     feclearexcept(FE_INEXACT);
6798     feupdateenv(&env);
6799     return x;
6800 }
6801
6802 /*********************************************************************
6803  *              nexttoward (MSVCR120.@)
6804  */
6805 double CDECL MSVCRT_nexttoward(double num, double next)
6806 {
6807     return _nextafter(num, next);
6808 }
6809
6810 /*********************************************************************
6811  *              nexttowardf (MSVCR120.@)
6812  *
6813  * Copied from musl: src/math/nexttowardf.c
6814  */
6815 float CDECL MSVCRT_nexttowardf(float x, double y)
6816 {
6817     unsigned int ix = *(unsigned int*)&x;
6818     unsigned int e;
6819     float ret;
6820
6821     if (isnan(x) || isnan(y))
6822         return x + y;
6823     if (x == y)
6824         return y;
6825     if (x == 0) {
6826         ix = 1;
6827         if (signbit(y))
6828             ix |= 0x80000000;
6829     } else if (x < y) {
6830         if (signbit(x))
6831             ix--;
6832         else
6833             ix++;
6834     } else {
6835         if (signbit(x))
6836             ix++;
6837         else
6838             ix--;
6839     }
6840     e = ix & 0x7f800000;
6841     /* raise overflow if ix is infinite and x is finite */
6842     if (e == 0x7f800000) {
6843         fp_barrierf(x + x);
6844         *_errno() = ERANGE;
6845     }
6846     ret = *(float*)&ix;
6847     /* raise underflow if ret is subnormal or zero */
6848     if (e == 0) {
6849         fp_barrierf(x * x + ret * ret);
6850         *_errno() = ERANGE;
6851     }
6852     return ret;
6853 }
6854
6855 #endif /* _MSVCR_VER>=120 */
6856
6857 /*********************************************************************
6858  *              _nextafter (MSVCRT.@)
6859  *
6860  * Copied from musl: src/math/nextafter.c
6861  */
6862 double CDECL _nextafter(double x, double y)
6863 {
6864     ULONGLONG llx = *(ULONGLONG*)&x;
6865     ULONGLONG lly = *(ULONGLONG*)&y;
6866     ULONGLONG ax, ay;
6867     int e;
6868
6869     if (isnan(x) || isnan(y))
6870         return x + y;
6871     if (llx == lly) {
6872         if (_fpclass(y) & (_FPCLASS_ND | _FPCLASS_PD | _FPCLASS_NZ | _FPCLASS_PZ ))
6873             *_errno() = ERANGE;
6874         return y;
6875     }
6876     ax = llx & -1ULL / 2;
6877     ay = lly & -1ULL / 2;
6878     if (ax == 0) {
6879         if (ay == 0)
6880             return y;
6881         llx = (lly & 1ULL << 63) | 1;
6882     } else if (ax > ay || ((llx ^ lly) & 1ULL << 63))
6883         llx--;
6884     else
6885         llx++;
6886     e = llx >> 52 & 0x7ff;
6887     /* raise overflow if llx is infinite and x is finite */
6888     if (e == 0x7ff) {
6889         fp_barrier(x + x);
6890         *_errno() = ERANGE;
6891     }
6892     /* raise underflow if llx is subnormal or zero */
6893     y = *(double*)&llx;
6894     if (e == 0) {
6895         fp_barrier(x * x + y * y);
6896         *_errno() = ERANGE;
6897     }
6898     return y;
6899 }
6900
6901 /*********************************************************************
6902  *              _ecvt (MSVCRT.@)
6903  */
6904 char * CDECL _ecvt( double number, int ndigits, int *decpt, int *sign )
6905 {
6906     int prec, len;
6907     thread_data_t *data = msvcrt_get_thread_data();
6908     /* FIXME: check better for overflow (native supports over 300 chars) */
6909     ndigits = min( ndigits, 80 - 8); /* 8 : space for sign, dec point, "e",
6910                                       * 4 for exponent and one for
6911                                       * terminating '\0' */
6912     if (!data->efcvt_buffer)
6913         data->efcvt_buffer = malloc( 80 ); /* ought to be enough */
6914
6915     /* handle cases with zero ndigits or less */
6916     prec = ndigits;
6917     if( prec < 1) prec = 2;
6918     len = _snprintf(data->efcvt_buffer, 80, "%.*le", prec - 1, number);
6919
6920     if (data->efcvt_buffer[0] == '-') {
6921         memmove( data->efcvt_buffer, data->efcvt_buffer + 1, len-- );
6922         *sign = 1;
6923     } else *sign = 0;
6924
6925     /* take the decimal "point away */
6926     if( prec != 1)
6927         memmove( data->efcvt_buffer + 1, data->efcvt_buffer + 2, len - 1 );
6928     /* take the exponential "e" out */
6929     data->efcvt_buffer[ prec] = '\0';
6930     /* read the exponent */
6931     sscanf( data->efcvt_buffer + prec + 1, "%d", decpt);
6932     (*decpt)++;
6933     /* adjust for some border cases */
6934     if( data->efcvt_buffer[0] == '0')/* value is zero */
6935         *decpt = 0;
6936     /* handle cases with zero ndigits or less */
6937     if( ndigits < 1){
6938         if( data->efcvt_buffer[ 0] >= '5')
6939             (*decpt)++;
6940         data->efcvt_buffer[ 0] = '\0';
6941     }
6942     TRACE("out=\"%s\"\n",data->efcvt_buffer);
6943     return data->efcvt_buffer;
6944 }
6945
6946 /*********************************************************************
6947  *              _ecvt_s (MSVCRT.@)
6948  */
6949 int CDECL _ecvt_s( char *buffer, size_t length, double number, int ndigits, int *decpt, int *sign )
6950 {
6951     int prec, len;
6952     char *result;
6953
6954     if (!MSVCRT_CHECK_PMT(buffer != NULL)) return EINVAL;
6955     if (!MSVCRT_CHECK_PMT(decpt != NULL)) return EINVAL;
6956     if (!MSVCRT_CHECK_PMT(sign != NULL)) return EINVAL;
6957     if (!MSVCRT_CHECK_PMT_ERR( length > 2, ERANGE )) return ERANGE;
6958     if (!MSVCRT_CHECK_PMT_ERR(ndigits < (int)length - 1, ERANGE )) return ERANGE;
6959
6960     /* handle cases with zero ndigits or less */
6961     prec = ndigits;
6962     if( prec < 1) prec = 2;
6963     result = malloc(prec + 8);
6964
6965     len = _snprintf(result, prec + 8, "%.*le", prec - 1, number);
6966     if (result[0] == '-') {
6967         memmove( result, result + 1, len-- );
6968         *sign = 1;
6969     } else *sign = 0;
6970
6971     /* take the decimal "point away */
6972     if( prec != 1)
6973         memmove( result + 1, result + 2, len - 1 );
6974     /* take the exponential "e" out */
6975     result[ prec] = '\0';
6976     /* read the exponent */
6977     sscanf( result + prec + 1, "%d", decpt);
6978     (*decpt)++;
6979     /* adjust for some border cases */
6980     if( result[0] == '0')/* value is zero */
6981         *decpt = 0;
6982     /* handle cases with zero ndigits or less */
6983     if( ndigits < 1){
6984         if( result[ 0] >= '5')
6985             (*decpt)++;
6986         result[ 0] = '\0';
6987     }
6988     memcpy( buffer, result, max(ndigits + 1, 1) );
6989     free( result );
6990     return 0;
6991 }
6992
6993 /***********************************************************************
6994  *              _fcvt  (MSVCRT.@)
6995  */
6996 char * CDECL _fcvt( double number, int ndigits, int *decpt, int *sign )
6997 {
6998     thread_data_t *data = msvcrt_get_thread_data();
6999     int stop, dec1, dec2;
7000     char *ptr1, *ptr2, *first;
7001     char buf[80]; /* ought to be enough */
7002     char decimal_separator = get_locinfo()->lconv->decimal_point[0];
7003
7004     if (!data->efcvt_buffer)
7005         data->efcvt_buffer = malloc( 80 ); /* ought to be enough */
7006
7007     stop = _snprintf(buf, 80, "%.*f", ndigits < 0 ? 0 : ndigits, number);
7008     ptr1 = buf;
7009     ptr2 = data->efcvt_buffer;
7010     first = NULL;
7011     dec1 = 0;
7012     dec2 = 0;
7013
7014     if (*ptr1 == '-') {
7015         *sign = 1;
7016         ptr1++;
7017     } else *sign = 0;
7018
7019     /* For numbers below the requested resolution, work out where
7020        the decimal point will be rather than finding it in the string */
7021     if (number < 1.0 && number > 0.0) {
7022         dec2 = log10(number + 1e-10);
7023         if (-dec2 <= ndigits) dec2 = 0;
7024     }
7025
7026     /* If requested digits is zero or less, we will need to truncate
7027      * the returned string */
7028     if (ndigits < 1) {
7029         stop += ndigits;
7030     }
7031
7032     while (*ptr1 == '0') ptr1++; /* Skip leading zeroes */
7033     while (*ptr1 != '\0' && *ptr1 != decimal_separator) {
7034         if (!first) first = ptr2;
7035         if ((ptr1 - buf) < stop) {
7036             *ptr2++ = *ptr1++;
7037         } else {
7038             ptr1++;
7039         }
7040         dec1++;
7041     }
7042
7043     if (ndigits > 0) {
7044         ptr1++;
7045         if (!first) {
7046             while (*ptr1 == '0') { /* Process leading zeroes */
7047                 *ptr2++ = *ptr1++;
7048                 dec1--;
7049             }
7050         }
7051         while (*ptr1 != '\0') {
7052             if (!first) first = ptr2;
7053             *ptr2++ = *ptr1++;
7054         }
7055     }
7056
7057     *ptr2 = '\0';
7058
7059     /* We never found a non-zero digit, then our number is either
7060      * smaller than the requested precision, or 0.0 */
7061     if (!first) {
7062         if (number > 0.0) {
7063             first = ptr2;
7064         } else {
7065             first = data->efcvt_buffer;
7066             dec1 = 0;
7067         }
7068     }
7069
7070     *decpt = dec2 ? dec2 : dec1;
7071     return first;
7072 }
7073
7074 /***********************************************************************
7075  *              _fcvt_s  (MSVCRT.@)
7076  */
7077 int CDECL _fcvt_s(char* outbuffer, size_t size, double number, int ndigits, int *decpt, int *sign)
7078 {
7079     int stop, dec1, dec2;
7080     char *ptr1, *ptr2, *first;
7081     char buf[80]; /* ought to be enough */
7082     char decimal_separator = get_locinfo()->lconv->decimal_point[0];
7083
7084     if (!outbuffer || !decpt || !sign || size == 0)
7085     {
7086         *_errno() = EINVAL;
7087         return EINVAL;
7088     }
7089
7090     stop = _snprintf(buf, 80, "%.*f", ndigits < 0 ? 0 : ndigits, number);
7091     ptr1 = buf;
7092     ptr2 = outbuffer;
7093     first = NULL;
7094     dec1 = 0;
7095     dec2 = 0;
7096
7097     if (*ptr1 == '-') {
7098         *sign = 1;
7099         ptr1++;
7100     } else *sign = 0;
7101
7102     /* For numbers below the requested resolution, work out where
7103        the decimal point will be rather than finding it in the string */
7104     if (number < 1.0 && number > 0.0) {
7105         dec2 = log10(number + 1e-10);
7106         if (-dec2 <= ndigits) dec2 = 0;
7107     }
7108
7109     /* If requested digits is zero or less, we will need to truncate
7110      * the returned string */
7111     if (ndigits < 1) {
7112         stop += ndigits;
7113     }
7114
7115     while (*ptr1 == '0') ptr1++; /* Skip leading zeroes */
7116     while (*ptr1 != '\0' && *ptr1 != decimal_separator) {
7117         if (!first) first = ptr2;
7118         if ((ptr1 - buf) < stop) {
7119             if (size > 1) {
7120                 *ptr2++ = *ptr1++;
7121                 size--;
7122             }
7123         } else {
7124             ptr1++;
7125         }
7126         dec1++;
7127     }
7128
7129     if (ndigits > 0) {
7130         ptr1++;
7131         if (!first) {
7132             while (*ptr1 == '0') { /* Process leading zeroes */
7133                 if (number == 0.0 && size > 1) {
7134                     *ptr2++ = '0';
7135                     size--;
7136                 }
7137                 ptr1++;
7138                 dec1--;
7139             }
7140         }
7141         while (*ptr1 != '\0') {
7142             if (!first) first = ptr2;
7143             if (size > 1) {
7144                 *ptr2++ = *ptr1++;
7145                 size--;
7146             }
7147         }
7148     }
7149
7150     *ptr2 = '\0';
7151
7152     /* We never found a non-zero digit, then our number is either
7153      * smaller than the requested precision, or 0.0 */
7154     if (!first && (number <= 0.0))
7155         dec1 = 0;
7156
7157     *decpt = dec2 ? dec2 : dec1;
7158     return 0;
7159 }
7160
7161 /***********************************************************************
7162  *              _gcvt  (MSVCRT.@)
7163  */
7164 char * CDECL _gcvt( double number, int ndigit, char *buff )
7165 {
7166     if(!buff) {
7167         *_errno() = EINVAL;
7168         return NULL;
7169     }
7170
7171     if(ndigit < 0) {
7172         *_errno() = ERANGE;
7173         return NULL;
7174     }
7175
7176     sprintf(buff, "%.*g", ndigit, number);
7177     return buff;
7178 }
7179
7180 /***********************************************************************
7181  *              _gcvt_s  (MSVCRT.@)
7182  */
7183 int CDECL _gcvt_s(char *buff, size_t size, double number, int digits)
7184 {
7185     int len;
7186
7187     if(!buff) {
7188         *_errno() = EINVAL;
7189         return EINVAL;
7190     }
7191
7192     if( digits<0 || digits>=size) {
7193         if(size)
7194             buff[0] = '\0';
7195
7196         *_errno() = ERANGE;
7197         return ERANGE;
7198     }
7199
7200     len = _scprintf("%.*g", digits, number);
7201     if(len > size) {
7202         buff[0] = '\0';
7203         *_errno() = ERANGE;
7204         return ERANGE;
7205     }
7206
7207     sprintf(buff, "%.*g", digits, number);
7208     return 0;
7209 }
7210
7211 #include <stdlib.h> /* div_t, ldiv_t */
7212
7213 /*********************************************************************
7214  *              div (MSVCRT.@)
7215  * VERSION
7216  *      [i386] Windows binary compatible - returns the struct in eax/edx.
7217  */
7218 #ifdef __i386__
7219 unsigned __int64 CDECL div(int num, int denom)
7220 {
7221     union {
7222         div_t div;
7223         unsigned __int64 uint64;
7224     } ret;
7225
7226     ret.div.quot = num / denom;
7227     ret.div.rem = num % denom;
7228     return ret.uint64;
7229 }
7230 #else
7231 /*********************************************************************
7232  *              div (MSVCRT.@)
7233  * VERSION
7234  *      [!i386] Non-x86 can't run win32 apps so we don't need binary compatibility
7235  */
7236 div_t CDECL div(int num, int denom)
7237 {
7238     div_t ret;
7239
7240     ret.quot = num / denom;
7241     ret.rem = num % denom;
7242     return ret;
7243 }
7244 #endif /* ifdef __i386__ */
7245
7246
7247 /*********************************************************************
7248  *              ldiv (MSVCRT.@)
7249  * VERSION
7250  *      [i386] Windows binary compatible - returns the struct in eax/edx.
7251  */
7252 #ifdef __i386__
7253 unsigned __int64 CDECL ldiv(__msvcrt_long num, __msvcrt_long denom)
7254 {
7255     union {
7256         ldiv_t ldiv;
7257         unsigned __int64 uint64;
7258     } ret;
7259
7260     ret.ldiv.quot = num / denom;
7261     ret.ldiv.rem = num % denom;
7262     return ret.uint64;
7263 }
7264 #else
7265 /*********************************************************************
7266  *              ldiv (MSVCRT.@)
7267  * VERSION
7268  *      [!i386] Non-x86 can't run win32 apps so we don't need binary compatibility
7269  */
7270 ldiv_t CDECL ldiv(__msvcrt_long num, __msvcrt_long denom)
7271 {
7272     ldiv_t ret;
7273
7274     ret.quot = num / denom;
7275     ret.rem = num % denom;
7276     return ret;
7277 }
7278 #endif /* ifdef __i386__ */
7279
7280 #if _MSVCR_VER>=100
7281 /*********************************************************************
7282  *              lldiv (MSVCR100.@)
7283  */
7284 lldiv_t CDECL lldiv(__int64 num, __int64 denom)
7285 {
7286   lldiv_t ret;
7287
7288   ret.quot = num / denom;
7289   ret.rem = num % denom;
7290
7291   return ret;
7292 }
7293 #endif
7294
7295 #ifdef __i386__
7296
/*********************************************************************
 *              _adjust_fdiv (MSVCRT.@)
 * Used by the MSVC compiler to work around the Pentium FDIV bug.
 * Defined here with an initial value of 0.
 */
int MSVCRT__adjust_fdiv = 0;
7302
7303 /***********************************************************************
7304  *              _adj_fdiv_m16i (MSVCRT.@)
7305  *
7306  * NOTE
7307  *    I _think_ this function is intended to work around the Pentium
7308  *    fdiv bug.
7309  */
void __stdcall _adj_fdiv_m16i( short arg )
{
  /* Stub: arg is ignored; the only effect is the trace message. */
  TRACE("(): stub\n");
}
7314
7315 /***********************************************************************
7316  *              _adj_fdiv_m32 (MSVCRT.@)
7317  *
7318  * NOTE
7319  *    I _think_ this function is intended to work around the Pentium
7320  *    fdiv bug.
7321  */
void __stdcall _adj_fdiv_m32( unsigned int arg )
{
  /* Stub: arg is ignored; the only effect is the trace message. */
  TRACE("(): stub\n");
}
7326
7327 /***********************************************************************
7328  *              _adj_fdiv_m32i (MSVCRT.@)
7329  *
7330  * NOTE
7331  *    I _think_ this function is intended to work around the Pentium
7332  *    fdiv bug.
7333  */
void __stdcall _adj_fdiv_m32i( int arg )
{
  /* Stub: arg is ignored; the only effect is the trace message. */
  TRACE("(): stub\n");
}
7338
7339 /***********************************************************************
7340  *              _adj_fdiv_m64 (MSVCRT.@)
7341  *
7342  * NOTE
7343  *    I _think_ this function is intended to work around the Pentium
7344  *    fdiv bug.
7345  */
void __stdcall _adj_fdiv_m64( unsigned __int64 arg )
{
  /* Stub: arg is ignored; the only effect is the trace message. */
  TRACE("(): stub\n");
}
7350
7351 /***********************************************************************
7352  *              _adj_fdiv_r (MSVCRT.@)
7353  * FIXME
7354  *    This function is likely to have the wrong number of arguments.
7355  *
7356  * NOTE
7357  *    I _think_ this function is intended to work around the Pentium
7358  *    fdiv bug.
7359  */
void _adj_fdiv_r(void)
{
  /* Stub: takes no arguments and only emits the trace message. */
  TRACE("(): stub\n");
}
7364
7365 /***********************************************************************
7366  *              _adj_fdivr_m16i (MSVCRT.@)
7367  *
7368  * NOTE
7369  *    I _think_ this function is intended to work around the Pentium
7370  *    fdiv bug.
7371  */
void __stdcall _adj_fdivr_m16i( short arg )
{
  /* Stub: arg is ignored; the only effect is the trace message. */
  TRACE("(): stub\n");
}
7376
7377 /***********************************************************************
7378  *              _adj_fdivr_m32 (MSVCRT.@)
7379  *
7380  * NOTE
7381  *    I _think_ this function is intended to work around the Pentium
7382  *    fdiv bug.
7383  */
void __stdcall _adj_fdivr_m32( unsigned int arg )
{
  /* Stub: arg is ignored; the only effect is the trace message. */
  TRACE("(): stub\n");
}
7388
7389 /***********************************************************************
7390  *              _adj_fdivr_m32i (MSVCRT.@)
7391  *
7392  * NOTE
7393  *    I _think_ this function is intended to work around the Pentium
7394  *    fdiv bug.
7395  */
void __stdcall _adj_fdivr_m32i( int arg )
{
  /* Stub: arg is ignored; the only effect is the trace message. */
  TRACE("(): stub\n");
}
7400
7401 /***********************************************************************
7402  *              _adj_fdivr_m64 (MSVCRT.@)
7403  *
7404  * NOTE
7405  *    I _think_ this function is intended to work around the Pentium
7406  *    fdiv bug.
7407  */
void __stdcall _adj_fdivr_m64( unsigned __int64 arg )
{
  /* Stub: arg is ignored; the only effect is the trace message. */
  TRACE("(): stub\n");
}
7412
7413 /***********************************************************************
7414  *              _adj_fpatan (MSVCRT.@)
7415  * FIXME
7416  *    This function is likely to have the wrong number of arguments.
7417  *
7418  * NOTE
7419  *    I _think_ this function is intended to work around the Pentium
7420  *    fdiv bug.
7421  */
void _adj_fpatan(void)
{
  /* Stub: takes no arguments and only emits the trace message. */
  TRACE("(): stub\n");
}
7426
7427 /***********************************************************************
7428  *              _adj_fprem (MSVCRT.@)
7429  * FIXME
7430  *    This function is likely to have the wrong number of arguments.
7431  *
7432  * NOTE
7433  *    I _think_ this function is intended to work around the Pentium
7434  *    fdiv bug.
7435  */
void _adj_fprem(void)
{
  /* Stub: takes no arguments and only emits the trace message. */
  TRACE("(): stub\n");
}
7440
7441 /***********************************************************************
7442  *              _adj_fprem1 (MSVCRT.@)
7443  * FIXME
7444  *    This function is likely to have the wrong number of arguments.
7445  *
7446  * NOTE
7447  *    I _think_ this function is intended to work around the Pentium
7448  *    fdiv bug.
7449  */
void _adj_fprem1(void)
{
  /* Stub: takes no arguments and only emits the trace message. */
  TRACE("(): stub\n");
}
7454
7455 /***********************************************************************
7456  *              _adj_fptan (MSVCRT.@)
7457  * FIXME
7458  *    This function is likely to have the wrong number of arguments.
7459  *
7460  * NOTE
7461  *    I _think_ this function is intended to work around the Pentium
7462  *    fdiv bug.
7463  */
void _adj_fptan(void)
{
  /* Stub: takes no arguments and only emits the trace message. */
  TRACE("(): stub\n");
}
7468
7469 /***********************************************************************
7470  *              _safe_fdiv (MSVCRT.@)
7471  * FIXME
7472  *    This function is likely to have the wrong number of arguments.
7473  *
7474  * NOTE
7475  *    I _think_ this function is intended to work around the Pentium
7476  *    fdiv bug.
7477  */
void _safe_fdiv(void)
{
  /* Stub: takes no arguments and only emits the trace message. */
  TRACE("(): stub\n");
}
7482
7483 /***********************************************************************
7484  *              _safe_fdivr (MSVCRT.@)
7485  * FIXME
7486  *    This function is likely to have the wrong number of arguments.
7487  *
7488  * NOTE
7489  *    I _think_ this function is intended to work around the Pentium
7490  *    fdiv bug.
7491  */
void _safe_fdivr(void)
{
  /* Stub: takes no arguments and only emits the trace message. */
  TRACE("(): stub\n");
}
7496
7497 /***********************************************************************
7498  *              _safe_fprem (MSVCRT.@)
7499  * FIXME
7500  *    This function is likely to have the wrong number of arguments.
7501  *
7502  * NOTE
7503  *    I _think_ this function is intended to work around the Pentium
7504  *    fdiv bug.
7505  */
void _safe_fprem(void)
{
  /* Stub: takes no arguments and only emits the trace message. */
  TRACE("(): stub\n");
}
7510
7511 /***********************************************************************
7512  *              _safe_fprem1 (MSVCRT.@)
7513  *
7514  * FIXME
7515  *    This function is likely to have the wrong number of arguments.
7516  *
7517  * NOTE
7518  *    I _think_ this function is intended to work around the Pentium
7519  *    fdiv bug.
7520  */
void _safe_fprem1(void)
{
  /* Stub: takes no arguments and only emits the trace message. */
  TRACE("(): stub\n");
}
7525
7526 /***********************************************************************
7527  *              __libm_sse2_acos   (MSVCRT.@)
7528  */
void __cdecl __libm_sse2_acos(void)
{
    double d;
    /* The operand is taken from %xmm0 (low 64 bits), not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = acos( d );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7536
7537 /***********************************************************************
7538  *              __libm_sse2_acosf   (MSVCRT.@)
7539  */
void __cdecl __libm_sse2_acosf(void)
{
    float f;
    /* The operand is taken from %xmm0 (low 32 bits), not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = acosf( f );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7547
7548 /***********************************************************************
7549  *              __libm_sse2_asin   (MSVCRT.@)
7550  */
void __cdecl __libm_sse2_asin(void)
{
    double d;
    /* The operand is taken from %xmm0 (low 64 bits), not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = asin( d );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7558
7559 /***********************************************************************
7560  *              __libm_sse2_asinf   (MSVCRT.@)
7561  */
void __cdecl __libm_sse2_asinf(void)
{
    float f;
    /* The operand is taken from %xmm0 (low 32 bits), not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = asinf( f );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7569
7570 /***********************************************************************
7571  *              __libm_sse2_atan   (MSVCRT.@)
7572  */
void __cdecl __libm_sse2_atan(void)
{
    double d;
    /* The operand is taken from %xmm0 (low 64 bits), not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = atan( d );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7580
7581 /***********************************************************************
7582  *              __libm_sse2_atan2   (MSVCRT.@)
7583  */
void __cdecl __libm_sse2_atan2(void)
{
    double d1, d2;
    /* Both operands come from registers: d1 from %xmm0, d2 from %xmm1. */
    __asm__ __volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1), "=m" (d2) );
    d1 = atan2( d1, d2 );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d1) );
}
7591
7592 /***********************************************************************
7593  *              __libm_sse2_atanf   (MSVCRT.@)
7594  */
void __cdecl __libm_sse2_atanf(void)
{
    float f;
    /* The operand is taken from %xmm0 (low 32 bits), not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = atanf( f );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7602
7603 /***********************************************************************
7604  *              __libm_sse2_cos   (MSVCRT.@)
7605  */
void __cdecl __libm_sse2_cos(void)
{
    double d;
    /* The operand is taken from %xmm0 (low 64 bits), not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = cos( d );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7613
7614 /***********************************************************************
7615  *              __libm_sse2_cosf   (MSVCRT.@)
7616  */
void __cdecl __libm_sse2_cosf(void)
{
    float f;
    /* The operand is taken from %xmm0 (low 32 bits), not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = cosf( f );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7624
7625 /***********************************************************************
7626  *              __libm_sse2_exp   (MSVCRT.@)
7627  */
void __cdecl __libm_sse2_exp(void)
{
    double d;
    /* The operand is taken from %xmm0 (low 64 bits), not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = exp( d );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7635
7636 /***********************************************************************
7637  *              __libm_sse2_expf   (MSVCRT.@)
7638  */
void __cdecl __libm_sse2_expf(void)
{
    float f;
    /* The operand is taken from %xmm0 (low 32 bits), not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = expf( f );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7646
7647 /***********************************************************************
7648  *              __libm_sse2_log   (MSVCRT.@)
7649  */
void __cdecl __libm_sse2_log(void)
{
    double d;
    /* The operand is taken from %xmm0 (low 64 bits), not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = log( d );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7657
7658 /***********************************************************************
7659  *              __libm_sse2_log10   (MSVCRT.@)
7660  */
void __cdecl __libm_sse2_log10(void)
{
    double d;
    /* The operand is taken from %xmm0 (low 64 bits), not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = log10( d );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7668
7669 /***********************************************************************
7670  *              __libm_sse2_log10f   (MSVCRT.@)
7671  */
void __cdecl __libm_sse2_log10f(void)
{
    float f;
    /* The operand is taken from %xmm0 (low 32 bits), not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = log10f( f );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7679
7680 /***********************************************************************
7681  *              __libm_sse2_logf   (MSVCRT.@)
7682  */
void __cdecl __libm_sse2_logf(void)
{
    float f;
    /* The operand is taken from %xmm0 (low 32 bits), not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = logf( f );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7690
7691 /***********************************************************************
7692  *              __libm_sse2_pow   (MSVCRT.@)
7693  */
void __cdecl __libm_sse2_pow(void)
{
    double d1, d2;
    /* Both operands come from registers: d1 (base) from %xmm0, d2 (exponent) from %xmm1. */
    __asm__ __volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1), "=m" (d2) );
    d1 = pow( d1, d2 );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d1) );
}
7701
7702 /***********************************************************************
7703  *              __libm_sse2_powf   (MSVCRT.@)
7704  */
void __cdecl __libm_sse2_powf(void)
{
    float f1, f2;
    /* Both operands come from registers: f1 (base) from %xmm0, f2 (exponent) from %xmm1. */
    __asm__ __volatile__( "movd %%xmm0,%0; movd %%xmm1,%1" : "=g" (f1), "=g" (f2) );
    f1 = powf( f1, f2 );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f1) );
}
7712
7713 /***********************************************************************
7714  *              __libm_sse2_sin   (MSVCRT.@)
7715  */
void __cdecl __libm_sse2_sin(void)
{
    double d;
    /* The operand is taken from %xmm0 (low 64 bits), not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = sin( d );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7723
7724 /***********************************************************************
7725  *              __libm_sse2_sinf   (MSVCRT.@)
7726  */
void __cdecl __libm_sse2_sinf(void)
{
    float f;
    /* The operand is taken from %xmm0 (low 32 bits), not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = sinf( f );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7734
7735 /***********************************************************************
7736  *              __libm_sse2_tan   (MSVCRT.@)
7737  */
void __cdecl __libm_sse2_tan(void)
{
    double d;
    /* The operand is taken from %xmm0 (low 64 bits), not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = tan( d );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7745
7746 /***********************************************************************
7747  *              __libm_sse2_tanf   (MSVCRT.@)
7748  */
void __cdecl __libm_sse2_tanf(void)
{
    float f;
    /* The operand is taken from %xmm0 (low 32 bits), not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = tanf( f );
    /* Store the result back into %xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7756
7757 /***********************************************************************
7758  *              __libm_sse2_sqrt_precise   (MSVCR110.@)
7759  */
void __cdecl __libm_sse2_sqrt_precise(void)
{
    unsigned int cw;
    double d;

    /* The operand is taken from %xmm0 (low 64 bits), not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    /* NOTE(review): with a zero mask this presumably only reads the SSE
     * control word into cw - confirm against __control87_2. */
    __control87_2(0, 0, NULL, &cw);
    /* Any rounding-control bit set: compute with sqrt() and return the
     * result in %xmm0. */
    if (cw & _MCW_RC)
    {
        d = sqrt(d);
        __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
        return;
    }

    /* When sqrt_validate() returns 0, d is handed back in %xmm0 as is.
     * NOTE(review): presumably sqrt_validate() has stored the special-case
     * result in d by then - confirm in its definition. */
    if (!sqrt_validate(&d, FALSE))
    {
        __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
        return;
    }
    /* Otherwise defer to the sse2_sqrt helper; nothing is written to %xmm0
     * here, so it presumably works on that register directly (TODO confirm). */
    __asm__ __volatile__( "call " __ASM_NAME( "sse2_sqrt" ) );
}
7781 #endif  /* __i386__ */
7782
7783 /*********************************************************************
7784  *      _fdclass (MSVCR120.@)
7785  *
7786  * Copied from musl: src/math/__fpclassifyf.c
7787  */
7788 short CDECL _fdclass(float x)
7789 {
7790     union { float f; UINT32 i; } u = { x };
7791     int e = u.i >> 23 & 0xff;
7792
7793     if (!e) return u.i << 1 ? FP_SUBNORMAL : FP_ZERO;
7794     if (e == 0xff) return u.i << 9 ? FP_NAN : FP_INFINITE;
7795     return FP_NORMAL;
7796 }
7797
7798 /*********************************************************************
7799  *      _dclass (MSVCR120.@)
7800  *
7801  * Copied from musl: src/math/__fpclassify.c
7802  */
7803 short CDECL _dclass(double x)
7804 {
7805     union { double f; UINT64 i; } u = { x };
7806     int e = u.i >> 52 & 0x7ff;
7807
7808     if (!e) return u.i << 1 ? FP_SUBNORMAL : FP_ZERO;
7809     if (e == 0x7ff) return (u.i << 12) ? FP_NAN : FP_INFINITE;
7810     return FP_NORMAL;
7811 }
7812
7813 #if _MSVCR_VER>=120
7814
7815 /*********************************************************************
7816  *      cbrt (MSVCR120.@)
7817  *
7818  * Copied from musl: src/math/cbrt.c
7819  */
double CDECL cbrt(double x)
{
    /* Returns the cube root of x.  NaN and infinities yield x + x, and a
     * zero input is returned unchanged. */

    /* B1/B2 are added to the high word divided by 3 to form the initial
     * estimate; B2 is the variant used after a tiny input has been scaled
     * by 2^54. */
    static const UINT32 B1 = 715094163, B2 = 696219795;
    /* Coefficients of the polynomial in r that refines the estimate. */
    static const double P0 =  1.87595182427177009643,
                 P1 = -1.88497979543377169875,
                 P2 =  1.621429720105354466140,
                 P3 = -0.758397934778766047437,
                 P4 =  0.145996192886612446982;

    union {double f; UINT64 i;} u = {x};
    double r,s,t,w;
    /* High 32 bits of x with the sign bit cleared. */
    UINT32 hx = u.i >> 32 & 0x7fffffff;

    if (hx >= 0x7ff00000)  /* cbrt(NaN,INF) is itself */
        return x + x;

    if (hx < 0x00100000) { /* zero or subnormal? */
        /* Scale up so the exponent field becomes usable, then re-read it. */
        u.f = x * 0x1p54;
        hx = u.i>>32 & 0x7fffffff;
        if (hx == 0)
            return x;
        hx = hx / 3 + B2;
    } else
        hx = hx / 3 + B1;
    /* Keep only the sign bit and install the estimated high word; the low
     * word of the estimate is zero. */
    u.i &= 1ULL << 63;
    u.i |= (UINT64)hx << 32;
    t = u.f;

    /* Polynomial refinement of the estimate, with r = t^3 / x. */
    r = (t * t) * (t / x);
    t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));

    /* Round t by adding 0x80000000 to its representation and clearing the
     * low 30 bits. */
    u.f = t;
    u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL;
    t = u.f;

    /* Final correction step: t += t * (x/t^2 - t) / (2t + x/t^2). */
    s = t * t;
    r = x / s;
    w = t + t;
    r = (r - t) / (w + r);
    t = t + t * r;
    return t;
}
7862
7863 /*********************************************************************
7864  *      cbrtf (MSVCR120.@)
7865  *
7866  * Copied from musl: src/math/cbrtf.c
7867  */
7868 float CDECL cbrtf(float x)
7869 {
7870     static const unsigned B1 = 709958130, B2 = 642849266;
7871
7872     double r,T;
7873     union {float f; UINT32 i;} u = {x};
7874     UINT32 hx = u.i & 0x7fffffff;
7875
7876     if (hx >= 0x7f800000)
7877         return x + x;
7878
7879     if (hx < 0x00800000) {  /* zero or subnormal? */
7880         if (hx == 0)
7881             return x;
7882         u.f = x * 0x1p24f;
7883         hx = u.i & 0x7fffffff;
7884         hx = hx / 3 + B2;
7885     } else
7886         hx = hx / 3 + B1;
7887     u.i &= 0x80000000;
7888     u.i |= hx;
7889
7890     T = u.f;
7891     r = T * T * T;
7892     T = T * (x + x + r) / (x + r + r);
7893
7894     r = T * T * T;
7895     T = T * (x + x + r) / (x + r + r);
7896     return T;
7897 }
7898
7899 /*********************************************************************
7900  *      exp2 (MSVCR120.@)
7901  *
7902  * Copied from musl: src/math/exp2.c
7903  */
7904 double CDECL exp2(double x)
7905 {
7906     static const double C[] = {
7907         0x1.62e42fefa39efp-1,
7908         0x1.ebfbdff82c424p-3,
7909         0x1.c6b08d70cf4b5p-5,
7910         0x1.3b2abd24650ccp-7,
7911         0x1.5d7e09b4e3a84p-10
7912     };
7913
7914     UINT32 abstop;
7915     UINT64 ki, idx, top, sbits;
7916     double kd, r, r2, scale, tail, tmp;
7917
7918     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
7919     if (abstop - 0x3c9 >= 0x408 - 0x3c9) {
7920         if (abstop - 0x3c9 >= 0x80000000) {
7921             /* Avoid spurious underflow for tiny x. */
7922             /* Note: 0 is common input. */
7923             return 1.0 + x;
7924         }
7925         if (abstop >= 409) {
7926             if (*(UINT64*)&x == 0xfff0000000000000ull)
7927                 return 0.0;
7928             if (abstop >= 0x7ff)
7929                 return 1.0 + x;
7930             if (!(*(UINT64*)&x >> 63)) {
7931                 *_errno() = ERANGE;
7932                 return fp_barrier(DBL_MAX) * DBL_MAX;
7933             }
7934             else if (x <= -2147483648.0) {
7935                 fp_barrier(x + 0x1p120f);
7936                 return 0;
7937             }
7938             else if (*(UINT64*)&x >= 0xc090cc0000000000ull) {
7939                 *_errno() = ERANGE;
7940                 fp_barrier(x + 0x1p120f);
7941                 return 0;
7942             }
7943         }
7944         if (2 * *(UINT64*)&x > 2 * 0x408d000000000000ull)
7945             /* Large x is special cased below. */
7946             abstop = 0;
7947     }
7948
7949     /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
7950     /* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
7951     kd = fp_barrier(x + 0x1.8p52 / (1 << 7));
7952     ki = *(UINT64*)&kd; /* k. */
7953     kd -= 0x1.8p52 / (1 << 7); /* k/N for int k. */
7954     r = x - kd;
7955     /* 2^(k/N) ~= scale * (1 + tail). */
7956     idx = 2 * (ki % (1 << 7));
7957     top = ki << (52 - 7);
7958     tail = *(double*)&exp_T[idx];
7959     /* This is only a valid scale when -1023*N < k < 1024*N. */
7960     sbits = exp_T[idx + 1] + top;
7961     /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
7962     /* Evaluation is optimized assuming superscalar pipelined execution. */
7963     r2 = r * r;
7964     /* Without fma the worst case error is 0.5/N ulp larger. */
7965     /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
7966     tmp = tail + r * C[0] + r2 * (C[1] + r * C[2]) + r2 * r2 * (C[3] + r * C[4]);
7967     if (abstop == 0)
7968     {
7969         /* Handle cases that may overflow or underflow when computing the result that
7970            is scale*(1+TMP) without intermediate rounding. The bit representation of
7971            scale is in SBITS, however it has a computed exponent that may have
7972            overflown into the sign bit so that needs to be adjusted before using it as
7973            a double. (int32_t)KI is the k used in the argument reduction and exponent
7974            adjustment of scale, positive k here means the result may overflow and
7975            negative k means the result may underflow. */
7976         double scale, y;
7977
7978         if ((ki & 0x80000000) == 0) {
7979             /* k > 0, the exponent of scale might have overflowed by 1. */
7980             sbits -= 1ull << 52;
7981             scale = *(double*)&sbits;
7982             y = 2 * (scale + scale * tmp);
7983             return y;
7984         }
7985         /* k < 0, need special care in the subnormal range. */
7986         sbits += 1022ull << 52;
7987         scale = *(double*)&sbits;
7988         y = scale + scale * tmp;
7989         if (y < 1.0) {
7990             /* Round y to the right precision before scaling it into the subnormal
7991                range to avoid double rounding that can cause 0.5+E/2 ulp error where
7992                E is the worst-case ulp error outside the subnormal range. So this
7993                is only useful if the goal is better than 1 ulp worst-case error. */
7994             double hi, lo;
7995             lo = scale - y + scale * tmp;
7996             hi = 1.0 + y;
7997             lo = 1.0 - hi + y + lo;
7998             y = hi + lo - 1.0;
7999             /* Avoid -0.0 with downward rounding. */
8000             if (y == 0.0)
8001                 y = 0.0;
8002             /* The underflow exception needs to be signaled explicitly. */
8003             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
8004         }
8005         y = 0x1p-1022 * y;
8006         return y;
8007     }
8008     scale = *(double*)&sbits;
8009     /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
8010        is no spurious underflow here even without fma. */
8011     return scale + scale * tmp;
8012 }
8013
8014 /*********************************************************************
8015  *      exp2f (MSVCR120.@)
8016  *
8017  * Copied from musl: src/math/exp2f.c
8018  */
float CDECL exp2f(float x)
{
    /* Returns 2 raised to the power x, evaluated in double precision.
     * -infinity returns 0, NaN and +infinity return x + x, positive
     * overflow (x >= 128) sets errno to ERANGE and returns x * FLT_MAX,
     * and x <= -150 returns 0. */

    /* Polynomial coefficients for the reduced argument r. */
    static const double C[] = {
        0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1
    };
    /* Adding and subtracting shift rounds x to a multiple of 1/32. */
    static const double shift = 0x1.8p+52 / (1 << 5);

    double kd, xd, z, r, r2, y, s;
    UINT32 abstop;
    UINT64 ki, t;

    xd = x;
    /* Top bits of the float's bit pattern, sign masked off. */
    abstop = (*(UINT32*)&x >> 20) & 0x7ff;
    if (abstop >= 0x430) {
        /* |x| >= 128 or x is nan.  */
        if (*(UINT32*)&x == 0xff800000)
            return 0.0f;
        if (abstop >= 0x7f8)
            return x + x;
        if (x > 0.0f) {
            *_errno() = ERANGE;
            return fp_barrierf(x * FLT_MAX);
        }
        if (x <= -150.0f) {
            fp_barrierf(x - 0x1p120);
            return 0;
        }
    }

    /* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k, N = 1 << 5. */
    kd = xd + shift;
    ki = *(UINT64*)&kd;
    kd -= shift; /* k/(1<<5) for int k.  */
    r = xd - kd;

    /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
    /* The table entry is selected by k mod N; adding ki shifted into the
     * exponent field adjusts the scale s. */
    t = exp2f_T[ki % (1 << 5)];
    t += ki << (52 - 5);
    s = *(double*)&t;
    z = C[0] * r + C[1];
    r2 = r * r;
    y = C[2] * r + 1;
    y = z * r2 + y;
    y = y * s;
    return y;
}
8065
8066 /*********************************************************************
8067  *      expm1 (MSVCR120.@)
8068  */
double CDECL expm1(double x)
{
    /* Thin wrapper: forwards to __expm1(), whose definition is not part of
     * this section. */
    return __expm1(x);
}
8073
8074 /*********************************************************************
8075  *      expm1f (MSVCR120.@)
8076  */
float CDECL expm1f(float x)
{
    /* Thin wrapper: forwards to __expm1f(), whose definition is not part of
     * this section. */
    return __expm1f(x);
}
8081
8082 /*********************************************************************
8083  *      log1p (MSVCR120.@)
8084  *
8085  * Copied from musl: src/math/log1p.c
8086  */
double CDECL log1p(double x)
{
    /* Returns log(1 + x).
     * x == -1 sets errno to ERANGE and returns x / 0.0; x < -1 sets errno to
     * EDOM and returns (x - x) / 0.0.  For |x| < 2**-53 the input itself is
     * returned, and +infinity or NaN inputs are returned unchanged. */
    static const double ln2_hi = 6.93147180369123816490e-01,
        ln2_lo = 1.90821492927058770002e-10,
        Lg1 = 6.666666666666735130e-01,
        Lg2 = 3.999999999940941908e-01,
        Lg3 = 2.857142874366239149e-01,
        Lg4 = 2.222219843214978396e-01,
        Lg5 = 1.818357216161805012e-01,
        Lg6 = 1.531383769920937332e-01,
        Lg7 = 1.479819860511658591e-01;

    union {double f; UINT64 i;} u = {x};
    double hfsq, f, c, s, z, R, w, t1, t2, dk;
    UINT32 hx, hu;
    int k;

    /* High 32 bits of x, sign bit included. */
    hx = u.i >> 32;
    /* k stays non-zero when the argument reduction below is needed. */
    k = 1;
    if (hx < 0x3fda827a || hx >> 31) { /* 1+x < sqrt(2)+ */
        if (hx >= 0xbff00000) { /* x <= -1.0 */
            if (x == -1) {
                *_errno() = ERANGE;
                return x / 0.0; /* log1p(-1) = -inf */
            }
            *_errno() = EDOM;
            return (x-x) / 0.0; /* log1p(x<-1) = NaN */
        }
        if (hx << 1 < 0x3ca00000 << 1) { /* |x| < 2**-53 */
            fp_barrier(x + 0x1p120f);
            /* underflow if subnormal */
            if ((hx & 0x7ff00000) == 0)
                fp_barrierf(x);
            return x;
        }
        if (hx <= 0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
            /* No reduction needed: work on x directly. */
            k = 0;
            c = 0;
            f = x;
        }
    } else if (hx >= 0x7ff00000)
        return x;
    if (k) {
        u.f = 1 + x;
        hu = u.i >> 32;
        hu += 0x3ff00000 - 0x3fe6a09e;
        /* Unbiased exponent of the shifted high word. */
        k = (int)(hu >> 20) - 0x3ff;
        /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
        if (k < 54) {
            c = k >= 2 ? 1 - (u.f - x) : x - (u.f - 1);
            c /= u.f;
        } else
            c = 0;
        /* reduce u into [sqrt(2)/2, sqrt(2)] */
        hu = (hu & 0x000fffff) + 0x3fe6a09e;
        u.i = (UINT64)hu << 32 | (u.i & 0xffffffff);
        f = u.f - 1;
    }
    /* Polynomial evaluation in s = f / (2 + f), with the even and odd
     * coefficient groups (t1, t2) accumulated separately. */
    hfsq = 0.5 * f * f;
    s = f / (2.0 + f);
    z = s * s;
    w = z * z;
    t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
    t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
    R = t2 + t1;
    dk = k;
    /* Recombine with k * ln2, kept as separate high and low parts. */
    return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi;
}
8155
/*********************************************************************
 *      log1pf (MSVCR120.@)
 *
 * Copied from musl: src/math/log1pf.c
 *
 * Single-precision log(1 + x).
 *
 * Special cases handled explicitly below:
 *   x == -1                     errno = ERANGE, returns x / 0.0f (-inf)
 *   other sign-bit-set inputs
 *   with bits >= 0xbf800000     errno = EDOM, returns (x - x) / 0.0f (NaN)
 *   |x| < 2**-24                returns x itself
 *   sign bit clear and
 *   bits >= 0x7f800000          returns x (+inf or NaN)
 */
float CDECL log1pf(float x)
{
    /* ln2_hi + ln2_lo split of ln(2) and the Lg1..Lg4 polynomial
     * coefficients used for the final series in s = f / (2 + f). */
    static const float ln2_hi = 6.9313812256e-01,
        ln2_lo = 9.0580006145e-06,
        Lg1 = 0xaaaaaa.0p-24,
        Lg2 = 0xccce13.0p-25,
        Lg3 = 0x91e9ee.0p-25,
        Lg4 = 0xf89e26.0p-26;

    union {float f; UINT32 i;} u = {x};
    float hfsq, f, c, s, z, R, w, t1, t2, dk;
    UINT32 ix, iu;
    int k;

    ix = u.i;
    /* k != 0 means "argument reduction still required". */
    k = 1;
    if (ix < 0x3ed413d0 || ix >> 31) { /* 1+x < sqrt(2)+ */
        if (ix >= 0xbf800000) { /* x <= -1.0 */
            if (x == -1) {
                *_errno() = ERANGE;
                return x / 0.0f; /* log1p(-1)=-inf */
            }
            *_errno() = EDOM;
            return (x - x) / 0.0f; /* log1p(x<-1)=NaN */
        }
        /* The shift by one drops the sign bit so the test is on |x|. */
        if (ix<<1 < 0x33800000<<1) { /* |x| < 2**-24 */
            /* underflow if subnormal */
            /* NOTE(review): fp_barrierf is defined elsewhere; presumably it
             * only forces the evaluation of x * x - confirm. */
            if ((ix & 0x7f800000) == 0)
                fp_barrierf(x * x);
            return x;
        }
        if (ix <= 0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
            /* No reduction needed: use x directly, no correction term. */
            k = 0;
            c = 0;
            f = x;
        }
    } else if (ix >= 0x7f800000)
        return x;
    if (k) {
        u.f = 1 + x;
        iu = u.i;
        /* Bias the bits so that the exponent field yields k. */
        iu += 0x3f800000 - 0x3f3504f3;
        k = (int)(iu >> 23) - 0x7f;
        /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
        if (k < 25) {
            c = k >= 2 ? 1 - (u.f - x) : x - (u.f - 1);
            c /= u.f;
        } else
            c = 0;
        /* reduce u into [sqrt(2)/2, sqrt(2)] */
        iu = (iu & 0x007fffff) + 0x3f3504f3;
        u.i = iu;
        f = u.f - 1;
    }
    s = f / (2.0f + f);
    z = s * s;
    w = z * z;
    /* Even and odd polynomial terms are accumulated separately. */
    t1= w * (Lg2 + w * Lg4);
    t2= z * (Lg1 + w * Lg3);
    R = t2 + t1;
    hfsq = 0.5f * f * f;
    dk = k;
    /* Combine series, correction term c and k * ln2 (hi/lo parts). */
    return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi;
}
8225
/*********************************************************************
 *      log2 (MSVCR120.@)
 *
 * Copied from musl: src/math/log2.c
 *
 * Base-2 logarithm of x.
 *
 * Special cases handled explicitly below:
 *   x == 1          returns 0
 *   x == +-0        errno = ERANGE, returns -1.0 / x
 *   x == +inf       returns x
 *   x is NaN        returns x
 *   x < 0           errno = EDOM, returns (x - x) / (x - x)
 *   x subnormal     scaled by 2**52 first, exponent corrected afterwards
 */
double CDECL log2(double x)
{
    /* 1/ln(2) split into a high and a low part. */
    static const double invln2hi = 0x1.7154765200000p+0,
        invln2lo = 0x1.705fc2eefa200p-33;
    /* Polynomial coefficients for the general (table driven) path. */
    static const double A[] = {
        -0x1.71547652b8339p-1,
        0x1.ec709dc3a04bep-2,
        -0x1.7154764702ffbp-2,
        0x1.2776c50034c48p-2,
        -0x1.ec7b328ea92bcp-3,
        0x1.a6225e117f92ep-3
    };
    /* Polynomial coefficients for inputs close to 1.0. */
    static const double B[] = {
        -0x1.71547652b82fep-1,
        0x1.ec709dc3a03f7p-2,
        -0x1.71547652b7c3fp-2,
        0x1.2776c50f05be4p-2,
        -0x1.ec709dd768fe5p-3,
        0x1.a61761ec4e736p-3,
        -0x1.7153fbc64a79bp-3,
        0x1.484d154f01b4ap-3,
        -0x1.289e4a72c383cp-3,
        0x1.0b32f285aee66p-3
    };
    /* Per-subinterval table, indexed by i below; invc multiplies the
     * reduced argument and logc is added to k (see "t3 = kd + logc").
     * NOTE(review): presumably invc ~ 1/c and logc ~ log2(c) for the
     * subinterval center c - confirm against the musl table generator. */
    static const struct {
        double invc, logc;
    } T[] = {
        {0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
        {0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
        {0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
        {0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
        {0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2},
        {0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
        {0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2},
        {0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
        {0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2},
        {0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
        {0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2},
        {0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
        {0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2},
        {0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
        {0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2},
        {0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
        {0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2},
        {0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
        {0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2},
        {0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
        {0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3},
        {0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
        {0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3},
        {0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
        {0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3},
        {0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
        {0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3},
        {0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
        {0x1.19453847f2200p+0, -0x1.162595afdc000p-3},
        {0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
        {0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4},
        {0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
        {0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4},
        {0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
        {0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4},
        {0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
        {0x1.07325cac53b83p+0, -0x1.47a954f770000p-5},
        {0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
        {0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6},
        {0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
        {0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},
        {0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
        {0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},
        {0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
        {0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},
        {0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
        {0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},
        {0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
        {0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},
        {0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
        {0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},
        {0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
        {0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},
        {0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
        {0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},
        {0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
        {0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},
        {0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
        {0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},
        {0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
        {0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},
        {0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
        {0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
        {0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2}
    };
    /* Second per-subinterval table; both members are subtracted from z
     * before the multiplication by invc.
     * NOTE(review): presumably chi + clo is a high/low split of the
     * center c - confirm against the musl table generator. */
    static const struct {
        double chi, clo;
    } T2[] = {
        {0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
        {0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
        {0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
        {0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
        {0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},
        {0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
        {0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},
        {0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
        {0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55},
        {0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
        {0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},
        {0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
        {0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56},
        {0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
        {0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},
        {0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
        {0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57},
        {0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
        {0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55},
        {0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
        {0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55},
        {0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
        {0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},
        {0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
        {0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},
        {0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
        {0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56},
        {0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
        {0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58},
        {0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
        {0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},
        {0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
        {0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},
        {0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
        {0x1.ea00027edc00cp-1, -0x1.c848309459811p-55},
        {0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
        {0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55},
        {0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
        {0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},
        {0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
        {0x1.0200004292367p+0, 0x1.b7ff365324681p-54},
        {0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
        {0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58},
        {0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
        {0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55},
        {0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
        {0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},
        {0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
        {0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55},
        {0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
        {0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56},
        {0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
        {0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},
        {0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
        {0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},
        {0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
        {0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},
        {0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
        {0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55},
        {0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
        {0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},
        {0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
        {0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
        {0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54}
    };

    double z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p, rhi, rlo;
    UINT64 ix, iz, tmp;
    UINT32 top;
    int k, i;

    /* ix: raw bits of x; top: its upper 16 bits (sign + exponent + 4
     * mantissa bits). */
    ix = *(UINT64*)&x;
    top = ix >> 48;
    /* Unsigned wrap-around turns this into a single range check on ix. */
    if (ix - 0x3feea4af00000000ULL < 0x210aa00000000ULL) {
        /* Handle close to 1.0 inputs separately.  */
        /* Fix sign of zero with downward rounding when x==1.  */
        if (ix == 0x3ff0000000000000ULL)
            return 0;
        r = x - 1.0;
        /* rhi keeps only the upper 32 bits of r; rlo is the remainder. */
        *(UINT64*)&rhi = *(UINT64*)&r & -1ULL << 32;
        rlo = r - rhi;
        hi = rhi * invln2hi;
        lo = rlo * invln2hi + r * invln2lo;
        r2 = r * r; /* rounding error: 0x1p-62.  */
        r4 = r2 * r2;
        /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma).  */
        p = r2 * (B[0] + r * B[1]);
        y = hi + p;
        lo += hi - y + p;
        lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) +
                r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
        y += lo;
        return y;
    }
    /* Unsigned wrap-around: true for zero/subnormal, negative, inf, nan. */
    if (top - 0x0010 >= 0x7ff0 - 0x0010) {
        /* x < 0x1p-1022 or inf or nan.  */
        /* ix * 2 drops the sign bit, so this matches +0 and -0. */
        if (ix * 2 == 0) {
            *_errno() = ERANGE;
            return -1.0 / x;
        }
        if (ix == 0x7ff0000000000000ULL) /* log(inf) == inf.  */
            return x;
        /* NaN (either sign): all exponent bits set, mantissa non-zero. */
        if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL))
            return x;
        /* Sign bit set: negative input (including -inf). */
        if (top & 0x8000) {
            *_errno() = EDOM;
            return (x - x) / (x - x);
        }
        /* x is subnormal, normalize it.  */
        x *= 0x1p52;
        ix = *(UINT64*)&x;
        ix -= 52ULL << 52;
    }

    /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center.  */
    tmp = ix - 0x3fe6000000000000ULL;
    /* Top 6 mantissa bits of tmp select one of the 64 table entries. */
    i = (tmp >> (52 - 6)) % (1 << 6);
    k = (INT64)tmp >> 52; /* arithmetic shift */
    iz = ix - (tmp & 0xfffULL << 52);
    invc = T[i].invc;
    logc = T[i].logc;
    z = *(double*)&iz;
    kd = k;

    /* log2(x) = log2(z/c) + log2(c) + k.  */
    /* r ~= z/c - 1, |r| < 1/(2*N).  */
    /* rounding error: 0x1p-55/N + 0x1p-65.  */
    r = (z - T2[i].chi - T2[i].clo) * invc;
    /* rhi keeps only the upper 32 bits of r; rlo is the remainder. */
    *(UINT64*)&rhi = *(UINT64*)&r & -1ULL << 32;
    rlo = r - rhi;
    t1 = rhi * invln2hi;
    t2 = rlo * invln2hi + r * invln2lo;

    /* hi + lo = r/ln2 + log2(c) + k.  */
    t3 = kd + logc;
    hi = t3 + t1;
    lo = t3 - hi + t1 + t2;

    /* log2(r+1) = r/ln2 + r^2*poly(r).  */
    /* Evaluation is optimized assuming superscalar pipelined execution.  */
    r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
    r4 = r2 * r2;
    /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
       ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma).  */
    p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
    y = lo + r2 * p + hi;
    return y;
}
8476
/*********************************************************************
 *      log2f (MSVCR120.@)
 *
 * Copied from musl: src/math/log2f.c
 *
 * Single-precision base-2 logarithm; the intermediate arithmetic is
 * carried out in double.
 *
 * Special cases handled explicitly below:
 *   x == 1          returns 0
 *   x == +-0        errno = ERANGE, returns -1.0f / x
 *   x == +inf       returns x
 *   x is NaN        returns x
 *   x < 0           errno = EDOM, returns (x - x) / (x - x)
 *   x subnormal     scaled by 2**23 first, exponent corrected afterwards
 */
float CDECL log2f(float x)
{
    /* Polynomial coefficients; A[0] multiplies the highest power used. */
    static const double A[] = {
        -0x1.712b6f70a7e4dp-2,
        0x1.ecabf496832ep-2,
        -0x1.715479ffae3dep-1,
        0x1.715475f35c8b8p0
    };
    /* Per-subinterval table, indexed by i below; invc scales z and logc
     * is added to k.
     * NOTE(review): presumably invc ~ 1/c and logc ~ log2(c) for the
     * subinterval center c - confirm against the musl table generator. */
    static const struct {
        double invc, logc;
    } T[] = {
        { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
        { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
        { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
        { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
        { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
        { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
        { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
        { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
        { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
        { 0x1p+0, 0x0p+0 },
        { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
        { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
        { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
        { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
        { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
        { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 }
    };

    double z, r, r2, p, y, y0, invc, logc;
    UINT32 ix, iz, top, tmp;
    int k, i;

    /* ix: raw bits of x. */
    ix = *(UINT32*)&x;
    /* Fix sign of zero with downward rounding when x==1. */
    if (ix == 0x3f800000)
        return 0;
    /* Unsigned wrap-around: true for zero/subnormal, negative, inf, nan. */
    if (ix - 0x00800000 >= 0x7f800000 - 0x00800000) {
        /* x < 0x1p-126 or inf or nan. */
        /* ix * 2 drops the sign bit, so this matches +0 and -0. */
        if (ix * 2 == 0) {
            *_errno() = ERANGE;
            return -1.0f / x;
        }
        if (ix == 0x7f800000) /* log2(inf) == inf. */
            return x;
        /* NaN (either sign): magnitude bits above those of infinity. */
        if (ix * 2 > 0xff000000)
            return x;
        /* Sign bit set: negative input (including -inf). */
        if (ix & 0x80000000) {
            *_errno() = EDOM;
            return (x - x) / (x - x);
        }
        /* x is subnormal, normalize it. */
        x *= 0x1p23f;
        ix = *(UINT32*)&x;
        ix -= 23 << 23;
    }

    /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center. */
    tmp = ix - 0x3f330000;
    /* Top 4 mantissa bits of tmp select one of the 16 table entries. */
    i = (tmp >> (23 - 4)) % (1 << 4);
    top = tmp & 0xff800000;
    iz = ix - top;
    k = (INT32)tmp >> 23; /* arithmetic shift */
    invc = T[i].invc;
    logc = T[i].logc;
    z = *(float*)&iz;

    /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
    r = z * invc - 1;
    y0 = logc + (double)k;

    /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
    r2 = r * r;
    y = A[1] * r + A[2];
    y = A[0] * r2 + y;
    p = A[3] * r + y0;
    y = y * r2 + p;
    /* The double result is converted to float by the return. */
    return y;
}
8563
/*********************************************************************
 *      rint (MSVCR120.@)
 *
 * Exported entry point: passes x unchanged to the internal __rint
 * helper (defined elsewhere in this file) and returns its result.
 */
double CDECL rint(double x)
{
    return __rint(x);
}
8571
8572 /*********************************************************************
8573  *      rintf (MSVCR120.@)
8574  *
8575  * Copied from musl: src/math/rintf.c
8576  */
8577 float CDECL rintf(float x)
8578 {
8579     static const float toint = 1 / FLT_EPSILON;
8580
8581     unsigned int ix = *(unsigned int*)&x;
8582     int e = ix >> 23 & 0xff;
8583     int s = ix >> 31;
8584     float y;
8585
8586     if (e >= 0x7f + 23)
8587         return x;
8588     if (s)
8589         y = fp_barrierf(x - toint) + toint;
8590     else
8591         y = fp_barrierf(x + toint) - toint;
8592     if (y == 0)
8593         return s ? -0.0f : 0.0f;
8594     return y;
8595 }
8596
8597 /*********************************************************************
8598  *      lrint (MSVCR120.@)
8599  */
8600 __msvcrt_long CDECL lrint(double x)
8601 {
8602     double d;
8603
8604     d = rint(x);
8605     if ((d < 0 && d != (double)(__msvcrt_long)d)
8606             || (d >= 0 && d != (double)(__msvcrt_ulong)d)) {
8607         *_errno() = EDOM;
8608         return 0;
8609     }
8610     return d;
8611 }
8612
8613 /*********************************************************************
8614  *      lrintf (MSVCR120.@)
8615  */
8616 __msvcrt_long CDECL lrintf(float x)
8617 {
8618     float f;
8619
8620     f = rintf(x);
8621     if ((f < 0 && f != (float)(__msvcrt_long)f)
8622             || (f >= 0 && f != (float)(__msvcrt_ulong)f)) {
8623         *_errno() = EDOM;
8624         return 0;
8625     }
8626     return f;
8627 }
8628
8629 /*********************************************************************
8630  *      llrint (MSVCR120.@)
8631  */
8632 __int64 CDECL llrint(double x)
8633 {
8634     double d;
8635
8636     d = rint(x);
8637     if ((d < 0 && d != (double)(__int64)d)
8638             || (d >= 0 && d != (double)(unsigned __int64)d)) {
8639         *_errno() = EDOM;
8640         return 0;
8641     }
8642     return d;
8643 }
8644
8645 /*********************************************************************
8646  *      llrintf (MSVCR120.@)
8647  */
8648 __int64 CDECL llrintf(float x)
8649 {
8650     float f;
8651
8652     f = rintf(x);
8653     if ((f < 0 && f != (float)(__int64)f)
8654             || (f >= 0 && f != (float)(unsigned __int64)f)) {
8655         *_errno() = EDOM;
8656         return 0;
8657     }
8658     return f;
8659 }
8660
/*********************************************************************
 *      round (MSVCR120.@)
 *
 * Exported entry point: passes x unchanged to the internal __round
 * helper (defined elsewhere in this file) and returns its result.
 */
double CDECL round(double x)
{
    return __round(x);
}
8668
8669 /*********************************************************************
8670  *      roundf (MSVCR120.@)
8671  *
8672  * Copied from musl: src/math/roundf.c
8673  */
8674 float CDECL roundf(float x)
8675 {
8676     static const float toint = 1 / FLT_EPSILON;
8677
8678     unsigned int ix = *(unsigned int*)&x;
8679     int e = ix >> 23 & 0xff;
8680     float y;
8681
8682     if (e >= 0x7f + 23)
8683         return x;
8684     if (ix >> 31)
8685         x = -x;
8686     if (e < 0x7f - 1)
8687         return 0 * *(float*)&ix;
8688     y = fp_barrierf(x + toint) - toint - x;
8689     if (y > 0.5f)
8690         y = y + x - 1;
8691     else if (y <= -0.5f)
8692         y = y + x + 1;
8693     else
8694         y = y + x;
8695     if (ix >> 31)
8696         y = -y;
8697     return y;
8698 }
8699
8700 /*********************************************************************
8701  *      lround (MSVCR120.@)
8702  *
8703  * Copied from musl: src/math/lround.c
8704  */
8705 __msvcrt_long CDECL lround(double x)
8706 {
8707     double d = round(x);
8708     if (d != (double)(__msvcrt_long)d) {
8709         *_errno() = EDOM;
8710         return 0;
8711     }
8712     return d;
8713 }
8714
8715 /*********************************************************************
8716  *      lroundf (MSVCR120.@)
8717  *
8718  * Copied from musl: src/math/lroundf.c
8719  */
8720 __msvcrt_long CDECL lroundf(float x)
8721 {
8722     float f = roundf(x);
8723     if (f != (float)(__msvcrt_long)f) {
8724         *_errno() = EDOM;
8725         return 0;
8726     }
8727     return f;
8728 }
8729
8730 /*********************************************************************
8731  *      llround (MSVCR120.@)
8732  *
8733  * Copied from musl: src/math/llround.c
8734  */
8735 __int64 CDECL llround(double x)
8736 {
8737     double d = round(x);
8738     if (d != (double)(__int64)d) {
8739         *_errno() = EDOM;
8740         return 0;
8741     }
8742     return d;
8743 }
8744
8745 /*********************************************************************
8746  *      llroundf (MSVCR120.@)
8747  *
8748  * Copied from musl: src/math/llroundf.c
8749  */
8750 __int64 CDECL llroundf(float x)
8751 {
8752     float f = roundf(x);
8753     if (f != (float)(__int64)f) {
8754         *_errno() = EDOM;
8755         return 0;
8756     }
8757     return f;
8758 }
8759
8760 /*********************************************************************
8761  *      trunc (MSVCR120.@)
8762  *
8763  * Copied from musl: src/math/trunc.c
8764  */
8765 double CDECL trunc(double x)
8766 {
8767     union {double f; UINT64 i;} u = {x};
8768     int e = (u.i >> 52 & 0x7ff) - 0x3ff + 12;
8769     UINT64 m;
8770
8771     if (e >= 52 + 12)
8772         return x;
8773     if (e < 12)
8774         e = 1;
8775     m = -1ULL >> e;
8776     if ((u.i & m) == 0)
8777         return x;
8778     u.i &= ~m;
8779     return u.f;
8780 }
8781
8782 /*********************************************************************
8783  *      truncf (MSVCR120.@)
8784  *
8785  * Copied from musl: src/math/truncf.c
8786  */
8787 float CDECL truncf(float x)
8788 {
8789     union {float f; UINT32 i;} u = {x};
8790     int e = (u.i >> 23 & 0xff) - 0x7f + 9;
8791     UINT32 m;
8792
8793     if (e >= 23 + 9)
8794         return x;
8795     if (e < 9)
8796         e = 1;
8797     m = -1U >> e;
8798     if ((u.i & m) == 0)
8799         return x;
8800     u.i &= ~m;
8801     return u.f;
8802 }
8803
8804 /*********************************************************************
8805  *      _dtest (MSVCR120.@)
8806  */
8807 short CDECL _dtest(double *x)
8808 {
8809     return _dclass(*x);
8810 }
8811
8812 /*********************************************************************
8813  *      _fdtest (MSVCR120.@)
8814  */
8815 short CDECL _fdtest(float *x)
8816 {
8817     return _fdclass(*x);
8818 }
8819
/* Helper for the 0.84375 <= |x| < 1.25 range (see the callers): returns
 * 1 - erx - P(t) / Q(t) with t = |x| - 1, where P and Q are the
 * polynomials whose coefficients are listed below. */
static double erfc1(double x)
{
    static const double erx = 8.45062911510467529297e-01;
    /* Numerator coefficients, lowest order first. */
    static const double pa[] = {
        -2.36211856075265944077e-03,
         4.14856118683748331666e-01,
        -3.72207876035701323847e-01,
         3.18346619901161753674e-01,
        -1.10894694282396677476e-01,
         3.54783043256182359371e-02,
        -2.16637559486879084300e-03
    };
    /* Denominator coefficients, lowest order first (constant term 1). */
    static const double qa[] = {
         1,
         1.06420880400844228286e-01,
         5.40397917702171048937e-01,
         7.18286544141962662868e-02,
         1.26171219808761642112e-01,
         1.36370839120290507362e-02,
         1.19844998467991074170e-02
    };

    double t, num, den;

    t = fabs(x) - 1;
    /* Horner evaluation; the nesting fixes the order of the roundings. */
    num = pa[0] + t * (pa[1] + t * (pa[2] + t * (pa[3] + t * (pa[4] + t * (pa[5] + t * pa[6])))));
    den = qa[0] + t * (qa[1] + t * (qa[2] + t * (qa[3] + t * (qa[4] + t * (qa[5] + t * qa[6])))));
    return 1 - erx - num / den;
}
8844
8845 static double erfc2(UINT32 ix, double x)
8846 {
8847     static const double ra0  = -9.86494403484714822705e-03,
8848                  ra1  = -6.93858572707181764372e-01,
8849                  ra2  = -1.05586262253232909814e+01,
8850                  ra3  = -6.23753324503260060396e+01,
8851                  ra4  = -1.62396669462573470355e+02,
8852                  ra5  = -1.84605092906711035994e+02,
8853                  ra6  = -8.12874355063065934246e+01,
8854                  ra7  = -9.81432934416914548592e+00,
8855                  sa1  =  1.96512716674392571292e+01,
8856                  sa2  =  1.37657754143519042600e+02,
8857                  sa3  =  4.34565877475229228821e+02,
8858                  sa4  =  6.45387271733267880336e+02,
8859                  sa5  =  4.29008140027567833386e+02,
8860                  sa6  =  1.08635005541779435134e+02,
8861                  sa7  =  6.57024977031928170135e+00,
8862                  sa8  = -6.04244152148580987438e-02,
8863                  rb0  = -9.86494292470009928597e-03,
8864                  rb1  = -7.99283237680523006574e-01,
8865                  rb2  = -1.77579549177547519889e+01,
8866                  rb3  = -1.60636384855821916062e+02,
8867                  rb4  = -6.37566443368389627722e+02,
8868                  rb5  = -1.02509513161107724954e+03,
8869                  rb6  = -4.83519191608651397019e+02,
8870                  sb1  =  3.03380607434824582924e+01,
8871                  sb2  =  3.25792512996573918826e+02,
8872                  sb3  =  1.53672958608443695994e+03,
8873                  sb4  =  3.19985821950859553908e+03,
8874                  sb5  =  2.55305040643316442583e+03,
8875                  sb6  =  4.74528541206955367215e+02,
8876                  sb7  = -2.24409524465858183362e+01;
8877
8878     double s, R, S, z;
8879     UINT64 iz;
8880
8881     if (ix < 0x3ff40000) /* |x| < 1.25 */
8882         return erfc1(x);
8883
8884     x = fabs(x);
8885     s = 1 / (x * x);
8886     if (ix < 0x4006db6d) { /* |x| < 1/.35 ~ 2.85714 */
8887         R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s *
8888                             (ra5 + s * (ra6 + s * ra7))))));
8889         S = 1.0 + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s *
8890                             (sa5 + s * (sa6 + s * (sa7 + s * sa8)))))));
8891     } else { /* |x| > 1/.35 */
8892         R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s *
8893                             (rb5 + s * rb6)))));
8894         S = 1.0 + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s *
8895                             (sb5 + s * (sb6 + s * sb7))))));
8896     }
8897     z = x;
8898     iz = *(ULONGLONG*)&z;
8899     iz &= 0xffffffff00000000ULL;
8900     z = *(double*)&iz;
8901     return exp(-z * z - 0.5625) * exp((z - x) * (z + x) + R / S) / x;
8902 }
8903
8904 /*********************************************************************
8905  *      erf (MSVCR120.@)
8906  */
8907 double CDECL erf(double x)
8908 {
8909     static const double efx8 =  1.02703333676410069053e+00,
8910                  pp0  =  1.28379167095512558561e-01,
8911                  pp1  = -3.25042107247001499370e-01,
8912                  pp2  = -2.84817495755985104766e-02,
8913                  pp3  = -5.77027029648944159157e-03,
8914                  pp4  = -2.37630166566501626084e-05,
8915                  qq1  =  3.97917223959155352819e-01,
8916                  qq2  =  6.50222499887672944485e-02,
8917                  qq3  =  5.08130628187576562776e-03,
8918                  qq4  =  1.32494738004321644526e-04,
8919                  qq5  = -3.96022827877536812320e-06;
8920
8921     double r, s, z, y;
8922     UINT32 ix;
8923     int sign;
8924
8925     ix = *(UINT64*)&x >> 32;
8926     sign = ix >> 31;
8927     ix &= 0x7fffffff;
8928     if (ix >= 0x7ff00000) {
8929         /* erf(nan)=nan, erf(+-inf)=+-1 */
8930         return 1 - 2 * sign + 1 / x;
8931     }
8932     if (ix < 0x3feb0000) { /* |x| < 0.84375 */
8933         if (ix < 0x3e300000) { /* |x| < 2**-28 */
8934             /* avoid underflow */
8935             return 0.125 * (8 * x + efx8 * x);
8936         }
8937         z = x * x;
8938         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
8939         s = 1.0 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
8940         y = r / s;
8941         return x + x * y;
8942     }
8943     if (ix < 0x40180000) /* 0.84375 <= |x| < 6 */
8944         y = 1 - erfc2(ix, x);
8945     else
8946         y = 1 - DBL_MIN;
8947     return sign ? -y : y;
8948 }
8949
/* Single-precision helper for the |x| < 1.25 branch of its caller:
 * returns 1 - erx - P(t) / Q(t) with t = |x| - 1, where P and Q are the
 * polynomials whose coefficients are listed below. */
static float erfc1f(float x)
{
    static const float erx = 8.4506291151e-01;
    /* Numerator coefficients, lowest order first. */
    static const float pa[] = {
        -2.3621185683e-03,
         4.1485610604e-01,
        -3.7220788002e-01,
         3.1834661961e-01,
        -1.1089469492e-01,
         3.5478305072e-02,
        -2.1663755178e-03
    };
    /* Denominator coefficients, lowest order first (constant term 1). */
    static const float qa[] = {
         1,
         1.0642088205e-01,
         5.4039794207e-01,
         7.1828655899e-02,
         1.2617121637e-01,
         1.3637083583e-02,
         1.1984500103e-02
    };

    float t, num, den;

    t = fabsf(x) - 1;
    /* Horner evaluation; the nesting fixes the order of the roundings. */
    num = pa[0] + t * (pa[1] + t * (pa[2] + t * (pa[3] + t * (pa[4] + t * (pa[5] + t * pa[6])))));
    den = qa[0] + t * (qa[1] + t * (qa[2] + t * (qa[3] + t * (qa[4] + t * (qa[5] + t * qa[6])))));
    return 1 - erx - num / den;
}
8974
8975 static float erfc2f(UINT32 ix, float x)
8976 {
8977     static const float ra0  = -9.8649440333e-03,
8978                  ra1  = -6.9385856390e-01,
8979                  ra2  = -1.0558626175e+01,
8980                  ra3  = -6.2375331879e+01,
8981                  ra4  = -1.6239666748e+02,
8982                  ra5  = -1.8460508728e+02,
8983                  ra6  = -8.1287437439e+01,
8984                  ra7  = -9.8143291473e+00,
8985                  sa1  =  1.9651271820e+01,
8986                  sa2  =  1.3765776062e+02,
8987                  sa3  =  4.3456588745e+02,
8988                  sa4  =  6.4538726807e+02,
8989                  sa5  =  4.2900814819e+02,
8990                  sa6  =  1.0863500214e+02,
8991                  sa7  =  6.5702495575e+00,
8992                  sa8  = -6.0424413532e-02,
8993                  rb0  = -9.8649431020e-03,
8994                  rb1  = -7.9928326607e-01,
8995                  rb2  = -1.7757955551e+01,
8996                  rb3  = -1.6063638306e+02,
8997                  rb4  = -6.3756646729e+02,
8998                  rb5  = -1.0250950928e+03,
8999                  rb6  = -4.8351919556e+02,
9000                  sb1  =  3.0338060379e+01,
9001                  sb2  =  3.2579251099e+02,
9002                  sb3  =  1.5367296143e+03,
9003                  sb4  =  3.1998581543e+03,
9004                  sb5  =  2.5530502930e+03,
9005                  sb6  =  4.7452853394e+02,
9006                  sb7  = -2.2440952301e+01;
9007
9008     float s, R, S, z;
9009
9010     if (ix < 0x3fa00000) /* |x| < 1.25 */
9011         return erfc1f(x);
9012
9013     x = fabsf(x);
9014     s = 1 / (x * x);
9015     if (ix < 0x4036db6d) { /* |x| < 1/0.35 */
9016         R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s *
9017                             (ra5 + s * (ra6 + s * ra7))))));
9018         S = 1.0f + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s *
9019                             (sa5 + s * (sa6 + s * (sa7 + s * sa8)))))));
9020     } else { /* |x| >= 1/0.35 */
9021         R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s * (rb5 + s * rb6)))));
9022         S = 1.0f + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s *
9023                             (sb5 + s * (sb6 + s * sb7))))));
9024     }
9025
9026     ix = *(UINT32*)&x & 0xffffe000;
9027     z = *(float*)&ix;
9028     return expf(-z * z - 0.5625f) * expf((z - x) * (z + x) + R / S) / x;
9029 }
9030
9031 /*********************************************************************
9032  *      erff (MSVCR120.@)
9033  *
9034  * Copied from musl: src/math/erff.c
9035  */
9036 float CDECL erff(float x)
9037 {
9038     static const float efx8 =  1.0270333290e+00,
9039                  pp0  =  1.2837916613e-01,
9040                  pp1  = -3.2504209876e-01,
9041                  pp2  = -2.8481749818e-02,
9042                  pp3  = -5.7702702470e-03,
9043                  pp4  = -2.3763017452e-05,
9044                  qq1  =  3.9791721106e-01,
9045                  qq2  =  6.5022252500e-02,
9046                  qq3  =  5.0813062117e-03,
9047                  qq4  =  1.3249473704e-04,
9048                  qq5  = -3.9602282413e-06;
9049
9050     float r, s, z, y;
9051     UINT32 ix;
9052     int sign;
9053
9054     ix = *(UINT32*)&x;
9055     sign = ix >> 31;
9056     ix &= 0x7fffffff;
9057     if (ix >= 0x7f800000) {
9058         /* erf(nan)=nan, erf(+-inf)=+-1 */
9059         return 1 - 2 * sign + 1 / x;
9060     }
9061     if (ix < 0x3f580000) { /* |x| < 0.84375 */
9062         if (ix < 0x31800000) { /* |x| < 2**-28 */
9063             /*avoid underflow */
9064             return 0.125f * (8 * x + efx8 * x);
9065         }
9066         z = x * x;
9067         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9068         s = 1 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9069         y = r / s;
9070         return x + x * y;
9071     }
9072     if (ix < 0x40c00000) /* |x| < 6 */
9073         y = 1 - erfc2f(ix, x);
9074     else
9075         y = 1 - FLT_MIN;
9076     return sign ? -y : y;
9077 }
9078
9079 /*********************************************************************
9080  *      erfc (MSVCR120.@)
9081  *
9082  * Copied from musl: src/math/erf.c
9083  */
9084 double CDECL erfc(double x)
9085 {
9086     static const double pp0  =  1.28379167095512558561e-01,
9087                  pp1  = -3.25042107247001499370e-01,
9088                  pp2  = -2.84817495755985104766e-02,
9089                  pp3  = -5.77027029648944159157e-03,
9090                  pp4  = -2.37630166566501626084e-05,
9091                  qq1  =  3.97917223959155352819e-01,
9092                  qq2  =  6.50222499887672944485e-02,
9093                  qq3  =  5.08130628187576562776e-03,
9094                  qq4  =  1.32494738004321644526e-04,
9095                  qq5  = -3.96022827877536812320e-06;
9096
9097     double r, s, z, y;
9098     UINT32 ix;
9099     int sign;
9100
9101     ix = *(ULONGLONG*)&x >> 32;
9102     sign = ix >> 31;
9103     ix &= 0x7fffffff;
9104     if (ix >= 0x7ff00000) {
9105         /* erfc(nan)=nan, erfc(+-inf)=0,2 */
9106         return 2 * sign + 1 / x;
9107     }
9108     if (ix < 0x3feb0000) { /* |x| < 0.84375 */
9109         if (ix < 0x3c700000) /* |x| < 2**-56 */
9110             return 1.0 - x;
9111         z = x * x;
9112         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9113         s = 1.0 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9114         y = r / s;
9115         if (sign || ix < 0x3fd00000) { /* x < 1/4 */
9116             return 1.0 - (x + x * y);
9117         }
9118         return 0.5 - (x - 0.5 + x * y);
9119     }
9120     if (ix < 0x403c0000) { /* 0.84375 <= |x| < 28 */
9121         return sign ? 2 - erfc2(ix, x) : erfc2(ix, x);
9122     }
9123     if (sign)
9124         return 2 - DBL_MIN;
9125     *_errno() = ERANGE;
9126     return fp_barrier(DBL_MIN) * DBL_MIN;
9127 }
9128
9129 /*********************************************************************
9130  *      erfcf (MSVCR120.@)
9131  *
9132  * Copied from musl: src/math/erff.c
9133  */
9134 float CDECL erfcf(float x)
9135 {
9136     static const float pp0  =  1.2837916613e-01,
9137                  pp1  = -3.2504209876e-01,
9138                  pp2  = -2.8481749818e-02,
9139                  pp3  = -5.7702702470e-03,
9140                  pp4  = -2.3763017452e-05,
9141                  qq1  =  3.9791721106e-01,
9142                  qq2  =  6.5022252500e-02,
9143                  qq3  =  5.0813062117e-03,
9144                  qq4  =  1.3249473704e-04,
9145                  qq5  = -3.9602282413e-06;
9146
9147     float r, s, z, y;
9148     UINT32 ix;
9149     int sign;
9150
9151     ix = *(UINT32*)&x;
9152     sign = ix >> 31;
9153     ix &= 0x7fffffff;
9154     if (ix >= 0x7f800000) {
9155         /* erfc(nan)=nan, erfc(+-inf)=0,2 */
9156         return 2 * sign + 1 / x;
9157     }
9158
9159     if (ix < 0x3f580000) { /* |x| < 0.84375 */
9160         if (ix < 0x23800000) /* |x| < 2**-56 */
9161             return 1.0f - x;
9162         z = x * x;
9163         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9164         s = 1.0f + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9165         y = r / s;
9166         if (sign || ix < 0x3e800000) /* x < 1/4 */
9167             return 1.0f - (x + x * y);
9168         return 0.5f - (x - 0.5f + x * y);
9169     }
9170     if (ix < 0x41e00000) { /* |x| < 28 */
9171         return sign ? 2 - erfc2f(ix, x) : erfc2f(ix, x);
9172     }
9173     if (sign)
9174         return 2 - FLT_MIN;
9175     *_errno() = ERANGE;
9176     return FLT_MIN * FLT_MIN;
9177 }
9178
9179 /*********************************************************************
9180  *      fmaxf (MSVCR120.@)
9181  */
9182 float CDECL fmaxf(float x, float y)
9183 {
9184     if(isnan(x))
9185         return y;
9186     if(isnan(y))
9187         return x;
9188     if(x==0 && y==0)
9189         return signbit(x) ? y : x;
9190     return x<y ? y : x;
9191 }
9192
9193 /*********************************************************************
9194  *      fmax (MSVCR120.@)
9195  */
9196 double CDECL fmax(double x, double y)
9197 {
9198     if(isnan(x))
9199         return y;
9200     if(isnan(y))
9201         return x;
9202     if(x==0 && y==0)
9203         return signbit(x) ? y : x;
9204     return x<y ? y : x;
9205 }
9206
9207 /*********************************************************************
9208  *      fdimf (MSVCR120.@)
9209  */
9210 float CDECL fdimf(float x, float y)
9211 {
9212     if(isnan(x))
9213         return x;
9214     if(isnan(y))
9215         return y;
9216     return x>y ? x-y : 0;
9217 }
9218
9219 /*********************************************************************
9220  *      fdim (MSVCR120.@)
9221  */
9222 double CDECL fdim(double x, double y)
9223 {
9224     if(isnan(x))
9225         return x;
9226     if(isnan(y))
9227         return y;
9228     return x>y ? x-y : 0;
9229 }
9230
9231 /*********************************************************************
9232  *      _fdsign (MSVCR120.@)
9233  */
9234 int CDECL _fdsign(float x)
9235 {
9236     union { float f; UINT32 i; } u = { x };
9237     return (u.i >> 16) & 0x8000;
9238 }
9239
9240 /*********************************************************************
9241  *      _dsign (MSVCR120.@)
9242  */
9243 int CDECL _dsign(double x)
9244 {
9245     union { double f; UINT64 i; } u = { x };
9246     return (u.i >> 48) & 0x8000;
9247 }
9248
9249
9250 /*********************************************************************
9251  *      _dpcomp (MSVCR120.@)
9252  */
9253 int CDECL _dpcomp(double x, double y)
9254 {
9255     if(isnan(x) || isnan(y))
9256         return 0;
9257
9258     if(x == y) return 2;
9259     return x < y ? 1 : 4;
9260 }
9261
9262 /*********************************************************************
9263  *      _fdpcomp (MSVCR120.@)
9264  */
9265 int CDECL _fdpcomp(float x, float y)
9266 {
9267     return _dpcomp(x, y);
9268 }
9269
9270 /*********************************************************************
9271  *      fminf (MSVCR120.@)
9272  */
9273 float CDECL fminf(float x, float y)
9274 {
9275     if(isnan(x))
9276         return y;
9277     if(isnan(y))
9278         return x;
9279     if(x==0 && y==0)
9280         return signbit(x) ? x : y;
9281     return x<y ? x : y;
9282 }
9283
9284 /*********************************************************************
9285  *      fmin (MSVCR120.@)
9286  */
9287 double CDECL fmin(double x, double y)
9288 {
9289     if(isnan(x))
9290         return y;
9291     if(isnan(y))
9292         return x;
9293     if(x==0 && y==0)
9294         return signbit(x) ? x : y;
9295     return x<y ? x : y;
9296 }
9297
9298 /*********************************************************************
9299  *      asinh (MSVCR120.@)
9300  *
9301  * Copied from musl: src/math/asinh.c
9302  */
9303 double CDECL asinh(double x)
9304 {
9305     UINT64 ux = *(UINT64*)&x;
9306     int e = ux >> 52 & 0x7ff;
9307     int s = ux >> 63;
9308
9309     /* |x| */
9310     ux &= (UINT64)-1 / 2;
9311     x = *(double*)&ux;
9312
9313     if (e >= 0x3ff + 26) /* |x| >= 0x1p26 or inf or nan */
9314         x = log(x) + 0.693147180559945309417232121458176568;
9315     else if (e >= 0x3ff + 1) /* |x| >= 2 */
9316         x = log(2 * x + 1 / (sqrt(x * x + 1) + x));
9317     else if (e >= 0x3ff - 26) /* |x| >= 0x1p-26 */
9318         x = log1p(x + x * x / (sqrt(x * x + 1) + 1));
9319     else /* |x| < 0x1p-26, raise inexact if x != 0 */
9320         fp_barrier(x + 0x1p120f);
9321     return s ? -x : x;
9322 }
9323
9324 /*********************************************************************
9325  *      asinhf (MSVCR120.@)
9326  *
9327  * Copied from musl: src/math/asinhf.c
9328  */
9329 float CDECL asinhf(float x)
9330 {
9331     UINT32 ux = *(UINT32*)&x;
9332     UINT32 i = ux & 0x7fffffff;
9333     int s = ux >> 31;
9334
9335     /* |x| */
9336     x = *(float*)&i;
9337
9338     if (i >= 0x3f800000 + (12 << 23))/* |x| >= 0x1p12 or inf or nan */
9339         x = logf(x) + 0.693147180559945309417232121458176568f;
9340     else if (i >= 0x3f800000 + (1 << 23)) /* |x| >= 2 */
9341         x = logf(2 * x + 1 / (sqrtf(x * x + 1) + x));
9342     else if (i >= 0x3f800000 - (12 << 23)) /* |x| >= 0x1p-12 */
9343         x = log1pf(x + x * x / (sqrtf(x * x + 1) + 1));
9344     else /* |x| < 0x1p-12, raise inexact if x!=0 */
9345         fp_barrierf(x + 0x1p120f);
9346     return s ? -x : x;
9347 }
9348
9349 /*********************************************************************
9350  *      acosh (MSVCR120.@)
9351  *
9352  * Copied from musl: src/math/acosh.c
9353  */
9354 double CDECL acosh(double x)
9355 {
9356     int e = *(UINT64*)&x >> 52 & 0x7ff;
9357
9358     if (x < 1)
9359     {
9360         *_errno() = EDOM;
9361         feraiseexcept(FE_INVALID);
9362         return NAN;
9363     }
9364
9365     if (e < 0x3ff + 1) /* |x| < 2, up to 2ulp error in [1,1.125] */
9366         return log1p(x - 1 + sqrt((x - 1) * (x - 1) + 2 * (x - 1)));
9367     if (e < 0x3ff + 26) /* |x| < 0x1p26 */
9368         return log(2 * x - 1 / (x + sqrt(x * x - 1)));
9369     /* |x| >= 0x1p26 or nan */
9370     return log(x) + 0.693147180559945309417232121458176568;
9371 }
9372
9373 /*********************************************************************
9374  *      acoshf (MSVCR120.@)
9375  *
9376  * Copied from musl: src/math/acoshf.c
9377  */
9378 float CDECL acoshf(float x)
9379 {
9380     UINT32 a = *(UINT32*)&x & 0x7fffffff;
9381
9382     if (x < 1)
9383     {
9384         *_errno() = EDOM;
9385         feraiseexcept(FE_INVALID);
9386         return NAN;
9387     }
9388
9389     if (a < 0x3f800000 + (1 << 23)) /* |x| < 2, up to 2ulp error in [1,1.125] */
9390         return log1pf(x - 1 + sqrtf((x - 1) * (x - 1) + 2 * (x - 1)));
9391     if (*(UINT32*)&x < 0x3f800000 + (12 << 23)) /* 2 <= x < 0x1p12 */
9392         return logf(2 * x - 1 / (x + sqrtf(x * x - 1)));
9393     /* x >= 0x1p12 or x <= -2 or nan */
9394     return logf(x) + 0.693147180559945309417232121458176568f;
9395 }
9396
9397 /*********************************************************************
9398  *      atanh (MSVCR120.@)
9399  *
9400  * Copied from musl: src/math/atanh.c
9401  */
9402 double CDECL atanh(double x)
9403 {
9404     UINT64 ux = *(UINT64*)&x;
9405     int e = ux >> 52 & 0x7ff;
9406     int s = ux >> 63;
9407
9408     /* |x| */
9409     ux &= (UINT64)-1 / 2;
9410     x = *(double*)&ux;
9411
9412     if (x > 1) {
9413         *_errno() = EDOM;
9414         feraiseexcept(FE_INVALID);
9415         return NAN;
9416     }
9417
9418     if (e < 0x3ff - 1) {
9419         if (e < 0x3ff - 32) {
9420             fp_barrier(x + 0x1p120f);
9421             if (e == 0) /* handle underflow */
9422                 fp_barrier(x * x);
9423         } else { /* |x| < 0.5, up to 1.7ulp error */
9424             x = 0.5 * log1p(2 * x + 2 * x * x / (1 - x));
9425         }
9426     } else { /* avoid overflow */
9427         x = 0.5 * log1p(2 * (x / (1 - x)));
9428         if (isinf(x)) *_errno() = ERANGE;
9429     }
9430     return s ? -x : x;
9431 }
9432
9433 /*********************************************************************
9434  *      atanhf (MSVCR120.@)
9435  *
9436  * Copied from musl: src/math/atanhf.c
9437  */
9438 float CDECL atanhf(float x)
9439 {
9440     UINT32 ux = *(UINT32*)&x;
9441     int s = ux >> 31;
9442
9443     /* |x| */
9444     ux &= 0x7fffffff;
9445     x = *(float*)&ux;
9446
9447     if (x > 1) {
9448         *_errno() = EDOM;
9449         feraiseexcept(FE_INVALID);
9450         return NAN;
9451     }
9452
9453     if (ux < 0x3f800000 - (1 << 23)) {
9454         if (ux < 0x3f800000 - (32 << 23)) {
9455             fp_barrierf(x + 0x1p120f);
9456             if (ux < (1 << 23)) /* handle underflow */
9457                 fp_barrierf(x * x);
9458         } else { /* |x| < 0.5, up to 1.7ulp error */
9459             x = 0.5f * log1pf(2 * x + 2 * x * x / (1 - x));
9460         }
9461     } else { /* avoid overflow */
9462         x = 0.5f * log1pf(2 * (x / (1 - x)));
9463         if (isinf(x)) *_errno() = ERANGE;
9464     }
9465     return s ? -x : x;
9466 }
9467
9468 #endif /* _MSVCR_VER>=120 */
9469
9470 /*********************************************************************
9471  *      _scalb  (MSVCRT.@)
9472  *      scalbn  (MSVCR120.@)
9473  *      scalbln (MSVCR120.@)
9474  */
9475 double CDECL _scalb(double num, __msvcrt_long power)
9476 {
9477   return ldexp(num, power);
9478 }
9479
9480 /*********************************************************************
9481  *      _scalbf  (MSVCRT.@)
9482  *      scalbnf  (MSVCR120.@)
9483  *      scalblnf (MSVCR120.@)
9484  */
9485 float CDECL _scalbf(float num, __msvcrt_long power)
9486 {
9487   return ldexp(num, power);
9488 }
9489
9490 #if _MSVCR_VER>=120
9491
9492 /*********************************************************************
9493  *      remainder (MSVCR120.@)
9494  *
9495  * Copied from musl: src/math/remainder.c
9496  */
9497 double CDECL remainder(double x, double y)
9498 {
9499     int q;
9500 #if _MSVCR_VER == 120 && defined(__x86_64__)
9501     if (isnan(x) || isnan(y)) *_errno() = EDOM;
9502 #endif
9503     return remquo(x, y, &q);
9504 }
9505
9506 /*********************************************************************
9507  *      remainderf (MSVCR120.@)
9508  *
9509  * Copied from musl: src/math/remainderf.c
9510  */
9511 float CDECL remainderf(float x, float y)
9512 {
9513     int q;
9514 #if _MSVCR_VER == 120 && defined(__x86_64__)
9515     if (isnan(x) || isnan(y)) *_errno() = EDOM;
9516 #endif
9517     return remquof(x, y, &q);
9518 }
9519
9520 /*********************************************************************
9521  *      remquo (MSVCR120.@)
9522  *
9523  * Copied from musl: src/math/remquo.c
9524  */
9525 double CDECL remquo(double x, double y, int *quo)
9526 {
9527     UINT64 uxi = *(UINT64*)&x;
9528     UINT64 uyi = *(UINT64*)&y;
9529     int ex = uxi >> 52 & 0x7ff;
9530     int ey = uyi >> 52 & 0x7ff;
9531     int sx = uxi >> 63;
9532     int sy = uyi >> 63;
9533     UINT32 q;
9534     UINT64 i;
9535
9536     *quo = 0;
9537     if (y == 0 || isinf(x)) *_errno() = EDOM;
9538     if (uyi << 1 == 0 || isnan(y) || ex == 0x7ff)
9539         return (x * y) / (x * y);
9540     if (uxi << 1 == 0)
9541         return x;
9542
9543     /* normalize x and y */
9544     if (!ex) {
9545         for (i = uxi << 12; i >> 63 == 0; ex--, i <<= 1);
9546         uxi <<= -ex + 1;
9547     } else {
9548         uxi &= -1ULL >> 12;
9549         uxi |= 1ULL << 52;
9550     }
9551     if (!ey) {
9552         for (i = uyi << 12; i >> 63 == 0; ey--, i <<= 1);
9553         uyi <<= -ey + 1;
9554     } else {
9555         uyi &= -1ULL >> 12;
9556         uyi |= 1ULL << 52;
9557     }
9558
9559     q = 0;
9560     if (ex < ey) {
9561         if (ex+1 == ey)
9562             goto end;
9563         return x;
9564     }
9565
9566     /* x mod y */
9567     for (; ex > ey; ex--) {
9568         i = uxi - uyi;
9569         if (i >> 63 == 0) {
9570             uxi = i;
9571             q++;
9572         }
9573         uxi <<= 1;
9574         q <<= 1;
9575     }
9576     i = uxi - uyi;
9577     if (i >> 63 == 0) {
9578         uxi = i;
9579         q++;
9580     }
9581     if (uxi == 0)
9582         ex = -60;
9583     else
9584         for (; uxi >> 52 == 0; uxi <<= 1, ex--);
9585 end:
9586     /* scale result and decide between |x| and |x|-|y| */
9587     if (ex > 0) {
9588         uxi -= 1ULL << 52;
9589         uxi |= (UINT64)ex << 52;
9590     } else {
9591         uxi >>= -ex + 1;
9592     }
9593     x = *(double*)&uxi;
9594     if (sy)
9595         y = -y;
9596     if (ex == ey || (ex + 1 == ey && (2 * x > y || (2 * x == y && q % 2)))) {
9597         x -= y;
9598         q++;
9599     }
9600     q &= 0x7fffffff;
9601     *quo = sx ^ sy ? -(int)q : (int)q;
9602     return sx ? -x : x;
9603 }
9604
9605 /*********************************************************************
9606  *      remquof (MSVCR120.@)
9607  *
9608  * Copied from musl: src/math/remquof.c
9609  */
9610 float CDECL remquof(float x, float y, int *quo)
9611 {
9612     UINT32 uxi = *(UINT32*)&x;
9613     UINT32 uyi = *(UINT32*)&y;
9614     int ex = uxi >> 23 & 0xff;
9615     int ey = uyi >> 23 & 0xff;
9616     int sx = uxi >> 31;
9617     int sy = uyi>> 31;
9618     UINT32 q, i;
9619
9620     *quo = 0;
9621     if (y == 0 || isinf(x)) *_errno() = EDOM;
9622     if (uyi << 1 == 0 || isnan(y) || ex == 0xff)
9623         return (x * y) / (x * y);
9624     if (uxi << 1 == 0)
9625         return x;
9626
9627     /* normalize x and y */
9628     if (!ex) {
9629         for (i = uxi << 9; i >> 31 == 0; ex--, i <<= 1);
9630         uxi <<= -ex + 1;
9631     } else {
9632         uxi &= -1U >> 9;
9633         uxi |= 1U << 23;
9634     }
9635     if (!ey) {
9636         for (i = uyi << 9; i >> 31 == 0; ey--, i <<= 1);
9637         uyi <<= -ey + 1;
9638     } else {
9639         uyi &= -1U >> 9;
9640         uyi |= 1U << 23;
9641     }
9642
9643     q = 0;
9644     if (ex < ey) {
9645         if (ex + 1 == ey)
9646             goto end;
9647         return x;
9648     }
9649
9650     /* x mod y */
9651     for (; ex > ey; ex--) {
9652         i = uxi - uyi;
9653         if (i >> 31 == 0) {
9654             uxi = i;
9655             q++;
9656         }
9657         uxi <<= 1;
9658         q <<= 1;
9659     }
9660     i = uxi - uyi;
9661     if (i >> 31 == 0) {
9662         uxi = i;
9663         q++;
9664     }
9665     if (uxi == 0)
9666         ex = -30;
9667     else
9668         for (; uxi >> 23 == 0; uxi <<= 1, ex--);
9669 end:
9670     /* scale result and decide between |x| and |x|-|y| */
9671     if (ex > 0) {
9672         uxi -= 1U << 23;
9673         uxi |= (UINT32)ex << 23;
9674     } else {
9675         uxi >>= -ex + 1;
9676     }
9677     x = *(float*)&uxi;
9678     if (sy)
9679         y = -y;
9680     if (ex == ey || (ex + 1 == ey && (2 * x > y || (2 * x == y && q % 2)))) {
9681         x -= y;
9682         q++;
9683     }
9684     q &= 0x7fffffff;
9685     *quo = sx ^ sy ? -(int)q : (int)q;
9686     return sx ? -x : x;
9687 }
9688
9689 /* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
9690 static double sin_pi(double x)
9691 {
9692     int n;
9693
9694     /* spurious inexact if odd int */
9695     x = 2.0 * (x * 0.5 - floor(x * 0.5)); /* x mod 2.0 */
9696
9697     n = x * 4.0;
9698     n = (n + 1) / 2;
9699     x -= n * 0.5f;
9700     x *= M_PI;
9701
9702     switch (n) {
9703     default: /* case 4: */
9704     case 0: return __sin(x, 0.0, 0);
9705     case 1: return __cos(x, 0.0);
9706     case 2: return __sin(-x, 0.0, 0);
9707     case 3: return -__cos(x, 0.0);
9708     }
9709 }
9710
9711 /*********************************************************************
9712  *      lgamma (MSVCR120.@)
9713  *
9714  * Copied from musl: src/math/lgamma_r.c
9715  */
9716 double CDECL lgamma(double x)
9717 {
9718     static const double pi = 3.14159265358979311600e+00,
9719         a0 = 7.72156649015328655494e-02,
9720         a1 = 3.22467033424113591611e-01,
9721         a2 = 6.73523010531292681824e-02,
9722         a3 = 2.05808084325167332806e-02,
9723         a4 = 7.38555086081402883957e-03,
9724         a5 = 2.89051383673415629091e-03,
9725         a6 = 1.19270763183362067845e-03,
9726         a7 = 5.10069792153511336608e-04,
9727         a8 = 2.20862790713908385557e-04,
9728         a9 = 1.08011567247583939954e-04,
9729         a10 = 2.52144565451257326939e-05,
9730         a11 = 4.48640949618915160150e-05,
9731         tc = 1.46163214496836224576e+00,
9732         tf = -1.21486290535849611461e-01,
9733         tt = -3.63867699703950536541e-18,
9734         t0 = 4.83836122723810047042e-01,
9735         t1 = -1.47587722994593911752e-01,
9736         t2 = 6.46249402391333854778e-02,
9737         t3 = -3.27885410759859649565e-02,
9738         t4 = 1.79706750811820387126e-02,
9739         t5 = -1.03142241298341437450e-02,
9740         t6 = 6.10053870246291332635e-03,
9741         t7 = -3.68452016781138256760e-03,
9742         t8 = 2.25964780900612472250e-03,
9743         t9 = -1.40346469989232843813e-03,
9744         t10 = 8.81081882437654011382e-04,
9745         t11 = -5.38595305356740546715e-04,
9746         t12 = 3.15632070903625950361e-04,
9747         t13 = -3.12754168375120860518e-04,
9748         t14 = 3.35529192635519073543e-04,
9749         u0 = -7.72156649015328655494e-02,
9750         u1 = 6.32827064025093366517e-01,
9751         u2 = 1.45492250137234768737e+00,
9752         u3 = 9.77717527963372745603e-01,
9753         u4 = 2.28963728064692451092e-01,
9754         u5 = 1.33810918536787660377e-02,
9755         v1 = 2.45597793713041134822e+00,
9756         v2 = 2.12848976379893395361e+00,
9757         v3 = 7.69285150456672783825e-01,
9758         v4 = 1.04222645593369134254e-01,
9759         v5 = 3.21709242282423911810e-03,
9760         s0 = -7.72156649015328655494e-02,
9761         s1 = 2.14982415960608852501e-01,
9762         s2 = 3.25778796408930981787e-01,
9763         s3 = 1.46350472652464452805e-01,
9764         s4 = 2.66422703033638609560e-02,
9765         s5 = 1.84028451407337715652e-03,
9766         s6 = 3.19475326584100867617e-05,
9767         r1 = 1.39200533467621045958e+00,
9768         r2 = 7.21935547567138069525e-01,
9769         r3 = 1.71933865632803078993e-01,
9770         r4 = 1.86459191715652901344e-02,
9771         r5 = 7.77942496381893596434e-04,
9772         r6 = 7.32668430744625636189e-06,
9773         w0 = 4.18938533204672725052e-01,
9774         w1 = 8.33333333333329678849e-02,
9775         w2 = -2.77777777728775536470e-03,
9776         w3 = 7.93650558643019558500e-04,
9777         w4 = -5.95187557450339963135e-04,
9778         w5 = 8.36339918996282139126e-04,
9779         w6 = -1.63092934096575273989e-03;
9780
9781     union {double f; UINT64 i;} u = {x};
9782     double t, y, z, nadj, p, p1, p2, p3, q, r, w;
9783     UINT32 ix;
9784     int sign,i;
9785
9786     /* purge off +-inf, NaN, +-0, tiny and negative arguments */
9787     sign = u.i >> 63;
9788     ix = u.i >> 32 & 0x7fffffff;
9789     if (ix >= 0x7ff00000)
9790         return x * x;
9791     if (ix < (0x3ff - 70) << 20) { /* |x|<2**-70, return -log(|x|) */
9792         if(sign)
9793             x = -x;
9794         return -log(x);
9795     }
9796     if (sign) {
9797         x = -x;
9798         t = sin_pi(x);
9799         if (t == 0.0) { /* -integer */
9800             *_errno() = ERANGE;
9801             return 1.0 / (x - x);
9802         }
9803         if (t <= 0.0)
9804             t = -t;
9805         nadj = log(pi / (t * x));
9806     }
9807
9808     /* purge off 1 and 2 */
9809     if ((ix == 0x3ff00000 || ix == 0x40000000) && (UINT32)u.i == 0)
9810         r = 0;
9811     /* for x < 2.0 */
9812     else if (ix < 0x40000000) {
9813         if (ix <= 0x3feccccc) { /* lgamma(x) = lgamma(x+1)-log(x) */
9814             r = -log(x);
9815             if (ix >= 0x3FE76944) {
9816                 y = 1.0 - x;
9817                 i = 0;
9818             } else if (ix >= 0x3FCDA661) {
9819                 y = x - (tc - 1.0);
9820                 i = 1;
9821             } else {
9822                 y = x;
9823                 i = 2;
9824             }
9825         } else {
9826             r = 0.0;
9827             if (ix >= 0x3FFBB4C3) { /* [1.7316,2] */
9828                 y = 2.0 - x;
9829                 i = 0;
9830             } else if(ix >= 0x3FF3B4C4) { /* [1.23,1.73] */
9831                 y = x - tc;
9832                 i = 1;
9833             } else {
9834                 y = x - 1.0;
9835                 i = 2;
9836             }
9837         }
9838         switch (i) {
9839         case 0:
9840             z = y * y;
9841             p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10))));
9842             p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11)))));
9843             p = y * p1 + p2;
9844             r += (p - 0.5 * y);
9845             break;
9846         case 1:
9847             z = y * y;
9848             w = z * y;
9849             p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12))); /* parallel comp */
9850             p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13)));
9851             p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14)));
9852             p = z * p1 - (tt - w * (p2 + y * p3));
9853             r += tf + p;
9854             break;
9855         case 2:
9856             p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5)))));
9857             p2 = 1.0 + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
9858             r += -0.5 * y + p1 / p2;
9859         }
9860     } else if (ix < 0x40200000) { /* x < 8.0 */
9861         i = (int)x;
9862         y = x - (double)i;
9863         p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
9864         q = 1.0 + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6)))));
9865         r = 0.5 * y + p / q;
9866         z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */
9867         switch (i) {
9868         case 7: z *= y + 6.0; /* fall through */
9869         case 6: z *= y + 5.0; /* fall through */
9870         case 5: z *= y + 4.0; /* fall through */
9871         case 4: z *= y + 3.0; /* fall through */
9872         case 3:
9873             z *= y + 2.0;
9874             r += log(z);
9875             break;
9876         }
9877     } else if (ix < 0x43900000) { /* 8.0 <= x < 2**58 */
9878         t = log(x);
9879         z = 1.0 / x;
9880         y = z * z;
9881         w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
9882         r = (x - 0.5) * (t - 1.0) + w;
9883     } else /* 2**58 <= x <= inf */
9884         r = x * (log(x) - 1.0);
9885     if (sign)
9886         r = nadj - r;
9887     return r;
9888 }
9889
9890 /* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
9891 static float sinf_pi(float x)
9892 {
9893     double y;
9894     int n;
9895
9896     /* spurious inexact if odd int */
9897     x = 2 * (x * 0.5f - floorf(x * 0.5f)); /* x mod 2.0 */
9898
9899     n = (int)(x * 4);
9900     n = (n + 1) / 2;
9901     y = x - n * 0.5f;
9902     y *= M_PI;
9903     switch (n) {
9904     default: /* case 4: */
9905     case 0: return __sindf(y);
9906     case 1: return __cosdf(y);
9907     case 2: return __sindf(-y);
9908     case 3: return -__cosdf(y);
9909     }
9910 }
9911
9912 /*********************************************************************
9913  *      lgammaf (MSVCR120.@)
9914  *
9915  * Copied from musl: src/math/lgammaf_r.c
9916  */
9917 float CDECL lgammaf(float x)
9918 {
9919     static const float pi = 3.1415927410e+00,
9920         a0 = 7.7215664089e-02,
9921         a1 = 3.2246702909e-01,
9922         a2 = 6.7352302372e-02,
9923         a3 = 2.0580807701e-02,
9924         a4 = 7.3855509982e-03,
9925         a5 = 2.8905137442e-03,
9926         a6 = 1.1927076848e-03,
9927         a7 = 5.1006977446e-04,
9928         a8 = 2.2086278477e-04,
9929         a9 = 1.0801156895e-04,
9930         a10 = 2.5214456400e-05,
9931         a11 = 4.4864096708e-05,
9932         tc = 1.4616321325e+00,
9933         tf = -1.2148628384e-01,
9934         tt = 6.6971006518e-09,
9935         t0 = 4.8383611441e-01,
9936         t1 = -1.4758771658e-01,
9937         t2 = 6.4624942839e-02,
9938         t3 = -3.2788541168e-02,
9939         t4 = 1.7970675603e-02,
9940         t5 = -1.0314224288e-02,
9941         t6 = 6.1005386524e-03,
9942         t7 = -3.6845202558e-03,
9943         t8 = 2.2596477065e-03,
9944         t9 = -1.4034647029e-03,
9945         t10 = 8.8108185446e-04,
9946         t11 = -5.3859531181e-04,
9947         t12 = 3.1563205994e-04,
9948         t13 = -3.1275415677e-04,
9949         t14 = 3.3552918467e-04,
9950         u0 = -7.7215664089e-02,
9951         u1 = 6.3282704353e-01,
9952         u2 = 1.4549225569e+00,
9953         u3 = 9.7771751881e-01,
9954         u4 = 2.2896373272e-01,
9955         u5 = 1.3381091878e-02,
9956         v1 = 2.4559779167e+00,
9957         v2 = 2.1284897327e+00,
9958         v3 = 7.6928514242e-01,
9959         v4 = 1.0422264785e-01,
9960         v5 = 3.2170924824e-03,
9961         s0 = -7.7215664089e-02,
9962         s1 = 2.1498242021e-01,
9963         s2 = 3.2577878237e-01,
9964         s3 = 1.4635047317e-01,
9965         s4 = 2.6642270386e-02,
9966         s5 = 1.8402845599e-03,
9967         s6 = 3.1947532989e-05,
9968         r1 = 1.3920053244e+00,
9969         r2 = 7.2193557024e-01,
9970         r3 = 1.7193385959e-01,
9971         r4 = 1.8645919859e-02,
9972         r5 = 7.7794247773e-04,
9973         r6 = 7.3266842264e-06,
9974         w0 = 4.1893854737e-01,
9975         w1 = 8.3333335817e-02,
9976         w2 = -2.7777778450e-03,
9977         w3 = 7.9365057172e-04,
9978         w4 = -5.9518753551e-04,
9979         w5 = 8.3633989561e-04,
9980         w6 = -1.6309292987e-03;
9981
9982     union {float f; UINT32 i;} u = {x};
9983     float t, y, z, nadj, p, p1, p2, p3, q, r, w;
9984     UINT32 ix;
9985     int i, sign;
9986
9987     /* purge off +-inf, NaN, +-0, tiny and negative arguments */
9988     sign = u.i >> 31;
9989     ix = u.i & 0x7fffffff;
9990     if (ix >= 0x7f800000)
9991         return x * x;
9992     if (ix < 0x35000000) { /* |x| < 2**-21, return -log(|x|) */
9993         if (sign)
9994             x = -x;
9995         return -logf(x);
9996     }
9997     if (sign) {
9998         x = -x;
9999         t = sinf_pi(x);
10000         if (t == 0.0f) { /* -integer */
10001             *_errno() = ERANGE;
10002             return 1.0f / (x - x);
10003         }
10004         if (t <= 0.0f)
10005             t = -t;
10006         nadj = logf(pi / (t * x));
10007     }
10008
10009     /* purge off 1 and 2 */
10010     if (ix == 0x3f800000 || ix == 0x40000000)
10011         r = 0;
10012     /* for x < 2.0 */
10013     else if (ix < 0x40000000) {
10014         if (ix <= 0x3f666666) { /* lgamma(x) = lgamma(x+1)-log(x) */
10015             r = -logf(x);
10016             if (ix >= 0x3f3b4a20) {
10017                 y = 1.0f - x;
10018                 i = 0;
10019             } else if (ix >= 0x3e6d3308) {
10020                 y = x - (tc - 1.0f);
10021                 i = 1;
10022             } else {
10023                 y = x;
10024                 i = 2;
10025             }
10026         } else {
10027             r = 0.0f;
10028             if (ix >= 0x3fdda618) { /* [1.7316,2] */
10029                 y = 2.0f - x;
10030                 i = 0;
10031             } else if (ix >= 0x3F9da620) { /* [1.23,1.73] */
10032                 y = x - tc;
10033                 i = 1;
10034             } else {
10035                 y = x - 1.0f;
10036                 i = 2;
10037             }
10038         }
10039         switch(i) {
10040         case 0:
10041             z = y * y;
10042             p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10))));
10043             p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11)))));
10044             p = y * p1 + p2;
10045             r += p - 0.5f * y;
10046             break;
10047         case 1:
10048             z = y * y;
10049             w = z * y;
10050             p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12))); /* parallel comp */
10051             p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13)));
10052             p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14)));
10053             p = z * p1 - (tt - w * (p2 + y * p3));
10054             r += (tf + p);
10055             break;
10056         case 2:
10057             p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5)))));
10058             p2 = 1.0f + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
10059             r += -0.5f * y + p1 / p2;
10060         }
10061     } else if (ix < 0x41000000) { /* x < 8.0 */
10062         i = (int)x;
10063         y = x - (float)i;
10064         p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
10065         q = 1.0f + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6)))));
10066         r = 0.5f * y + p / q;
10067         z = 1.0f; /* lgamma(1+s) = log(s) + lgamma(s) */
10068         switch (i) {
10069         case 7: z *= y + 6.0f; /* fall through */
10070         case 6: z *= y + 5.0f; /* fall through */
10071         case 5: z *= y + 4.0f; /* fall through */
10072         case 4: z *= y + 3.0f; /* fall through */
10073         case 3:
10074             z *= y + 2.0f;
10075             r += logf(z);
10076             break;
10077         }
10078     } else if (ix < 0x5c800000) { /* 8.0 <= x < 2**58 */
10079         t = logf(x);
10080         z = 1.0f / x;
10081         y = z * z;
10082         w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
10083         r = (x - 0.5f) * (t - 1.0f) + w;
10084     } else /* 2**58 <= x <= inf */
10085         r = x * (logf(x) - 1.0f);
10086     if (sign)
10087         r = nadj - r;
10088     return r;
10089 }
10090
/* Rational function Snum(x) / Sden(x) used by tgamma().
 *
 * Both tables hold coefficients in ascending powers of x.  For x < 8 the
 * two polynomials are evaluated with Horner's scheme; for larger x they are
 * evaluated in powers of 1/x instead, which yields the same quotient while
 * avoiding overflow. */
static double tgamma_S(double x)
{
    static const double Snum[] = {
        23531376880.410759688572007674451636754734846804940,
        42919803642.649098768957899047001988850926355848959,
        35711959237.355668049440185451547166705960488635843,
        17921034426.037209699919755754458931112671403265390,
        6039542586.3520280050642916443072979210699388420708,
        1439720407.3117216736632230727949123939715485786772,
        248874557.86205415651146038641322942321632125127801,
        31426415.585400194380614231628318205362874684987640,
        2876370.6289353724412254090516208496135991145378768,
        186056.26539522349504029498971604569928220784236328,
        8071.6720023658162106380029022722506138218516325024,
        210.82427775157934587250973392071336271166969580291,
        2.5066282746310002701649081771338373386264310793408,
    };
    static const double Sden[] = {
        0, 39916800, 120543840, 150917976, 105258076, 45995730, 13339535,
        2637558, 357423, 32670, 1925, 66, 1,
    };

    const int terms = ARRAY_SIZE(Snum);
    double top = 0, bottom = 0;
    int k;

    if (x < 8) {
        /* highest power first */
        k = terms;
        while (k-- > 0) {
            top = top * x + Snum[k];
            bottom = bottom * x + Sden[k];
        }
    } else {
        /* to avoid overflow handle large x differently */
        k = 0;
        while (k < terms) {
            top = top / x + Snum[k];
            bottom = bottom / x + Sden[k];
            k++;
        }
    }
    return top / bottom;
}
10129
10130 /*********************************************************************
10131  *      tgamma (MSVCR120.@)
10132  *
10133  * Copied from musl: src/math/tgamma.c
10134  */
10135 double CDECL tgamma(double x)
10136 {
10137     static const double gmhalf = 5.524680040776729583740234375;
10138     static const double fact[] = {
10139         1, 1, 2, 6, 24, 120, 720, 5040.0, 40320.0, 362880.0, 3628800.0, 39916800.0,
10140         479001600.0, 6227020800.0, 87178291200.0, 1307674368000.0, 20922789888000.0,
10141         355687428096000.0, 6402373705728000.0, 121645100408832000.0,
10142         2432902008176640000.0, 51090942171709440000.0, 1124000727777607680000.0,
10143     };
10144
10145     union {double f; UINT64 i;} u = {x};
10146     double absx, y, dy, z, r;
10147     UINT32 ix = u.i >> 32 & 0x7fffffff;
10148     int sign = u.i >> 63;
10149
10150     /* special cases */
10151     if (ix >= 0x7ff00000) {
10152         /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */
10153         if (u.i == 0xfff0000000000000ULL)
10154             *_errno() = EDOM;
10155         return x + INFINITY;
10156     }
10157     if (ix < (0x3ff - 54) << 20) {
10158         /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */
10159         if (x == 0.0)
10160             *_errno() = ERANGE;
10161         return 1 / x;
10162     }
10163
10164     /* integer arguments */
10165     /* raise inexact when non-integer */
10166     if (x == floor(x)) {
10167         if (sign) {
10168             *_errno() = EDOM;
10169             return 0 / (x - x);
10170         }
10171         if (x <= ARRAY_SIZE(fact))
10172             return fact[(int)x - 1];
10173     }
10174
10175     /* x >= 172: tgamma(x)=inf with overflow */
10176     /* x =< -184: tgamma(x)=+-0 with underflow */
10177     if (ix >= 0x40670000) { /* |x| >= 184 */
10178         *_errno() = ERANGE;
10179         if (sign) {
10180             fp_barrierf(0x1p-126 / x);
10181             return 0;
10182         }
10183         x *= 0x1p1023;
10184         return x;
10185     }
10186
10187     absx = sign ? -x : x;
10188
10189     /* handle the error of x + g - 0.5 */
10190     y = absx + gmhalf;
10191     if (absx > gmhalf) {
10192         dy = y - absx;
10193         dy -= gmhalf;
10194     } else {
10195         dy = y - gmhalf;
10196         dy -= absx;
10197     }
10198
10199     z = absx - 0.5;
10200     r = tgamma_S(absx) * exp(-y);
10201     if (x < 0) {
10202         /* reflection formula for negative x */
10203         /* sinpi(absx) is not 0, integers are already handled */
10204         r = -M_PI / (sin_pi(absx) * absx * r);
10205         dy = -dy;
10206         z = -z;
10207     }
10208     r += dy * (gmhalf + 0.5) * r / y;
10209     z = pow(y, 0.5 * z);
10210     y = r * z * z;
10211     return y;
10212 }
10213
10214 /*********************************************************************
10215  *      tgammaf (MSVCR120.@)
10216  *
10217  * Copied from musl: src/math/tgammaf.c
10218  */
10219 float CDECL tgammaf(float x)
10220 {
10221     return tgamma(x);
10222 }
10223
10224 /*********************************************************************
10225  *      nan (MSVCR120.@)
10226  */
10227 double CDECL nan(const char *tagp)
10228 {
10229     /* Windows ignores input (MSDN) */
10230     return NAN;
10231 }
10232
10233 /*********************************************************************
10234  *      nanf (MSVCR120.@)
10235  */
10236 float CDECL nanf(const char *tagp)
10237 {
10238     return NAN;
10239 }
10240
10241 /*********************************************************************
10242  *      _except1 (MSVCR120.@)
10243  *  TODO:
10244  *   - find meaning of ignored cw and operation bits
10245  *   - unk parameter
10246  */
10247 double CDECL _except1(DWORD fpe, _FP_OPERATION_CODE op, double arg, double res, DWORD cw, void *unk)
10248 {
10249     ULONG_PTR exception_arg;
10250     DWORD exception = 0;
10251     DWORD fpword = 0;
10252     WORD operation;
10253     int raise = 0;
10254
10255     TRACE("(%x %x %lf %lf %x %p)\n", fpe, op, arg, res, cw, unk);
10256
10257 #ifdef _WIN64
10258     cw = ((cw >> 7) & 0x3f) | ((cw >> 3) & 0xc00);
10259 #endif
10260     operation = op << 5;
10261     exception_arg = (ULONG_PTR)&operation;
10262
10263     if (fpe & 0x1) { /* overflow */
10264         if ((fpe == 0x1 && (cw & 0x8)) || (fpe==0x11 && (cw & 0x28))) {
10265             /* 32-bit version also sets SW_INEXACT here */
10266             raise |= FE_OVERFLOW;
10267             if (fpe & 0x10) raise |= FE_INEXACT;
10268             res = signbit(res) ? -INFINITY : INFINITY;
10269         } else {
10270             exception = EXCEPTION_FLT_OVERFLOW;
10271         }
10272     } else if (fpe & 0x2) { /* underflow */
10273         if ((fpe == 0x2 && (cw & 0x10)) || (fpe==0x12 && (cw & 0x30))) {
10274             raise |= FE_UNDERFLOW;
10275             if (fpe & 0x10) raise |= FE_INEXACT;
10276             res = signbit(res) ? -0.0 : 0.0;
10277         } else {
10278             exception = EXCEPTION_FLT_UNDERFLOW;
10279         }
10280     } else if (fpe & 0x4) { /* zerodivide */
10281         if ((fpe == 0x4 && (cw & 0x4)) || (fpe==0x14 && (cw & 0x24))) {
10282             raise |= FE_DIVBYZERO;
10283             if (fpe & 0x10) raise |= FE_INEXACT;
10284         } else {
10285             exception = EXCEPTION_FLT_DIVIDE_BY_ZERO;
10286         }
10287     } else if (fpe & 0x8) { /* invalid */
10288         if (fpe == 0x8 && (cw & 0x1)) {
10289             raise |= FE_INVALID;
10290         } else {
10291             exception = EXCEPTION_FLT_INVALID_OPERATION;
10292         }
10293     } else if (fpe & 0x10) { /* inexact */
10294         if (fpe == 0x10 && (cw & 0x20)) {
10295             raise |= FE_INEXACT;
10296         } else {
10297             exception = EXCEPTION_FLT_INEXACT_RESULT;
10298         }
10299     }
10300
10301     if (exception)
10302         raise = 0;
10303     feraiseexcept(raise);
10304     if (exception)
10305         RaiseException(exception, 0, 1, &exception_arg);
10306
10307     if (cw & 0x1) fpword |= _EM_INVALID;
10308     if (cw & 0x2) fpword |= _EM_DENORMAL;
10309     if (cw & 0x4) fpword |= _EM_ZERODIVIDE;
10310     if (cw & 0x8) fpword |= _EM_OVERFLOW;
10311     if (cw & 0x10) fpword |= _EM_UNDERFLOW;
10312     if (cw & 0x20) fpword |= _EM_INEXACT;
10313     switch (cw & 0xc00)
10314     {
10315         case 0xc00: fpword |= _RC_UP|_RC_DOWN; break;
10316         case 0x800: fpword |= _RC_UP; break;
10317         case 0x400: fpword |= _RC_DOWN; break;
10318     }
10319     switch (cw & 0x300)
10320     {
10321         case 0x0:   fpword |= _PC_24; break;
10322         case 0x200: fpword |= _PC_53; break;
10323         case 0x300: fpword |= _PC_64; break;
10324     }
10325     if (cw & 0x1000) fpword |= _IC_AFFINE;
10326     _control87(fpword, 0xffffffff);
10327
10328     return res;
10329 }
10330
10331 _Dcomplex* CDECL _Cbuild(_Dcomplex *ret, double r, double i)
10332 {
10333     ret->_Val[0] = r;
10334     ret->_Val[1] = i;
10335     return ret;
10336 }
10337
10338 double CDECL MSVCR120_creal(_Dcomplex z)
10339 {
10340     return z._Val[0];
10341 }
10342
10343 /*********************************************************************
10344  *      ilogb (MSVCR120.@)
10345  */
10346 int CDECL ilogb(double x)
10347 {
10348     return __ilogb(x);
10349 }
10350
10351 /*********************************************************************
10352  *      ilogbf (MSVCR120.@)
10353  */
10354 int CDECL ilogbf(float x)
10355 {
10356     return __ilogbf(x);
10357 }
10358 #endif /* _MSVCR_VER>=120 */