dlls/msvcrt/math.c

   1 /*
   2  * msvcrt.dll math functions
   3  *
   4  * Copyright 2000 Jon Griffiths
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  19  *
  20  *
  21  * For functions copied from musl libc (http://musl.libc.org/):
  22  * ====================================================
  23  * Copyright 2005-2020 Rich Felker, et al.
  24  *
  25  * Permission is hereby granted, free of charge, to any person obtaining
  26  * a copy of this software and associated documentation files (the
  27  * "Software"), to deal in the Software without restriction, including
  28  * without limitation the rights to use, copy, modify, merge, publish,
  29  * distribute, sublicense, and/or sell copies of the Software, and to
  30  * permit persons to whom the Software is furnished to do so, subject to
  31  * the following conditions:
  32  *
  33  * The above copyright notice and this permission notice shall be
  34  * included in all copies or substantial portions of the Software.
  35  * ====================================================
  36  */
  37
  38 #include <complex.h>
  39 #include <stdio.h>
  40 #include <fenv.h>
  41 #include <fpieee.h>
  42 #include <limits.h>
  43 #include <locale.h>
  44 #include <math.h>
  45
  46 #include "msvcrt.h"
  47 #include "winternl.h"
  48
  49 #include "wine/asm.h"
  50 #include "wine/debug.h"
  51
  52 WINE_DEFAULT_DEBUG_CHANNEL(msvcrt);
  53
  54 #undef div
  55 #undef ldiv
  56
/* Error type codes passed as the 'type' argument of math_error() */
#define _DOMAIN         1       /* domain error in argument */
#define _SING           2       /* singularity */
#define _OVERFLOW       3       /* range overflow */
#define _UNDERFLOW      4       /* range underflow */

/* Signature of a math error callback as accepted by __setusermatherr() */
typedef int (CDECL *MSVCRT_matherr_func)(struct _exception *);

/* Callback installed by __setusermatherr(); math_error() skips it while it is NULL */
static MSVCRT_matherr_func MSVCRT_default_matherr_func = NULL;

/* Set in msvcrt_init_math() from IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) */
BOOL sse2_supported;
/* SSE2 switch: initialised in msvcrt_init_math(), updated by _set_SSE2_enable() */
static BOOL sse2_enabled;

/* Pointer handed to __wine_init_unix_lib() in msvcrt_init_math() to be filled in */
static const struct unix_funcs *unix_funcs;
  70
  71 void msvcrt_init_math( void *module )
  72 {
  73     sse2_supported = IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE );
  74 #if _MSVCR_VER <=71
  75     sse2_enabled = FALSE;
  76 #else
  77     sse2_enabled = sse2_supported;
  78 #endif
  79     __wine_init_unix_lib( module, DLL_PROCESS_ATTACH, NULL, &unix_funcs );
  80 }
  81
  82 /* Copied from musl: src/internal/libm.h */
  83 static inline float fp_barrierf(float x)
  84 {
  85     volatile float y = x;
  86     return y;
  87 }
  88
  89 static inline double fp_barrier(double x)
  90 {
  91     volatile double y = x;
  92     return y;
  93 }
  94
  95 static inline double CDECL ret_nan( BOOL update_sw )
  96 {
  97     double x = 1.0;
  98     if (!update_sw) return -NAN;
  99     return (x - x) / (x - x);
 100 }
 101
/* Assembly fragment that clears the MASK bits of the x87 control word.
 * It reserves 4 bytes of stack, stores the current control word at (%esp) and
 * keeps a second copy at 2(%esp).  Only when at least one MASK bit is set is the
 * second copy replaced by the masked value and loaded with fldcw.  Uses %ax as
 * scratch and leaves the two copies on the stack for RESET_X87_CW. */
#define SET_X87_CW(MASK) \
    "subl $4, %esp\n\t" \
    __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
    "fnstcw (%esp)\n\t" \
    "movw (%esp), %ax\n\t" \
    "movw %ax, 2(%esp)\n\t" \
    "testw $" #MASK ", %ax\n\t" \
    "jz 1f\n\t" \
    "andw $~" #MASK ", %ax\n\t" \
    "movw %ax, 2(%esp)\n\t" \
    "fldcw 2(%esp)\n\t" \
    "1:\n\t"
 114
/* Assembly fragment that undoes SET_X87_CW.  If the two control word copies at
 * (%esp) and 2(%esp) differ, st(0) is stored as a 64-bit value at 8(%esp), the
 * original control word is reloaded from (%esp) and the value is loaded back.
 * In all cases the 4 bytes of stack are released.  Uses %ax as scratch.
 * NOTE(review): 8(%esp) is outside the 4 bytes reserved by SET_X87_CW --
 * presumably a slot provided by the surrounding asm function; confirm at the
 * use sites. */
#define RESET_X87_CW \
    "movw (%esp), %ax\n\t" \
    "cmpw %ax, 2(%esp)\n\t" \
    "je 1f\n\t" \
    "fstpl 8(%esp)\n\t" \
    "fldcw (%esp)\n\t" \
    "fldl 8(%esp)\n\t" \
    "fwait\n\t" \
    "1:\n\t" \
    "addl $4, %esp\n\t" \
    __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t")
 126
 127 /*********************************************************************
 128  *      _matherr (CRTDLL.@)
 129  */
/* Math error handler that does nothing: the exception record e is not
 * inspected and 0 is always returned. */
int CDECL _matherr(struct _exception *e)
{
    return 0;
}
 134
 135
 136 static double math_error(int type, const char *name, double arg1, double arg2, double retval)
 137 {
 138     struct _exception exception = {type, (char *)name, arg1, arg2, retval};
 139
 140     TRACE("(%d, %s, %g, %g, %g)\n", type, debugstr_a(name), arg1, arg2, retval);
 141
 142     if (MSVCRT_default_matherr_func && MSVCRT_default_matherr_func(&exception))
 143         return exception.retval;
 144
 145     switch (type)
 146     {
 147     case 0:
 148         /* don't set errno */
 149         break;
 150     case _DOMAIN:
 151         *_errno() = EDOM;
 152         break;
 153     case _SING:
 154     case _OVERFLOW:
 155         *_errno() = ERANGE;
 156         break;
 157     case _UNDERFLOW:
 158         /* don't set errno */
 159         break;
 160     default:
 161         ERR("Unhandled math error!\n");
 162     }
 163
 164     return exception.retval;
 165 }
 166
 167 /*********************************************************************
 168  *      __setusermatherr (MSVCRT.@)
 169  */
/* Stores func as the callback that math_error() invokes before applying its
 * own errno handling; math_error() skips the call while the pointer is NULL. */
void CDECL __setusermatherr(MSVCRT_matherr_func func)
{
    MSVCRT_default_matherr_func = func;
    TRACE("new matherr handler %p\n", func);
}
 175
 176 /*********************************************************************
 177  *      _set_SSE2_enable (MSVCRT.@)
 178  */
 179 int CDECL _set_SSE2_enable(int flag)
 180 {
 181     sse2_enabled = flag && sse2_supported;
 182     return sse2_enabled;
 183 }
 184
 185 #if defined(_WIN64)
 186 # if _MSVCR_VER>=140
 187 /*********************************************************************
 188  *      _get_FMA3_enable (UCRTBASE.@)
 189  */
/* Stub: emits a FIXME message and always returns 0. */
int CDECL _get_FMA3_enable(void)
{
    FIXME("() stub\n");
    return 0;
}
 195 # endif
 196
 197 # if _MSVCR_VER>=120
 198 /*********************************************************************
 199  *      _set_FMA3_enable (MSVCR120.@)
 200  */
/* Stub: emits a FIXME message with the requested flag and always returns 0;
 * the flag is not stored anywhere. */
int CDECL _set_FMA3_enable(int flag)
{
    FIXME("(%x) stub\n", flag);
    return 0;
}
 206 # endif
 207 #endif
 208
 209 #if !defined(__i386__) || _MSVCR_VER>=120
 210
 211 /*********************************************************************
 212  *      _chgsignf (MSVCRT.@)
 213  */
 214 float CDECL _chgsignf( float num )
 215 {
 216     union { float f; UINT32 i; } u = { num };
 217     u.i ^= 0x80000000;
 218     return u.f;
 219 }
 220
 221 /*********************************************************************
 222  *      _copysignf (MSVCRT.@)
 223  *
 224  * Copied from musl: src/math/copysignf.c
 225  */
 226 float CDECL _copysignf( float x, float y )
 227 {
 228     union { float f; UINT32 i; } ux = { x }, uy = { y };
 229     ux.i &= 0x7fffffff;
 230     ux.i |= uy.i & 0x80000000;
 231     return ux.f;
 232 }
 233
 234 /*********************************************************************
 235  *      _nextafterf (MSVCRT.@)
 236  *
 237  * Copied from musl: src/math/nextafterf.c
 238  */
 239 float CDECL _nextafterf( float x, float y )
 240 {
 241     unsigned int ix = *(unsigned int*)&x;
 242     unsigned int iy = *(unsigned int*)&y;
 243     unsigned int ax, ay, e;
 244
 245     if (isnan(x) || isnan(y))
 246         return x + y;
 247     if (x == y) {
 248         if (_fpclassf(y) & (_FPCLASS_ND | _FPCLASS_PD | _FPCLASS_NZ | _FPCLASS_PZ ))
 249             *_errno() = ERANGE;
 250         return y;
 251     }
 252     ax = ix & 0x7fffffff;
 253     ay = iy & 0x7fffffff;
 254     if (ax == 0) {
 255         if (ay == 0)
 256             return y;
 257         ix = (iy & 0x80000000) | 1;
 258     } else if (ax > ay || ((ix ^ iy) & 0x80000000))
 259         ix--;
 260     else
 261         ix++;
 262     e = ix & 0x7f800000;
 263     /* raise overflow if ix is infinite and x is finite */
 264     if (e == 0x7f800000) {
 265         fp_barrierf(x + x);
 266         *_errno() = ERANGE;
 267     }
 268     /* raise underflow if ix is subnormal or zero */
 269     y = *(float*)&ix;
 270     if (e == 0) {
 271         fp_barrierf(x * x + y * y);
 272         *_errno() = ERANGE;
 273     }
 274     return y;
 275 }
 276
 277 /* Copied from musl: src/math/ilogbf.c */
 278 static int __ilogbf(float x)
 279 {
 280     union { float f; UINT32 i; } u = { x };
 281     int e = u.i >> 23 & 0xff;
 282
 283     if (!e)
 284     {
 285         u.i <<= 9;
 286         if (u.i == 0) return FP_ILOGB0;
 287         /* subnormal x */
 288         for (e = -0x7f; u.i >> 31 == 0; e--, u.i <<= 1);
 289         return e;
 290     }
 291     if (e == 0xff) return u.i << 9 ? FP_ILOGBNAN : INT_MAX;
 292     return e - 0x7f;
 293 }
 294
 295 /*********************************************************************
 296  *      _logbf (MSVCRT.@)
 297  *
 298  * Copied from musl: src/math/logbf.c
 299  */
 300 float CDECL _logbf(float x)
 301 {
 302     if (!isfinite(x))
 303         return x * x;
 304     if (x == 0) {
 305         *_errno() = ERANGE;
 306         return -1 / (x * x);
 307     }
 308     return __ilogbf(x);
 309 }
 310
 311 #endif
 312
 313 /* Copied from musl: src/math/scalbn.c */
 314 static double __scalbn(double x, int n)
 315 {
 316     union {double f; UINT64 i;} u;
 317     double y = x;
 318
 319     if (n > 1023) {
 320         y *= 0x1p1023;
 321         n -= 1023;
 322         if (n > 1023) {
 323             y *= 0x1p1023;
 324             n -= 1023;
 325             if (n > 1023)
 326                 n = 1023;
 327         }
 328     } else if (n < -1022) {
 329         /* make sure final n < -53 to avoid double
 330            rounding in the subnormal range */
 331         y *= 0x1p-1022 * 0x1p53;
 332         n += 1022 - 53;
 333         if (n < -1022) {
 334             y *= 0x1p-1022 * 0x1p53;
 335             n += 1022 - 53;
 336             if (n < -1022)
 337                 n = -1022;
 338         }
 339     }
 340     u.i = (UINT64)(0x3ff + n) << 52;
 341     x = y * u.f;
 342     return x;
 343 }
 344
 345 /* Copied from musl: src/math/__rem_pio2_large.c */
 346 static int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
 347 {
 348     static const int init_jk[] = {3, 4};
 349     static const INT32 ipio2[] = {
 350         0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
 351         0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
 352         0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
 353         0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
 354         0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
 355         0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
 356         0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
 357         0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
 358         0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
 359         0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
 360         0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
 361     };
 362     static const double PIo2[] = {
 363         1.57079625129699707031e+00,
 364         7.54978941586159635335e-08,
 365         5.39030252995776476554e-15,
 366         3.28200341580791294123e-22,
 367         1.27065575308067607349e-29,
 368         1.22933308981111328932e-36,
 369         2.73370053816464559624e-44,
 370         2.16741683877804819444e-51,
 371     };
 372
 373     INT32 jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
 374     double z, fw, f[20], fq[20] = {0}, q[20];
 375
 376     /* initialize jk*/
 377     jk = init_jk[prec];
 378     jp = jk;
 379
 380     /* determine jx,jv,q0, note that 3>q0 */
 381     jx = nx - 1;
 382     jv = (e0 - 3) / 24;
 383     if(jv < 0) jv = 0;
 384     q0 = e0 - 24 * (jv + 1);
 385
 386     /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
 387     j = jv - jx;
 388     m = jx + jk;
 389     for (i = 0; i <= m; i++, j++)
 390         f[i] = j < 0 ? 0.0 : (double)ipio2[j];
 391
 392     /* compute q[0],q[1],...q[jk] */
 393     for (i = 0; i <= jk; i++) {
 394         for (j = 0, fw = 0.0; j <= jx; j++)
 395             fw += x[j] * f[jx + i - j];
 396         q[i] = fw;
 397     }
 398
 399     jz = jk;
 400 recompute:
 401     /* distill q[] into iq[] reversingly */
 402     for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
 403         fw = (double)(INT32)(0x1p-24 * z);
 404         iq[i] = (INT32)(z - 0x1p24 * fw);
 405         z = q[j - 1] + fw;
 406     }
 407
 408     /* compute n */
 409     z = __scalbn(z, q0); /* actual value of z */
 410     z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */
 411     n = (INT32)z;
 412     z -= (double)n;
 413     ih = 0;
 414     if (q0 > 0) {  /* need iq[jz-1] to determine n */
 415         i = iq[jz - 1] >> (24 - q0);
 416         n += i;
 417         iq[jz - 1] -= i << (24 - q0);
 418         ih = iq[jz - 1] >> (23 - q0);
 419     }
 420     else if (q0 == 0) ih = iq[jz - 1] >> 23;
 421     else if (z >= 0.5) ih = 2;
 422
 423     if (ih > 0) {  /* q > 0.5 */
 424         n += 1;
 425         carry = 0;
 426         for (i = 0; i < jz; i++) {  /* compute 1-q */
 427             j = iq[i];
 428             if (carry == 0) {
 429                 if (j != 0) {
 430                     carry = 1;
 431                     iq[i] = 0x1000000 - j;
 432                 }
 433             } else
 434                 iq[i] = 0xffffff - j;
 435         }
 436         if (q0 > 0) {  /* rare case: chance is 1 in 12 */
 437             switch(q0) {
 438             case 1:
 439                 iq[jz - 1] &= 0x7fffff;
 440                 break;
 441             case 2:
 442                 iq[jz - 1] &= 0x3fffff;
 443                 break;
 444             }
 445         }
 446         if (ih == 2) {
 447             z = 1.0 - z;
 448             if (carry != 0)
 449                 z -= __scalbn(1.0, q0);
 450         }
 451     }
 452
 453     /* check if recomputation is needed */
 454     if (z == 0.0) {
 455         j = 0;
 456         for (i = jz - 1; i >= jk; i--) j |= iq[i];
 457         if (j == 0) {  /* need recomputation */
 458             for (k = 1; iq[jk - k] == 0; k++);  /* k = no. of terms needed */
 459
 460             for (i = jz + 1; i <= jz + k; i++) {  /* add q[jz+1] to q[jz+k] */
 461                 f[jx + i] = (double)ipio2[jv + i];
 462                 for (j = 0, fw = 0.0; j <= jx; j++)
 463                     fw += x[j] * f[jx + i - j];
 464                 q[i] = fw;
 465             }
 466             jz += k;
 467             goto recompute;
 468         }
 469     }
 470
 471     /* chop off zero terms */
 472     if (z == 0.0) {
 473         jz -= 1;
 474         q0 -= 24;
 475         while (iq[jz] == 0) {
 476             jz--;
 477             q0 -= 24;
 478         }
 479     } else { /* break z into 24-bit if necessary */
 480         z = __scalbn(z, -q0);
 481         if (z >= 0x1p24) {
 482             fw = (double)(INT32)(0x1p-24 * z);
 483             iq[jz] = (INT32)(z - 0x1p24 * fw);
 484             jz += 1;
 485             q0 += 24;
 486             iq[jz] = (INT32)fw;
 487         } else
 488             iq[jz] = (INT32)z;
 489     }
 490
 491     /* convert integer "bit" chunk to floating-point value */
 492     fw = __scalbn(1.0, q0);
 493     for (i = jz; i >= 0; i--) {
 494         q[i] = fw * (double)iq[i];
 495         fw *= 0x1p-24;
 496     }
 497
 498     /* compute PIo2[0,...,jp]*q[jz,...,0] */
 499     for(i = jz; i >= 0; i--) {
 500         for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
 501             fw += PIo2[k] * q[i + k];
 502         fq[jz - i] = fw;
 503     }
 504
 505     /* compress fq[] into y[] */
 506     switch(prec) {
 507     case 0:
 508         fw = 0.0;
 509         for (i = jz; i >= 0; i--)
 510             fw += fq[i];
 511         y[0] = ih == 0 ? fw : -fw;
 512         break;
 513     case 1:
 514     case 2:
 515         fw = 0.0;
 516         for (i = jz; i >= 0; i--)
 517             fw += fq[i];
 518         fw = (double)fw;
 519         y[0] = ih==0 ? fw : -fw;
 520         fw = fq[0] - fw;
 521         for (i = 1; i <= jz; i++)
 522             fw += fq[i];
 523         y[1] = ih == 0 ? fw : -fw;
 524         break;
 525     case 3:  /* painful */
 526         for (i = jz; i > 0; i--) {
 527             fw = fq[i - 1] + fq[i];
 528             fq[i] += fq[i - 1] - fw;
 529             fq[i - 1] = fw;
 530         }
 531         for (i = jz; i > 1; i--) {
 532             fw = fq[i - 1] + fq[i];
 533             fq[i] += fq[i - 1] - fw;
 534             fq[i - 1] = fw;
 535         }
 536         for (fw = 0.0, i = jz; i >= 2; i--)
 537             fw += fq[i];
 538         if (ih == 0) {
 539             y[0] = fq[0];
 540             y[1] = fq[1];
 541             y[2] = fw;
 542         } else {
 543             y[0] = -fq[0];
 544             y[1] = -fq[1];
 545             y[2] = -fw;
 546         }
 547     }
 548     return n & 7;
 549 }
 550
 551 /* Based on musl implementation: src/math/round.c */
 552 static double __round(double x)
 553 {
 554     ULONGLONG llx = *(ULONGLONG*)&x, tmp;
 555     int e = (llx >> 52 & 0x7ff) - 0x3ff;
 556
 557     if (e >= 52)
 558         return x;
 559     if (e < -1)
 560         return 0 * x;
 561     else if (e == -1)
 562         return signbit(x) ? -1 : 1;
 563
 564     tmp = 0x000fffffffffffffULL >> e;
 565     if (!(llx & tmp))
 566         return x;
 567     llx += 0x0008000000000000ULL >> e;
 568     llx &= ~tmp;
 569     return *(double*)&llx;
 570 }
 571
 572 #if !defined(__i386__) || _MSVCR_VER >= 120
 573 /* Copied from musl: src/math/expm1f.c */
/* Computes exp(x) - 1 for a float argument.
 *
 * Special cases handled up front:
 * - NaN and +INF are returned unchanged, -INF yields -1
 * - negative x with |x| >= 27*ln2 reports _UNDERFLOW through math_error() with
 *   result -1
 * - x > log(FLT_MAX) reports _OVERFLOW through math_error()
 * - |x| < 2**-25 returns x itself
 *
 * Otherwise x is reduced to hi - lo with x = k*ln2 + r, a rational approximation
 * is evaluated on r and the result is scaled by 2^k.  The order of the floating
 * point operations below matters for accuracy and must not be rearranged. */
static float __expm1f(float x)
{
    static const float ln2_hi = 6.9313812256e-01,
        ln2_lo = 9.0580006145e-06,
        invln2 = 1.4426950216e+00,
        Q1 = -3.3333212137e-2,
        Q2 = 1.5807170421e-3;

    float y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
    union {float f; UINT32 i;} u = {x};
    UINT32 hx = u.i & 0x7fffffff;   /* |x| as an integer */
    int k, sign = u.i >> 31;

    /* filter out huge and non-finite argument */
    if (hx >= 0x4195b844) { /* if |x|>=27*ln2 */
        if (hx >= 0x7f800000) /* NaN or +-INF */
            return u.i == 0xff800000 ? -1 : x;
        if (sign)
            return math_error(_UNDERFLOW, "exp", x, 0, -1);
        if (hx > 0x42b17217) /* x > log(FLT_MAX) */
            return math_error(_OVERFLOW, "exp", x, 0, fp_barrierf(x * FLT_MAX));
    }

    /* argument reduction */
    if (hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
        if (hx < 0x3F851592) { /* and |x| < 1.5 ln2 */
            if (!sign) {
                hi = x - ln2_hi;
                lo = ln2_lo;
                k = 1;
            } else {
                hi = x + ln2_hi;
                lo = -ln2_lo;
                k = -1;
            }
        } else {
            k = invln2 * x + (sign ? -0.5f : 0.5f);
            t = k;
            hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
            lo = t * ln2_lo;
        }
        x = hi - lo;
        c = (hi - x) - lo;  /* rounding error of the reduction */
    } else if (hx < 0x33000000) { /* when |x|<2**-25, return x */
        if (hx < 0x00800000)
            fp_barrierf(x * x);  /* subnormal x: force the evaluation of x*x */
        return x;
    } else
        k = 0;  /* no reduction needed; c is left unset and not read below */

    /* x is now in primary range */
    hfx = 0.5f * x;
    hxs = x * hfx;
    r1 = 1.0f + hxs * (Q1 + hxs * Q2);
    t = 3.0f - r1 * hfx;
    e = hxs * ((r1 - t) / (6.0f - x * t));
    if (k == 0) /* c is 0 */
        return x - (x * e - hxs);
    e = x * (e - c) - c;
    e -= hxs;
    /* exp(x) ~ 2^k (x_reduced - e + 1) */
    if (k == -1)
        return 0.5f * (x - e) - 0.5f;
    if (k == 1) {
        if (x < -0.25f)
            return -2.0f * (e - (x + 0.5f));
        return 1.0f + 2.0f * (x - e);
    }
    u.i = (0x7f + k) << 23; /* 2^k */
    twopk = u.f;
    if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
        y = x - e + 1.0f;
        if (k == 128)
            y = y * 2.0f * 0x1p127f;  /* 2^128 is not representable, scale in two steps */
        else
            y = y * twopk;
        return y - 1.0f;
    }
    u.i = (0x7f-k) << 23; /* 2^-k */
    if (k < 23)
        y = (x - e + (1 - u.f)) * twopk;
    else
        y = (x - (e + u.f) + 1) * twopk;
    return y;
}
 659
 660 /* Copied from musl: src/math/__sindf.c */
 661 static float __sindf(double x)
 662 {
 663     static const double S1 = -0x1.5555555555555p-3,
 664         S2 = 0x1.1111111111111p-7,
 665         S3 = -0x1.a01a01a01a01ap-13,
 666         S4 = 0x1.71de3a556c734p-19;
 667
 668     double r, s, w, z;
 669
 670     z = x * x;
 671     if (x > -7.8175831586122513e-03 && x < 7.8175831586122513e-03)
 672         return x * (1 + S1 * z);
 673
 674     w = z * z;
 675     r = S3 + z * S4;
 676     s = z * x;
 677     return (x + s * (S1 + z * S2)) + s * w * r;
 678 }
 679
 680 /* Copied from musl: src/math/__cosdf.c */
 681 static float __cosdf(double x)
 682 {
 683     static const double C0 = -0x1.0000000000000p-1,
 684         C1 = 0x1.5555555555555p-5,
 685         C2 = -0x1.6c16c16c16c17p-10,
 686         C3 = 0x1.a01a01a01a01ap-16,
 687         C4 = -0x1.27e4fb7789f5cp-22;
 688     double z;
 689
 690     z = x * x;
 691     if (x > -7.8163146972656250e-03 && x < 7.8163146972656250e-03)
 692         return 1 + C0 * z;
 693     return 1.0 + z * (C0 + z * (C1 + z * (C2 + z * (C3 + z * C4))));
 694 }
 695
/* Table of 32 64-bit patterns; the first entry, 0x3ff0000000000000, is the bit
 * pattern of the double 1.0.
 * NOTE(review): presumably the lookup table of the musl exp2f implementation
 * (2^(i/32) with an adjusted exponent field), consumed by code outside this
 * part of the file -- confirm against the users of exp2f_T. */
static const UINT64 exp2f_T[] = {
    0x3ff0000000000000ULL, 0x3fefd9b0d3158574ULL, 0x3fefb5586cf9890fULL, 0x3fef9301d0125b51ULL,
    0x3fef72b83c7d517bULL, 0x3fef54873168b9aaULL, 0x3fef387a6e756238ULL, 0x3fef1e9df51fdee1ULL,
    0x3fef06fe0a31b715ULL, 0x3feef1a7373aa9cbULL, 0x3feedea64c123422ULL, 0x3feece086061892dULL,
    0x3feebfdad5362a27ULL, 0x3feeb42b569d4f82ULL, 0x3feeab07dd485429ULL, 0x3feea47eb03a5585ULL,
    0x3feea09e667f3bcdULL, 0x3fee9f75e8ec5f74ULL, 0x3feea11473eb0187ULL, 0x3feea589994cce13ULL,
    0x3feeace5422aa0dbULL, 0x3feeb737b0cdc5e5ULL, 0x3feec49182a3f090ULL, 0x3feed503b23e255dULL,
    0x3feee89f995ad3adULL, 0x3feeff76f2fb5e47ULL, 0x3fef199bdd85529cULL, 0x3fef3720dcef9069ULL,
    0x3fef5818dcfba487ULL, 0x3fef7c97337b9b5fULL, 0x3fefa4afa2a490daULL, 0x3fefd0765b6e4540ULL
};
 706 #endif
 707
 708 #ifndef __i386__
 709
 710 /*********************************************************************
 711  *      _fpclassf (MSVCRT.@)
 712  */
 713 int CDECL _fpclassf( float num )
 714 {
 715     union { float f; UINT32 i; } u = { num };
 716     int e = u.i >> 23 & 0xff;
 717     int s = u.i >> 31;
 718
 719     switch (e)
 720     {
 721     case 0:
 722         if (u.i << 1) return s ? _FPCLASS_ND : _FPCLASS_PD;
 723         return s ? _FPCLASS_NZ : _FPCLASS_PZ;
 724     case 0xff:
 725         if (u.i << 9) return ((u.i >> 22) & 1) ? _FPCLASS_QNAN : _FPCLASS_SNAN;
 726         return s ? _FPCLASS_NINF : _FPCLASS_PINF;
 727     default:
 728         return s ? _FPCLASS_NN : _FPCLASS_PN;
 729     }
 730 }
 731
 732 /*********************************************************************
 733  *      _finitef (MSVCRT.@)
 734  */
 735 int CDECL _finitef( float num )
 736 {
 737     union { float f; UINT32 i; } u = { num };
 738     return (u.i & 0x7fffffff) < 0x7f800000;
 739 }
 740
 741 /*********************************************************************
 742  *      _isnanf (MSVCRT.@)
 743  */
 744 int CDECL _isnanf( float num )
 745 {
 746     union { float f; UINT32 i; } u = { num };
 747     return (u.i & 0x7fffffff) > 0x7f800000;
 748 }
 749
 750 static float asinf_R(float z)
 751 {
 752     /* coefficients for R(x^2) */
 753     static const float p1 = 1.66666672e-01,
 754                  p2 = -5.11644611e-02,
 755                  p3 = -1.21124933e-02,
 756                  p4 = -3.58742251e-03,
 757                  q1 = -7.56982703e-01;
 758
 759     float p, q;
 760     p = z * (p1 + z * (p2 + z * (p3 + z * p4)));
 761     q = 1.0f + z * q1;
 762     return p / q;
 763 }
 764
 765 /*********************************************************************
 766  *      acosf (MSVCRT.@)
 767  *
 768  * Copied from musl: src/math/acosf.c
 769  */
 770 float CDECL acosf( float x )
 771 {
 772     static const double pio2_lo = 6.12323399573676603587e-17;
 773
 774     float z, w, s, c, df;
 775     unsigned int hx, ix;
 776
 777     hx = *(unsigned int*)&x;
 778     ix = hx & 0x7fffffff;
 779     /* |x| >= 1 or nan */
 780     if (ix >= 0x3f800000) {
 781         if (ix == 0x3f800000) {
 782             if (hx >> 31)
 783                 return M_PI;
 784             return 0;
 785         }
 786         if (isnan(x)) return x;
 787         return math_error(_DOMAIN, "acosf", x, 0, 0 / (x - x));
 788     }
 789     /* |x| < 0.5 */
 790     if (ix < 0x3f000000) {
 791         if (ix <= 0x32800000) /* |x| < 2**-26 */
 792             return M_PI_2;
 793         return M_PI_2 - (x - (pio2_lo - x * asinf_R(x * x)));
 794     }
 795     /* x < -0.5 */
 796     if (hx >> 31) {
 797         z = (1 + x) * 0.5f;
 798         s = sqrtf(z);
 799         return M_PI - 2 * (s + ((double)s * asinf_R(z)));
 800     }
 801     /* x > 0.5 */
 802     z = (1 - x) * 0.5f;
 803     s = sqrtf(z);
 804     hx = *(unsigned int*)&s & 0xffff0000;
 805     df = *(float*)&hx;
 806     c = (z - df * df) / (s + df);
 807     w = asinf_R(z) * s + c;
 808     return 2 * (df + w);
 809 }
 810
 811 /*********************************************************************
 812  *      asinf (MSVCRT.@)
 813  *
 814  * Copied from musl: src/math/asinf.c
 815  */
 816 float CDECL asinf( float x )
 817 {
 818     static const double pio2 = 1.570796326794896558e+00;
 819     static const float pio4_hi = 0.785398125648;
 820     static const float pio2_lo = 7.54978941586e-08;
 821
 822     float s, z, f, c;
 823     unsigned int hx, ix;
 824
 825     hx = *(unsigned int*)&x;
 826     ix = hx & 0x7fffffff;
 827     if (ix >= 0x3f800000) {  /* |x| >= 1 */
 828         if (ix == 0x3f800000)  /* |x| == 1 */
 829             return x * pio2 + 7.5231638453e-37;  /* asin(+-1) = +-pi/2 with inexact */
 830         if (isnan(x)) return x;
 831         return math_error(_DOMAIN, "asinf", x, 0, 0 / (x - x));
 832     }
 833     if (ix < 0x3f000000) {  /* |x| < 0.5 */
 834         /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */
 835         if (ix < 0x39800000 && ix >= 0x00800000)
 836             return x;
 837         return x + x * asinf_R(x * x);
 838     }
 839     /* 1 > |x| >= 0.5 */
 840     z = (1 - fabsf(x)) * 0.5f;
 841     s = sqrtf(z);
 842     /* f+c = sqrt(z) */
 843     *(unsigned int*)&f = *(unsigned int*)&s & 0xffff0000;
 844     c = (z - f * f) / (s + f);
 845     x = pio4_hi - (2 * s * asinf_R(z) - (pio2_lo - 2 * c) - (pio4_hi - 2 * f));
 846     if (hx >> 31)
 847         return -x;
 848     return x;
 849 }
 850
 851 /*********************************************************************
 852  *      atanf (MSVCRT.@)
 853  *
 854  * Copied from musl: src/math/atanf.c
 855  */
 856 float CDECL atanf( float x )
 857 {
 858     static const float atanhi[] = {
 859         4.6364760399e-01,
 860         7.8539812565e-01,
 861         9.8279368877e-01,
 862         1.5707962513e+00,
 863     };
 864     static const float atanlo[] = {
 865         5.0121582440e-09,
 866         3.7748947079e-08,
 867         3.4473217170e-08,
 868         7.5497894159e-08,
 869     };
 870     static const float aT[] = {
 871         3.3333328366e-01,
 872         -1.9999158382e-01,
 873         1.4253635705e-01,
 874         -1.0648017377e-01,
 875         6.1687607318e-02,
 876     };
 877
 878     float w, s1, s2, z;
 879     unsigned int ix, sign;
 880     int id;
 881
 882 #if _MSVCR_VER == 0
 883     if (isnan(x)) return math_error(_DOMAIN, "atanf", x, 0, x);
 884 #endif
 885
 886     ix = *(unsigned int*)&x;
 887     sign = ix >> 31;
 888     ix &= 0x7fffffff;
 889     if (ix >= 0x4c800000) {  /* if |x| >= 2**26 */
 890         if (isnan(x))
 891             return x;
 892         z = atanhi[3] + 7.5231638453e-37;
 893         return sign ? -z : z;
 894     }
 895     if (ix < 0x3ee00000) {   /* |x| < 0.4375 */
 896         if (ix < 0x39800000) {  /* |x| < 2**-12 */
 897             if (ix < 0x00800000)
 898                 /* raise underflow for subnormal x */
 899                 fp_barrierf(x*x);
 900             return x;
 901         }
 902         id = -1;
 903     } else {
 904         x = fabsf(x);
 905         if (ix < 0x3f980000) {  /* |x| < 1.1875 */
 906             if (ix < 0x3f300000) {  /*  7/16 <= |x| < 11/16 */
 907                 id = 0;
 908                 x = (2.0f * x - 1.0f) / (2.0f + x);
 909             } else {                /* 11/16 <= |x| < 19/16 */
 910                 id = 1;
 911                 x = (x - 1.0f) / (x + 1.0f);
 912             }
 913         } else {
 914             if (ix < 0x401c0000) {  /* |x| < 2.4375 */
 915                 id = 2;
 916                 x = (x - 1.5f) / (1.0f + 1.5f * x);
 917             } else {                /* 2.4375 <= |x| < 2**26 */
 918                 id = 3;
 919                 x = -1.0f / x;
 920             }
 921         }
 922     }
 923     /* end of argument reduction */
 924     z = x * x;
 925     w = z * z;
 926     /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
 927     s1 = z * (aT[0] + w * (aT[2] + w * aT[4]));
 928     s2 = w * (aT[1] + w * aT[3]);
 929     if (id < 0)
 930         return x - x * (s1 + s2);
 931     z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x);
 932     return sign ? -z : z;
 933 }
 934
 935 /*********************************************************************
 936  *              atan2f (MSVCRT.@)
 937  *
 938  * Copied from musl: src/math/atan2f.c
 939  */
 940 float CDECL atan2f( float y, float x )
 941 {
 942     static const float pi     = 3.1415927410e+00,
 943                  pi_lo  = -8.7422776573e-08;
 944
 945     float z;
 946     unsigned int m, ix, iy;
 947
 948     if (isnan(x) || isnan(y))
 949         return x + y;
 950     ix = *(unsigned int*)&x;
 951     iy = *(unsigned int*)&y;
 952     if (ix == 0x3f800000)  /* x=1.0 */
 953         return atanf(y);
 954     m = ((iy >> 31) & 1) | ((ix >> 30) & 2);  /* 2*sign(x)+sign(y) */
 955     ix &= 0x7fffffff;
 956     iy &= 0x7fffffff;
 957
 958     /* when y = 0 */
 959     if (iy == 0) {
 960         switch (m) {
 961         case 0:
 962         case 1: return y;   /* atan(+-0,+anything)=+-0 */
 963         case 2: return pi;  /* atan(+0,-anything) = pi */
 964         case 3: return -pi; /* atan(-0,-anything) =-pi */
 965         }
 966     }
 967     /* when x = 0 */
 968     if (ix == 0)
 969         return m & 1 ? -pi / 2 : pi / 2;
 970     /* when x is INF */
 971     if (ix == 0x7f800000) {
 972         if (iy == 0x7f800000) {
 973             switch (m) {
 974             case 0: return pi / 4;      /* atan(+INF,+INF) */
 975             case 1: return -pi / 4;     /* atan(-INF,+INF) */
 976             case 2: return 3 * pi / 4;  /*atan(+INF,-INF)*/
 977             case 3: return -3 * pi / 4; /*atan(-INF,-INF)*/
 978             }
 979         } else {
 980             switch (m) {
 981             case 0: return 0.0f;    /* atan(+...,+INF) */
 982             case 1: return -0.0f;   /* atan(-...,+INF) */
 983             case 2: return pi;      /* atan(+...,-INF) */
 984             case 3: return -pi;     /* atan(-...,-INF) */
 985             }
 986         }
 987     }
 988     /* |y/x| > 0x1p26 */
 989     if (ix + (26 << 23) < iy || iy == 0x7f800000)
 990         return m & 1 ? -pi / 2 : pi / 2;
 991
 992     /* z = atan(|y/x|) with correct underflow */
 993     if ((m & 2) && iy + (26 << 23) < ix)  /*|y/x| < 0x1p-26, x < 0 */
 994         z = 0.0;
 995     else
 996         z = atanf(fabsf(y / x));
 997     switch (m) {
 998     case 0: return z;                /* atan(+,+) */
 999     case 1: return -z;               /* atan(-,+) */
1000     case 2: return pi - (z - pi_lo); /* atan(+,-) */
1001     default: /* case 3 */
1002         return (z - pi_lo) - pi;     /* atan(-,-) */
1003     }
1004 }
1005
1006 /* Copied from musl: src/math/__rem_pio2f.c */
1007 static int __rem_pio2f(float x, double *y)
1008 {
1009     static const double toint = 1.5 / DBL_EPSILON,
1010         pio4 = 0x1.921fb6p-1,
1011         invpio2 = 6.36619772367581382433e-01,
1012         pio2_1 = 1.57079631090164184570e+00,
1013         pio2_1t = 1.58932547735281966916e-08;
1014
1015     union {float f; uint32_t i;} u = {x};
1016     double tx[1], ty[1], fn;
1017     UINT32 ix;
1018     int n, sign, e0;
1019
1020     ix = u.i & 0x7fffffff;
1021     /* 25+53 bit pi is good enough for medium size */
1022     if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */
1023         /* Use a specialized rint() to get fn. */
1024         fn = fp_barrier(x * invpio2 + toint) - toint;
1025         n  = (int)fn;
1026         *y = x - fn * pio2_1 - fn * pio2_1t;
1027         /* Matters with directed rounding. */
1028         if (*y < -pio4) {
1029             n--;
1030             fn--;
1031             *y = x - fn * pio2_1 - fn * pio2_1t;
1032         } else if (*y > pio4) {
1033             n++;
1034             fn++;
1035             *y = x - fn * pio2_1 - fn * pio2_1t;
1036         }
1037         return n;
1038     }
1039     if(ix >= 0x7f800000) { /* x is inf or NaN */
1040         *y = x - x;
1041         return 0;
1042     }
1043     /* scale x into [2^23, 2^24-1] */
1044     sign = u.i >> 31;
1045     e0 = (ix >> 23) - (0x7f + 23); /* e0 = ilogb(|x|)-23, positive */
1046     u.i = ix - (e0 << 23);
1047     tx[0] = u.f;
1048     n = __rem_pio2_large(tx, ty, e0, 1, 0);
1049     if (sign) {
1050         *y = -ty[0];
1051         return -n;
1052     }
1053     *y = ty[0];
1054     return n;
1055 }
1056
1057 /*********************************************************************
1058  *      cosf (MSVCRT.@)
1059  *
1060  * Copied from musl: src/math/cosf.c
1061  */
1062 float CDECL cosf( float x )
1063 {
1064     static const double c1pio2 = 1*M_PI_2,
1065         c2pio2 = 2*M_PI_2,
1066         c3pio2 = 3*M_PI_2,
1067         c4pio2 = 4*M_PI_2;
1068
1069     double y;
1070     UINT32 ix;
1071     unsigned n, sign;
1072
1073     ix = *(UINT32*)&x;
1074     sign = ix >> 31;
1075     ix &= 0x7fffffff;
1076
1077     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1078         if (ix < 0x39800000) { /* |x| < 2**-12 */
1079             /* raise inexact if x != 0 */
1080             fp_barrierf(x + 0x1p120f);
1081             return 1.0f;
1082         }
1083         return __cosdf(x);
1084     }
1085     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1086         if (ix > 0x4016cbe3) /* |x| ~> 3*pi/4 */
1087             return -__cosdf(sign ? x + c2pio2 : x - c2pio2);
1088         else {
1089             if (sign)
1090                 return __sindf(x + c1pio2);
1091             else
1092                 return __sindf(c1pio2 - x);
1093         }
1094     }
1095     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1096         if (ix > 0x40afeddf) /* |x| ~> 7*pi/4 */
1097             return __cosdf(sign ? x + c4pio2 : x - c4pio2);
1098         else {
1099             if (sign)
1100                 return __sindf(-x - c3pio2);
1101             else
1102                 return __sindf(x - c3pio2);
1103         }
1104     }
1105
1106     /* cos(Inf or NaN) is NaN */
1107     if (isinf(x)) return math_error(_DOMAIN, "cosf", x, 0, x - x);
1108     if (ix >= 0x7f800000)
1109         return x - x;
1110
1111     /* general argument reduction needed */
1112     n = __rem_pio2f(x, &y);
1113     switch (n & 3) {
1114     case 0: return __cosdf(y);
1115     case 1: return __sindf(-y);
1116     case 2: return -__cosdf(y);
1117     default: return __sindf(y);
1118     }
1119 }
1120
1121 /* Copied from musl: src/math/__expo2f.c */
1122 static float __expo2f(float x, float sign)
1123 {
1124     static const int k = 235;
1125     static const float kln2 = 0x1.45c778p+7f;
1126     float scale;
1127
1128     *(UINT32*)&scale = (UINT32)(0x7f + k/2) << 23;
1129     return expf(x - kln2) * (sign * scale) * scale;
1130 }
1131
1132 /*********************************************************************
1133  *      coshf (MSVCRT.@)
1134  *
1135  * Copied from musl: src/math/coshf.c
1136  */
1137 float CDECL coshf( float x )
1138 {
1139     UINT32 ui = *(UINT32*)&x;
1140     UINT32 sign = ui & 0x80000000;
1141     float t;
1142
1143     /* |x| */
1144     ui &= 0x7fffffff;
1145     x = *(float*)&ui;
1146
1147     /* |x| < log(2) */
1148     if (ui < 0x3f317217) {
1149         if (ui < 0x3f800000 - (12 << 23)) {
1150             fp_barrierf(x + 0x1p120f);
1151             return 1;
1152         }
1153         t = __expm1f(x);
1154         return 1 + t * t / (2 * (1 + t));
1155     }
1156
1157     /* |x| < log(FLT_MAX) */
1158     if (ui < 0x42b17217) {
1159         t = expf(x);
1160         return 0.5f * (t + 1 / t);
1161     }
1162
1163     /* |x| > log(FLT_MAX) or nan */
1164     if (ui > 0x7f800000)
1165         *(UINT32*)&t = ui | sign | 0x400000;
1166     else
1167         t = __expo2f(x, 1.0f);
1168     return t;
1169 }
1170
1171 /*********************************************************************
1172  *      expf (MSVCRT.@)
1173  */
1174 float CDECL expf( float x )
1175 {
1176     static const double C[] = {
1177         0x1.c6af84b912394p-5 / (1 << 5) / (1 << 5) / (1 << 5),
1178         0x1.ebfce50fac4f3p-3 / (1 << 5) / (1 << 5),
1179         0x1.62e42ff0c52d6p-1 / (1 << 5)
1180     };
1181     static const double invln2n = 0x1.71547652b82fep+0 * (1 << 5);
1182
1183     double kd, z, r, r2, y, s;
1184     UINT32 abstop;
1185     UINT64 ki, t;
1186
1187     abstop = (*(UINT32*)&x >> 20) & 0x7ff;
1188     if (abstop >= 0x42b) {
1189         /* |x| >= 88 or x is nan.  */
1190         if (*(UINT32*)&x == 0xff800000)
1191             return 0.0f;
1192         if (abstop >= 0x7f8)
1193             return x + x;
1194         if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
1195             return math_error(_OVERFLOW, "expf", x, 0, x * FLT_MAX);
1196         if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
1197             return math_error(_UNDERFLOW, "expf", x, 0, fp_barrierf(FLT_MIN) * FLT_MIN);
1198     }
1199
1200     /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k.  */
1201     z = invln2n * x;
1202
1203     /* Round and convert z to int, the result is in [-150*N, 128*N] and
1204        ideally ties-to-even rule is used, otherwise the magnitude of r
1205        can be bigger which gives larger approximation error.  */
1206     kd = __round(z);
1207     ki = kd;
1208     r = z - kd;
1209
1210     /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
1211     t = exp2f_T[ki % (1 << 5)];
1212     t += ki << (52 - 5);
1213     s = *(double*)&t;
1214     z = C[0] * r + C[1];
1215     r2 = r * r;
1216     y = C[2] * r + 1;
1217     y = z * r2 + y;
1218     y = y * s;
1219     return y;
1220 }
1221
1222 /*********************************************************************
1223  *      fmodf (MSVCRT.@)
1224  *
1225  * Copied from musl: src/math/fmodf.c
1226  */
1227 float CDECL fmodf( float x, float y )
1228 {
1229     UINT32 xi = *(UINT32*)&x;
1230     UINT32 yi = *(UINT32*)&y;
1231     int ex = xi>>23 & 0xff;
1232     int ey = yi>>23 & 0xff;
1233     UINT32 sx = xi & 0x80000000;
1234     UINT32 i;
1235
1236     if (isinf(x)) return math_error(_DOMAIN, "fmodf", x, y, (x * y) / (x * y));
1237     if (yi << 1 == 0 || isnan(y) || ex == 0xff)
1238         return (x * y) / (x * y);
1239     if (xi << 1 <= yi << 1) {
1240         if (xi << 1 == yi << 1)
1241             return 0 * x;
1242         return x;
1243     }
1244
1245     /* normalize x and y */
1246     if (!ex) {
1247         for (i = xi << 9; i >> 31 == 0; ex--, i <<= 1);
1248         xi <<= -ex + 1;
1249     } else {
1250         xi &= -1U >> 9;
1251         xi |= 1U << 23;
1252     }
1253     if (!ey) {
1254         for (i = yi << 9; i >> 31 == 0; ey--, i <<= 1);
1255         yi <<= -ey + 1;
1256     } else {
1257         yi &= -1U >> 9;
1258         yi |= 1U << 23;
1259     }
1260
1261     /* x mod y */
1262     for (; ex > ey; ex--) {
1263         i = xi - yi;
1264         if (i >> 31 == 0) {
1265             if (i == 0)
1266                 return 0 * x;
1267             xi = i;
1268         }
1269         xi <<= 1;
1270     }
1271     i = xi - yi;
1272     if (i >> 31 == 0) {
1273         if (i == 0)
1274             return 0 * x;
1275         xi = i;
1276     }
1277     for (; xi>>23 == 0; xi <<= 1, ex--);
1278
1279     /* scale result up */
1280     if (ex > 0) {
1281         xi -= 1U << 23;
1282         xi |= (UINT32)ex << 23;
1283     } else {
1284         xi >>= -ex + 1;
1285     }
1286     xi |= sx;
1287     return *(float*)&xi;
1288 }
1289
1290 /*********************************************************************
1291  *      logf (MSVCRT.@)
1292  *
1293  * Copied from musl: src/math/logf.c src/math/logf_data.c
1294  */
1295 float CDECL logf( float x )
1296 {
1297     static const double Ln2 = 0x1.62e42fefa39efp-1;
1298     static const double A[] = {
1299         -0x1.00ea348b88334p-2,
1300         0x1.5575b0be00b6ap-2,
1301         -0x1.ffffef20a4123p-2
1302     };
1303     static const struct {
1304         double invc, logc;
1305     } T[] = {
1306         { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
1307         { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
1308         { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
1309         { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
1310         { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
1311         { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
1312         { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
1313         { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
1314         { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
1315         { 0x1p+0, 0x0p+0 },
1316         { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
1317         { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
1318         { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
1319         { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
1320         { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
1321         { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 }
1322     };
1323
1324     double z, r, r2, y, y0, invc, logc;
1325     UINT32 ix, iz, tmp;
1326     int k, i;
1327
1328     ix = *(UINT32*)&x;
1329     /* Fix sign of zero with downward rounding when x==1. */
1330     if (ix == 0x3f800000)
1331         return 0;
1332     if (ix - 0x00800000 >= 0x7f800000 - 0x00800000) {
1333         /* x < 0x1p-126 or inf or nan. */
1334         if (ix * 2 == 0)
1335             return math_error(_SING, "logf", x, 0, (ix & 0x80000000 ? 1.0 : -1.0) / x);
1336         if (ix == 0x7f800000) /* log(inf) == inf. */
1337             return x;
1338         if (ix * 2 > 0xff000000)
1339             return x;
1340         if (ix & 0x80000000)
1341             return math_error(_DOMAIN, "logf", x, 0, (x - x) / (x - x));
1342         /* x is subnormal, normalize it. */
1343         x *= 0x1p23f;
1344         ix = *(UINT32*)&x;
1345         ix -= 23 << 23;
1346     }
1347
1348     /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
1349        The range is split into N subintervals.
1350        The ith subinterval contains z and c is near its center. */
1351     tmp = ix - 0x3f330000;
1352     i = (tmp >> (23 - 4)) % (1 << 4);
1353     k = (INT32)tmp >> 23; /* arithmetic shift */
1354     iz = ix - (tmp & (0x1ffu << 23));
1355     invc = T[i].invc;
1356     logc = T[i].logc;
1357     z = *(float*)&iz;
1358
1359     /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
1360     r = z * invc - 1;
1361     y0 = logc + (double)k * Ln2;
1362
1363     /* Pipelined polynomial evaluation to approximate log1p(r). */
1364     r2 = r * r;
1365     y = A[1] * r + A[2];
1366     y = A[0] * r2 + y;
1367     y = y * r2 + (y0 + r);
1368     return y;
1369 }
1370
1371 /*********************************************************************
1372  *      log10f (MSVCRT.@)
1373  */
1374 float CDECL log10f( float x )
1375 {
1376     static const float ivln10hi = 4.3432617188e-01,
1377         ivln10lo = -3.1689971365e-05,
1378         log10_2hi = 3.0102920532e-01,
1379         log10_2lo = 7.9034151668e-07,
1380         Lg1 = 0xaaaaaa.0p-24,
1381         Lg2 = 0xccce13.0p-25,
1382         Lg3 = 0x91e9ee.0p-25,
1383         Lg4 = 0xf89e26.0p-26;
1384
1385     union {float f; UINT32 i;} u = {x};
1386     float hfsq, f, s, z, R, w, t1, t2, dk, hi, lo;
1387     UINT32 ix;
1388     int k;
1389
1390     ix = u.i;
1391     k = 0;
1392     if (ix < 0x00800000 || ix >> 31) { /* x < 2**-126 */
1393         if (ix << 1 == 0)
1394             return math_error(_SING, "log10f", x, 0, -1 / (x * x));
1395         if ((ix & ~(1u << 31)) > 0x7f800000)
1396             return x;
1397         if (ix >> 31)
1398             return math_error(_DOMAIN, "log10f", x, 0, (x - x) / (x - x));
1399         /* subnormal number, scale up x */
1400         k -= 25;
1401         x *= 0x1p25f;
1402         u.f = x;
1403         ix = u.i;
1404     } else if (ix >= 0x7f800000) {
1405         return x;
1406     } else if (ix == 0x3f800000)
1407         return 0;
1408
1409     /* reduce x into [sqrt(2)/2, sqrt(2)] */
1410     ix += 0x3f800000 - 0x3f3504f3;
1411     k += (int)(ix >> 23) - 0x7f;
1412     ix = (ix & 0x007fffff) + 0x3f3504f3;
1413     u.i = ix;
1414     x = u.f;
1415
1416     f = x - 1.0f;
1417     s = f / (2.0f + f);
1418     z = s * s;
1419     w = z * z;
1420     t1= w * (Lg2 + w * Lg4);
1421     t2= z * (Lg1 + w * Lg3);
1422     R = t2 + t1;
1423     hfsq = 0.5f * f * f;
1424
1425     hi = f - hfsq;
1426     u.f = hi;
1427     u.i &= 0xfffff000;
1428     hi = u.f;
1429     lo = f - hi - hfsq + s * (hfsq + R);
1430     dk = k;
1431     return dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi + hi * ivln10hi + dk * log10_2hi;
1432 }
1433
1434 /* Subnormal input is normalized so ix has negative biased exponent.
1435    Output is multiplied by POWF_SCALE (where 1 << 5). */
1436 static double powf_log2(UINT32 ix)
1437 {
1438     static const struct {
1439         double invc, logc;
1440     } T[] = {
1441         { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * (1 << 5) },
1442         { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * (1 << 5) },
1443         { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * (1 << 5) },
1444         { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * (1 << 5) },
1445         { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * (1 << 5) },
1446         { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * (1 << 5) },
1447         { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * (1 << 5) },
1448         { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * (1 << 5) },
1449         { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * (1 << 5) },
1450         { 0x1p+0, 0x0p+0 * (1 << 4) },
1451         { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * (1 << 5) },
1452         { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * (1 << 5) },
1453         { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * (1 << 5) },
1454         { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * (1 << 5) },
1455         { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * (1 << 5) },
1456         { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * (1 << 5) }
1457     };
1458     static const double A[] = {
1459         0x1.27616c9496e0bp-2 * (1 << 5), -0x1.71969a075c67ap-2 * (1 << 5),
1460         0x1.ec70a6ca7baddp-2 * (1 << 5), -0x1.7154748bef6c8p-1 * (1 << 5),
1461         0x1.71547652ab82bp0 * (1 << 5)
1462     };
1463
1464     double z, r, r2, r4, p, q, y, y0, invc, logc;
1465     UINT32 iz, top, tmp;
1466     int k, i;
1467
1468     /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
1469        The range is split into N subintervals.
1470        The ith subinterval contains z and c is near its center. */
1471     tmp = ix - 0x3f330000;
1472     i = (tmp >> (23 - 4)) % (1 << 4);
1473     top = tmp & 0xff800000;
1474     iz = ix - top;
1475     k = (INT32)top >> (23 - 5); /* arithmetic shift */
1476     invc = T[i].invc;
1477     logc = T[i].logc;
1478     z = *(float*)&iz;
1479
1480     /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
1481     r = z * invc - 1;
1482     y0 = logc + (double)k;
1483
1484     /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
1485     r2 = r * r;
1486     y = A[0] * r + A[1];
1487     p = A[2] * r + A[3];
1488     r4 = r2 * r2;
1489     q = A[4] * r + y0;
1490     q = p * r2 + q;
1491     y = y * r4 + q;
1492     return y;
1493 }
1494
1495 /* The output of log2 and thus the input of exp2 is either scaled by N
1496    (in case of fast toint intrinsics) or not. The unscaled xd must be
1497    in [-1021,1023], sign_bias sets the sign of the result. */
1498 static float powf_exp2(double xd, UINT32 sign_bias)
1499 {
1500     static const double C[] = {
1501         0x1.c6af84b912394p-5 / (1 << 5) / (1 << 5) / (1 << 5),
1502         0x1.ebfce50fac4f3p-3 / (1 << 5) / (1 << 5),
1503         0x1.62e42ff0c52d6p-1 / (1 << 5)
1504     };
1505
1506     UINT64 ki, ski, t;
1507     double kd, z, r, r2, y, s;
1508
1509     /* N*x = k + r with r in [-1/2, 1/2] */
1510     kd = __round(xd); /* k */
1511     ki = kd;
1512     r = xd - kd;
1513
1514     /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
1515     t = exp2f_T[ki % (1 << 5)];
1516     ski = ki + sign_bias;
1517     t += ski << (52 - 5);
1518     s = *(double*)&t;
1519     z = C[0] * r + C[1];
1520     r2 = r * r;
1521     y = C[2] * r + 1;
1522     y = z * r2 + y;
1523     y = y * s;
1524     return y;
1525 }
1526
1527 /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
1528    the bit representation of a non-zero finite floating-point value. */
1529 static int powf_checkint(UINT32 iy)
1530 {
1531     int e = iy >> 23 & 0xff;
1532     if (e < 0x7f)
1533         return 0;
1534     if (e > 0x7f + 23)
1535         return 2;
1536     if (iy & ((1 << (0x7f + 23 - e)) - 1))
1537         return 0;
1538     if (iy & (1 << (0x7f + 23 - e)))
1539         return 1;
1540     return 2;
1541 }
1542
1543 /*********************************************************************
1544  *      powf (MSVCRT.@)
1545  *
1546  * Copied from musl: src/math/powf.c src/math/powf_data.c
1547  */
1548 float CDECL powf( float x, float y )
1549 {
1550     UINT32 sign_bias = 0;
1551     UINT32 ix, iy;
1552     double logx, ylogx;
1553
1554     ix = *(UINT32*)&x;
1555     iy = *(UINT32*)&y;
1556     if (ix - 0x00800000 >= 0x7f800000 - 0x00800000 ||
1557             2 * iy - 1 >= 2u * 0x7f800000 - 1) {
1558         /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
1559         if (2 * iy - 1 >= 2u * 0x7f800000 - 1) {
1560             if (2 * iy == 0)
1561                 return 1.0f;
1562             if (ix == 0x3f800000)
1563                 return 1.0f;
1564             if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
1565                 return x + y;
1566             if (2 * ix == 2 * 0x3f800000)
1567                 return 1.0f;
1568             if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
1569                 return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
1570             return y * y;
1571         }
1572         if (2 * ix - 1 >= 2u * 0x7f800000 - 1) {
1573             float x2 = x * x;
1574             if (ix & 0x80000000 && powf_checkint(iy) == 1)
1575                 x2 = -x2;
1576             if (iy & 0x80000000 && x2 == 0.0)
1577                 return math_error(_SING, "powf", x, y, 1 / x2);
1578             /* Without the barrier some versions of clang hoist the 1/x2 and
1579                thus division by zero exception can be signaled spuriously. */
1580             return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2;
1581         }
1582         /* x and y are non-zero finite. */
1583         if (ix & 0x80000000) {
1584             /* Finite x < 0. */
1585             int yint = powf_checkint(iy);
1586             if (yint == 0)
1587                 return math_error(_DOMAIN, "powf", x, y, 0 / (x - x));
1588             if (yint == 1)
1589                 sign_bias = 1 << (5 + 11);
1590             ix &= 0x7fffffff;
1591         }
1592         if (ix < 0x00800000) {
1593             /* Normalize subnormal x so exponent becomes negative. */
1594             x *= 0x1p23f;
1595             ix = *(UINT32*)&x;
1596             ix &= 0x7fffffff;
1597             ix -= 23 << 23;
1598         }
1599     }
1600     logx = powf_log2(ix);
1601     ylogx = y * logx; /* cannot overflow, y is single prec. */
1602     if ((*(UINT64*)&ylogx >> 47 & 0xffff) >= 0x40af800000000000llu >> 47) {
1603         /* |y*log(x)| >= 126. */
1604         if (ylogx > 0x1.fffffffd1d571p+6 * (1 << 5))
1605             return math_error(_OVERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) * 0x1p1023);
1606         if (ylogx <= -150.0 * (1 << 5))
1607             return math_error(_UNDERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) / 0x1p1023);
1608     }
1609     return powf_exp2(ylogx, sign_bias);
1610 }
1611
1612 /*********************************************************************
1613  *      sinf (MSVCRT.@)
1614  *
1615  * Copied from musl: src/math/sinf.c
1616  */
1617 float CDECL sinf( float x )
1618 {
1619     static const double s1pio2 = 1*M_PI_2,
1620         s2pio2 = 2*M_PI_2,
1621         s3pio2 = 3*M_PI_2,
1622         s4pio2 = 4*M_PI_2;
1623
1624     double y;
1625     UINT32 ix;
1626     int n, sign;
1627
1628     ix = *(UINT32*)&x;
1629     sign = ix >> 31;
1630     ix &= 0x7fffffff;
1631
1632     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1633         if (ix < 0x39800000) { /* |x| < 2**-12 */
1634             /* raise inexact if x!=0 and underflow if subnormal */
1635             fp_barrierf(ix < 0x00800000 ? x / 0x1p120f : x + 0x1p120f);
1636             return x;
1637         }
1638         return __sindf(x);
1639     }
1640     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1641         if (ix <= 0x4016cbe3) { /* |x| ~<= 3pi/4 */
1642             if (sign)
1643                 return -__cosdf(x + s1pio2);
1644             else
1645                 return __cosdf(x - s1pio2);
1646         }
1647         return __sindf(sign ? -(x + s2pio2) : -(x - s2pio2));
1648     }
1649     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1650         if (ix <= 0x40afeddf) { /* |x| ~<= 7*pi/4 */
1651             if (sign)
1652                 return __cosdf(x + s3pio2);
1653             else
1654                 return -__cosdf(x - s3pio2);
1655         }
1656         return __sindf(sign ? x + s4pio2 : x - s4pio2);
1657     }
1658
1659     /* sin(Inf or NaN) is NaN */
1660     if (isinf(x))
1661         return math_error(_DOMAIN, "sinf", x, 0, x - x);
1662     if (ix >= 0x7f800000)
1663         return x - x;
1664
1665     /* general argument reduction needed */
1666     n = __rem_pio2f(x, &y);
1667     switch (n&3) {
1668     case 0: return __sindf(y);
1669     case 1: return __cosdf(y);
1670     case 2: return __sindf(-y);
1671     default: return -__cosdf(y);
1672     }
1673 }
1674
1675 /*********************************************************************
1676  *      sinhf (MSVCRT.@)
1677  */
1678 float CDECL sinhf( float x )
1679 {
1680     UINT32 ui = *(UINT32*)&x;
1681     float t, h, absx;
1682
1683     h = 0.5;
1684     if (ui >> 31)
1685         h = -h;
1686     /* |x| */
1687     ui &= 0x7fffffff;
1688     absx = *(float*)&ui;
1689
1690     /* |x| < log(FLT_MAX) */
1691     if (ui < 0x42b17217) {
1692         t = __expm1f(absx);
1693         if (ui < 0x3f800000) {
1694             if (ui < 0x3f800000 - (12 << 23))
1695                 return x;
1696             return h * (2 * t - t * t / (t + 1));
1697         }
1698         return h * (t + t / (t + 1));
1699     }
1700
1701     /* |x| > logf(FLT_MAX) or nan */
1702     if (ui > 0x7f800000)
1703         *(DWORD*)&t = *(DWORD*)&x | 0x400000;
1704     else
1705         t = __expo2f(absx, 2 * h);
1706     return t;
1707 }
1708
1709 static BOOL sqrtf_validate( float *x )
1710 {
1711     short c = _fdclass(*x);
1712
1713     if (c == FP_ZERO) return FALSE;
1714     if (c == FP_NAN) return FALSE;
1715     if (signbit(*x))
1716     {
1717         *x = math_error(_DOMAIN, "sqrtf", *x, 0, ret_nan(TRUE));
1718         return FALSE;
1719     }
1720     if (c == FP_INFINITE) return FALSE;
1721     return TRUE;
1722 }
1723
1724 #if defined(__x86_64__) || defined(__i386__)
1725 float CDECL sse2_sqrtf(float);
1726 __ASM_GLOBAL_FUNC( sse2_sqrtf,
1727         "sqrtss %xmm0, %xmm0\n\t"
1728         "ret" )
1729 #endif
1730
1731 /*********************************************************************
1732  *      sqrtf (MSVCRT.@)
1733  *
1734  * Copied from musl: src/math/sqrtf.c
1735  */
1736 float CDECL sqrtf( float x )
1737 {
1738 #ifdef __x86_64__
1739     if (!sqrtf_validate(&x))
1740         return x;
1741
1742     return sse2_sqrtf(x);
1743 #else
1744     static const float tiny = 1.0e-30;
1745
1746     float z;
1747     int ix,s,q,m,t,i;
1748     unsigned int r;
1749
1750     ix = *(int*)&x;
1751
1752     if (!sqrtf_validate(&x))
1753         return x;
1754
1755     /* normalize x */
1756     m = ix >> 23;
1757     if (m == 0) {  /* subnormal x */
1758         for (i = 0; (ix & 0x00800000) == 0; i++)
1759             ix <<= 1;
1760         m -= i - 1;
1761     }
1762     m -= 127;  /* unbias exponent */
1763     ix = (ix & 0x007fffff) | 0x00800000;
1764     if (m & 1)  /* odd m, double x to make it even */
1765         ix += ix;
1766     m >>= 1;  /* m = [m/2] */
1767
1768     /* generate sqrt(x) bit by bit */
1769     ix += ix;
1770     q = s = 0;       /* q = sqrt(x) */
1771     r = 0x01000000;  /* r = moving bit from right to left */
1772
1773     while (r != 0) {
1774         t = s + r;
1775         if (t <= ix) {
1776             s = t + r;
1777             ix -= t;
1778             q += r;
1779         }
1780         ix += ix;
1781         r >>= 1;
1782     }
1783
1784     /* use floating add to find out rounding direction */
1785     if (ix != 0) {
1786         z = 1.0f - tiny; /* raise inexact flag */
1787         if (z >= 1.0f) {
1788             z = 1.0f + tiny;
1789             if (z > 1.0f)
1790                 q += 2;
1791             else
1792                 q += q & 1;
1793         }
1794     }
1795     ix = (q >> 1) + 0x3f000000;
1796     r = ix + ((unsigned int)m << 23);
1797     z = *(float*)&r;
1798     return z;
1799 #endif
1800 }
1801
/* Copied from musl: src/math/__tandf.c
 *
 * Tangent kernel: evaluates an odd polynomial in x in double precision.
 * Returns the polynomial value when odd is zero and -1/value otherwise. */
static float __tandf(double x, int odd)
{
    static const double T[] = {
        0x15554d3418c99f.0p-54,
        0x1112fd38999f72.0p-55,
        0x1b54c91d865afe.0p-57,
        0x191df3908c33ce.0p-58,
        0x185dadfcecf44e.0p-61,
        0x1362b9bf971bcd.0p-59,
    };

    double x2, x3, x4, low, mid, high, sum;

    x2 = x * x;
    x4 = x2 * x2;
    x3 = x2 * x;

    /* three independent pairs of coefficients */
    high = T[4] + x2 * T[5];
    mid = T[2] + x2 * T[3];
    low = T[0] + x2 * T[1];

    sum = (x + x3 * low) + (x3 * x4) * (mid + x4 * high);
    if (odd)
        return -1.0 / sum;
    return sum;
}
1825
1826 /*********************************************************************
1827  *      tanf (MSVCRT.@)
1828  *
1829  * Copied from musl: src/math/tanf.c
1830  */
1831 float CDECL tanf( float x )
1832 {
1833     static const double t1pio2 = 1*M_PI_2,
1834         t2pio2 = 2*M_PI_2,
1835         t3pio2 = 3*M_PI_2,
1836         t4pio2 = 4*M_PI_2;
1837
1838     double y;
1839     UINT32 ix;
1840     unsigned n, sign;
1841
1842     ix = *(UINT32*)&x;
1843     sign = ix >> 31;
1844     ix &= 0x7fffffff;
1845
1846     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1847         if (ix < 0x39800000) { /* |x| < 2**-12 */
1848             /* raise inexact if x!=0 and underflow if subnormal */
1849             fp_barrierf(ix < 0x00800000 ? x / 0x1p120f : x + 0x1p120f);
1850             return x;
1851         }
1852         return __tandf(x, 0);
1853     }
1854     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1855         if (ix <= 0x4016cbe3) /* |x| ~<= 3pi/4 */
1856             return __tandf((sign ? x + t1pio2 : x - t1pio2), 1);
1857         else
1858             return __tandf((sign ? x + t2pio2 : x - t2pio2), 0);
1859     }
1860     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1861         if (ix <= 0x40afeddf) /* |x| ~<= 7*pi/4 */
1862             return __tandf((sign ? x + t3pio2 : x - t3pio2), 1);
1863         else
1864             return __tandf((sign ? x + t4pio2 : x - t4pio2), 0);
1865     }
1866
1867     /* tan(Inf or NaN) is NaN */
1868     if (isinf(x))
1869         return math_error(_DOMAIN, "tanf", x, 0, x - x);
1870     if (ix >= 0x7f800000)
1871         return x - x;
1872
1873     /* argument reduction */
1874     n = __rem_pio2f(x, &y);
1875     return __tandf(y, n & 1);
1876 }
1877
1878 /*********************************************************************
1879  *      tanhf (MSVCRT.@)
1880  */
1881 float CDECL tanhf( float x )
1882 {
1883     UINT32 ui = *(UINT32*)&x;
1884     UINT32 sign = ui & 0x80000000;
1885     float t;
1886
1887     /* x = |x| */
1888     ui &= 0x7fffffff;
1889     x = *(float*)&ui;
1890
1891     if (ui > 0x3f0c9f54) {
1892         /* |x| > log(3)/2 ~= 0.5493 or nan */
1893         if (ui > 0x41200000) {
1894             if (ui > 0x7f800000) {
1895                 *(UINT32*)&x = ui | sign | 0x400000;
1896 #if _MSVCR_VER < 140
1897                 return math_error(_DOMAIN, "tanhf", x, 0, x);
1898 #else
1899                 return x;
1900 #endif
1901             }
1902             /* |x| > 10 */
1903             fp_barrierf(x + 0x1p120f);
1904             t = 1 + 0 / x;
1905         } else {
1906             t = __expm1f(2 * x);
1907             t = 1 - 2 / (t + 2);
1908         }
1909     } else if (ui > 0x3e82c578) {
1910         /* |x| > log(5/3)/2 ~= 0.2554 */
1911         t = __expm1f(2 * x);
1912         t = t / (t + 2);
1913     } else if (ui >= 0x00800000) {
1914         /* |x| >= 0x1p-126 */
1915         t = __expm1f(-2 * x);
1916         t = -t / (t + 2);
1917     } else {
1918         /* |x| is subnormal */
1919         fp_barrierf(x * x);
1920         t = x;
1921     }
1922     return sign ? -t : t;
1923 }
1924
1925 /*********************************************************************
1926  *      ceilf (MSVCRT.@)
1927  *
1928  * Copied from musl: src/math/ceilf.c
1929  */
1930 float CDECL ceilf( float x )
1931 {
1932     union {float f; UINT32 i;} u = {x};
1933     int e = (int)(u.i >> 23 & 0xff) - 0x7f;
1934     UINT32 m;
1935
1936     if (e >= 23)
1937         return x;
1938     if (e >= 0) {
1939         m = 0x007fffff >> e;
1940         if ((u.i & m) == 0)
1941             return x;
1942         if (u.i >> 31 == 0)
1943             u.i += m;
1944         u.i &= ~m;
1945     } else {
1946         if (u.i >> 31)
1947             return -0.0;
1948         else if (u.i << 1)
1949             return 1.0;
1950     }
1951     return u.f;
1952 }
1953
1954 /*********************************************************************
1955  *      floorf (MSVCRT.@)
1956  *
1957  * Copied from musl: src/math/floorf.c
1958  */
1959 float CDECL floorf( float x )
1960 {
1961     union {float f; UINT32 i;} u = {x};
1962     int e = (int)(u.i >> 23 & 0xff) - 0x7f;
1963     UINT32 m;
1964
1965     if (e >= 23)
1966         return x;
1967     if (e >= 0) {
1968         m = 0x007fffff >> e;
1969         if ((u.i & m) == 0)
1970             return x;
1971         if (u.i >> 31)
1972             u.i += m;
1973         u.i &= ~m;
1974     } else {
1975         if (u.i >> 31 == 0)
1976             return 0;
1977         else if (u.i << 1)
1978             return -1;
1979     }
1980     return u.f;
1981 }
1982
1983 /*********************************************************************
1984  *      frexpf (MSVCRT.@)
1985  *
1986  * Copied from musl: src/math/frexpf.c
1987  */
1988 float CDECL frexpf( float x, int *e )
1989 {
1990     UINT32 ux = *(UINT32*)&x;
1991     int ee = ux >> 23 & 0xff;
1992
1993     if (!ee) {
1994         if (x) {
1995             x = frexpf(x * 0x1p64, e);
1996             *e -= 64;
1997         } else *e = 0;
1998         return x;
1999     } else if (ee == 0xff) {
2000         return x;
2001     }
2002
2003     *e = ee - 0x7e;
2004     ux &= 0x807ffffful;
2005     ux |= 0x3f000000ul;
2006     return *(float*)&ux;
2007 }
2008
2009 /*********************************************************************
2010  *      modff (MSVCRT.@)
2011  *
2012  * Copied from musl: src/math/modff.c
2013  */
2014 float CDECL modff( float x, float *iptr )
2015 {
2016     union {float f; UINT32 i;} u = {x};
2017     UINT32 mask;
2018     int e = (u.i >> 23 & 0xff) - 0x7f;
2019
2020     /* no fractional part */
2021     if (e >= 23) {
2022         *iptr = x;
2023         if (e == 0x80 && u.i << 9 != 0) { /* nan */
2024             return x;
2025         }
2026         u.i &= 0x80000000;
2027         return u.f;
2028     }
2029     /* no integral part */
2030     if (e < 0) {
2031         u.i &= 0x80000000;
2032         *iptr = u.f;
2033         return x;
2034     }
2035
2036     mask = 0x007fffff >> e;
2037     if ((u.i & mask) == 0) {
2038         *iptr = x;
2039         u.i &= 0x80000000;
2040         return u.f;
2041     }
2042     u.i &= ~mask;
2043     *iptr = u.f;
2044     return x - u.f;
2045 }
2046
2047 #endif
2048
2049 #if !defined(__i386__) && !defined(__x86_64__) && (_MSVCR_VER == 0 || _MSVCR_VER >= 110)
2050
2051 /*********************************************************************
2052  *      fabsf (MSVCRT.@)
2053  *
2054  * Copied from musl: src/math/fabsf.c
2055  */
2056 float CDECL fabsf( float x )
2057 {
2058     union { float f; UINT32 i; } u = { x };
2059     u.i &= 0x7fffffff;
2060     return u.f;
2061 }
2062
2063 #endif
2064
2065 /*********************************************************************
2066  *              acos (MSVCRT.@)
2067  *
2068  * Copied from musl: src/math/acos.c
2069  */
/* Rational function num(z) / den(z) that acos() evaluates on its
 * reduced argument (x * x or (1 -+ x) / 2). */
static double acos_R(double z)
{
    /* numerator coefficients of z^1 .. z^6 */
    static const double num[] = {
         1.66666666666666657415e-01,
        -3.25565818622400915405e-01,
         2.01212532134862925881e-01,
        -4.00555345006794114027e-02,
         7.91534994289814532176e-04,
         3.47933107596021167570e-05,
    };
    /* denominator coefficients of z^1 .. z^4, the constant term is 1 */
    static const double den[] = {
        -2.40339491173441421878e+00,
         2.02094576023350569471e+00,
        -6.88283971605453293030e-01,
         7.70381505559019352791e-02,
    };
    double top, bottom;

    top = z * (num[0] + z * (num[1] + z * (num[2] + z * (num[3] + z * (num[4] + z * num[5])))));
    bottom = 1.0 + z * (den[0] + z * (den[1] + z * (den[2] + z * den[3])));
    return top/bottom;
}
2088
2089 double CDECL acos( double x )
2090 {
2091     static const double pio2_hi = 1.57079632679489655800e+00,
2092                  pio2_lo = 6.12323399573676603587e-17;
2093
2094     double z, w, s, c, df;
2095     unsigned int hx, ix;
2096     ULONGLONG llx;
2097
2098     hx = *(ULONGLONG*)&x >> 32;
2099     ix = hx & 0x7fffffff;
2100     /* |x| >= 1 or nan */
2101     if (ix >= 0x3ff00000) {
2102         unsigned int lx;
2103
2104         lx = *(ULONGLONG*)&x;
2105         if (((ix - 0x3ff00000) | lx) == 0) {
2106             /* acos(1)=0, acos(-1)=pi */
2107             if (hx >> 31)
2108                 return 2 * pio2_hi + 7.5231638452626401e-37;
2109             return 0;
2110         }
2111         if (isnan(x)) return x;
2112         return math_error(_DOMAIN, "acos", x, 0, 0 / (x - x));
2113     }
2114     /* |x| < 0.5 */
2115     if (ix < 0x3fe00000) {
2116         if (ix <= 0x3c600000)  /* |x| < 2**-57 */
2117             return pio2_hi + 7.5231638452626401e-37;
2118         return pio2_hi - (x - (pio2_lo - x * acos_R(x * x)));
2119     }
2120     /* x < -0.5 */
2121     if (hx >> 31) {
2122         z = (1.0 + x) * 0.5;
2123         s = sqrt(z);
2124         w = acos_R(z) * s - pio2_lo;
2125         return 2 * (pio2_hi - (s + w));
2126     }
2127     /* x > 0.5 */
2128     z = (1.0 - x) * 0.5;
2129     s = sqrt(z);
2130     df = s;
2131     llx = (*(ULONGLONG*)&df >> 32) << 32;
2132     df = *(double*)&llx;
2133     c = (z - df * df) / (s + df);
2134     w = acos_R(z) * s + c;
2135     return 2 * (df + w);
2136 }
2137
2138 /*********************************************************************
2139  *              asin (MSVCRT.@)
2140  *
2141  * Copied from musl: src/math/asin.c
2142  */
/* Rational function P(z) / Q(z) that asin() evaluates on its reduced
 * argument (x * x or (1 - |x|) / 2). */
static double asin_R(double z)
{
    /* P holds the coefficients of z^1 .. z^6 of the numerator */
    static const double P[] = {
         1.66666666666666657415e-01,
        -3.25565818622400915405e-01,
         2.01212532134862925881e-01,
        -4.00555345006794114027e-02,
         7.91534994289814532176e-04,
         3.47933107596021167570e-05,
    };
    /* Q holds the coefficients of z^1 .. z^4 of the denominator,
     * whose constant term is 1 */
    static const double Q[] = {
        -2.40339491173441421878e+00,
         2.02094576023350569471e+00,
        -6.88283971605453293030e-01,
         7.70381505559019352791e-02,
    };
    double numer, denom;

    numer = z * (P[0] + z * (P[1] + z * (P[2] + z * (P[3] + z * (P[4] + z * P[5])))));
    denom = 1.0 + z * (Q[0] + z * (Q[1] + z * (Q[2] + z * Q[3])));
    return numer / denom;
}
2162
#ifdef __i386__
/*
 * x87 implementation of asin for i386; asin() calls it when the floating
 * point control state is not the one its C code path requires.
 *
 * The instruction sequence loads x, duplicates it, combines the copy with
 * 1 through a subtraction and an addition, multiplies both results, takes
 * the square root and finally executes fpatan on x and that root.
 * This appears to compute atan(x / sqrt((1 - x) * (1 + x))) -- NOTE(review):
 * the operand order of the operand-less fsubp depends on the assembler,
 * confirm before relying on this description.
 *
 * SET_X87_CW / RESET_X87_CW are macros defined elsewhere; presumably they
 * switch the x87 control word for the duration of the computation and
 * restore it afterwards -- confirm against their definition.
 */
double CDECL x87_asin(double);
__ASM_GLOBAL_FUNC( x87_asin,
        "fldl 4(%esp)\n\t"
        SET_X87_CW(~0x37f)
        "fld %st\n\t"
        "fld1\n\t"
        "fsubp\n\t"
        "fld1\n\t"
        "fadd %st(2)\n\t"
        "fmulp\n\t"
        "fsqrt\n\t"
        "fpatan\n\t"
        RESET_X87_CW
        "ret" )
#endif
2179
2180 double CDECL asin( double x )
2181 {
2182     static const double pio2_hi = 1.57079632679489655800e+00,
2183                  pio2_lo = 6.12323399573676603587e-17;
2184
2185     double z, r, s;
2186     unsigned int hx, ix;
2187     ULONGLONG llx;
2188 #ifdef __i386__
2189     unsigned int x87_cw, sse2_cw;
2190 #endif
2191
2192     hx = *(ULONGLONG*)&x >> 32;
2193     ix = hx & 0x7fffffff;
2194     /* |x| >= 1 or nan */
2195     if (ix >= 0x3ff00000) {
2196         unsigned int lx;
2197         lx = *(ULONGLONG*)&x;
2198         if (((ix - 0x3ff00000) | lx) == 0)
2199             /* asin(1) = +-pi/2 with inexact */
2200             return x * pio2_hi + 7.5231638452626401e-37;
2201         if (isnan(x))
2202         {
2203 #ifdef __i386__
2204             return math_error(_DOMAIN, "asin", x, 0, x);
2205 #else
2206             return x;
2207 #endif
2208         }
2209         return math_error(_DOMAIN, "asin", x, 0, 0 / (x - x));
2210     }
2211
2212 #ifdef __i386__
2213     __control87_2(0, 0, &x87_cw, &sse2_cw);
2214     if (!sse2_enabled || (x87_cw & _MCW_EM) != _MCW_EM
2215             || (sse2_cw & (_MCW_EM | _MCW_RC)) != _MCW_EM)
2216         return x87_asin(x);
2217 #endif
2218
2219     /* |x| < 0.5 */
2220     if (ix < 0x3fe00000) {
2221         /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */
2222         if (ix < 0x3e500000 && ix >= 0x00100000)
2223             return x;
2224         return x + x * asin_R(x * x);
2225     }
2226     /* 1 > |x| >= 0.5 */
2227     z = (1 - fabs(x)) * 0.5;
2228     s = sqrt(z);
2229     r = asin_R(z);
2230     if (ix >= 0x3fef3333) {  /* if |x| > 0.975 */
2231         x = pio2_hi - (2 * (s + s * r) - pio2_lo);
2232     } else {
2233         double f, c;
2234         /* f+c = sqrt(z) */
2235         f = s;
2236         llx = (*(ULONGLONG*)&f >> 32) << 32;
2237         f = *(double*)&llx;
2238         c = (z - f * f) / (s + f);
2239         x = 0.5 * pio2_hi - (2 * s * r - (pio2_lo - 2 * c) - (0.5 * pio2_hi - 2 * f));
2240     }
2241     if (hx >> 31)
2242         return -x;
2243     return x;
2244 }
2245
2246 /*********************************************************************
2247  *              atan (MSVCRT.@)
2248  *
2249  * Copied from musl: src/math/atan.c
2250  */
2251 double CDECL atan( double x )
2252 {
2253     static const double atanhi[] = {
2254         4.63647609000806093515e-01,
2255         7.85398163397448278999e-01,
2256         9.82793723247329054082e-01,
2257         1.57079632679489655800e+00,
2258     };
2259     static const double atanlo[] = {
2260         2.26987774529616870924e-17,
2261         3.06161699786838301793e-17,
2262         1.39033110312309984516e-17,
2263         6.12323399573676603587e-17,
2264     };
2265     static const double aT[] = {
2266         3.33333333333329318027e-01,
2267         -1.99999999998764832476e-01,
2268         1.42857142725034663711e-01,
2269         -1.11111104054623557880e-01,
2270         9.09088713343650656196e-02,
2271         -7.69187620504482999495e-02,
2272         6.66107313738753120669e-02,
2273         -5.83357013379057348645e-02,
2274         4.97687799461593236017e-02,
2275         -3.65315727442169155270e-02,
2276         1.62858201153657823623e-02,
2277     };
2278
2279     double w, s1, s2, z;
2280     unsigned int ix, sign;
2281     int id;
2282
2283 #if _MSVCR_VER == 0
2284     if (isnan(x)) return math_error(_DOMAIN, "atan", x, 0, x);
2285 #endif
2286
2287     ix = *(ULONGLONG*)&x >> 32;
2288     sign = ix >> 31;
2289     ix &= 0x7fffffff;
2290     if (ix >= 0x44100000) {   /* if |x| >= 2^66 */
2291         if (isnan(x))
2292             return x;
2293         z = atanhi[3] + 7.5231638452626401e-37;
2294         return sign ? -z : z;
2295     }
2296     if (ix < 0x3fdc0000) {    /* |x| < 0.4375 */
2297         if (ix < 0x3e400000) {  /* |x| < 2^-27 */
2298             if (ix < 0x00100000)
2299                 /* raise underflow for subnormal x */
2300                 fp_barrierf((float)x);
2301             return x;
2302         }
2303         id = -1;
2304     } else {
2305         x = fabs(x);
2306         if (ix < 0x3ff30000) {  /* |x| < 1.1875 */
2307             if (ix < 0x3fe60000) {  /*  7/16 <= |x| < 11/16 */
2308                 id = 0;
2309                 x = (2.0 * x - 1.0) / (2.0 + x);
2310             } else {                /* 11/16 <= |x| < 19/16 */
2311                 id = 1;
2312                 x = (x - 1.0) / (x + 1.0);
2313             }
2314         } else {
2315             if (ix < 0x40038000) {  /* |x| < 2.4375 */
2316                 id = 2;
2317                 x = (x - 1.5) / (1.0 + 1.5 * x);
2318             } else {                /* 2.4375 <= |x| < 2^66 */
2319                 id = 3;
2320                 x = -1.0 / x;
2321             }
2322         }
2323     }
2324     /* end of argument reduction */
2325     z = x * x;
2326     w = z * z;
2327     /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
2328     s1 = z * (aT[0] + w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10])))));
2329     s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9]))));
2330     if (id < 0)
2331         return x - x * (s1 + s2);
2332     z = atanhi[id] - (x * (s1 + s2) - atanlo[id] - x);
2333     return sign ? -z : z;
2334 }
2335
2336 /*********************************************************************
2337  *              atan2 (MSVCRT.@)
2338  *
2339  * Copied from musl: src/math/atan2.c
2340  */
2341 double CDECL atan2( double y, double x )
2342 {
2343     static const double pi     = 3.1415926535897931160E+00,
2344                  pi_lo  = 1.2246467991473531772E-16;
2345
2346     double z;
2347     unsigned int m, lx, ly, ix, iy;
2348
2349     if (isnan(x) || isnan(y))
2350         return x+y;
2351     ix = *(ULONGLONG*)&x >> 32;
2352     lx = *(ULONGLONG*)&x;
2353     iy = *(ULONGLONG*)&y >> 32;
2354     ly = *(ULONGLONG*)&y;
2355     if (((ix - 0x3ff00000) | lx) == 0)  /* x = 1.0 */
2356         return atan(y);
2357     m = ((iy >> 31) & 1) | ((ix >> 30) & 2);  /* 2*sign(x)+sign(y) */
2358     ix = ix & 0x7fffffff;
2359     iy = iy & 0x7fffffff;
2360
2361     /* when y = 0 */
2362     if ((iy | ly) == 0) {
2363         switch(m) {
2364         case 0:
2365         case 1: return y;   /* atan(+-0,+anything)=+-0 */
2366         case 2: return pi;  /* atan(+0,-anything) = pi */
2367         case 3: return -pi; /* atan(-0,-anything) =-pi */
2368         }
2369     }
2370     /* when x = 0 */
2371     if ((ix | lx) == 0)
2372         return m & 1 ? -pi / 2 : pi / 2;
2373     /* when x is INF */
2374     if (ix == 0x7ff00000) {
2375         if (iy == 0x7ff00000) {
2376             switch(m) {
2377             case 0: return pi / 4;      /* atan(+INF,+INF) */
2378             case 1: return -pi / 4;     /* atan(-INF,+INF) */
2379             case 2: return 3 * pi / 4;  /* atan(+INF,-INF) */
2380             case 3: return -3 * pi / 4; /* atan(-INF,-INF) */
2381             }
2382         } else {
2383             switch(m) {
2384             case 0: return 0.0;  /* atan(+...,+INF) */
2385             case 1: return -0.0; /* atan(-...,+INF) */
2386             case 2: return pi;   /* atan(+...,-INF) */
2387             case 3: return -pi;  /* atan(-...,-INF) */
2388             }
2389         }
2390     }
2391     /* |y/x| > 0x1p64 */
2392     if (ix + (64 << 20) < iy || iy == 0x7ff00000)
2393         return m & 1 ? -pi / 2 : pi / 2;
2394
2395     /* z = atan(|y/x|) without spurious underflow */
2396     if ((m & 2) && iy + (64 << 20) < ix)  /* |y/x| < 0x1p-64, x<0 */
2397         z = 0;
2398     else
2399         z = atan(fabs(y / x));
2400     switch (m) {
2401     case 0: return z;                /* atan(+,+) */
2402     case 1: return -z;               /* atan(-,+) */
2403     case 2: return pi - (z - pi_lo); /* atan(+,-) */
2404     default: /* case 3 */
2405         return (z - pi_lo) - pi;     /* atan(-,-) */
2406     }
2407 }
2408
2409 /* Copied from musl: src/math/rint.c */
2410 static double __rint(double x)
2411 {
2412     static const double toint = 1 / DBL_EPSILON;
2413
2414     ULONGLONG llx = *(ULONGLONG*)&x;
2415     int e = llx >> 52 & 0x7ff;
2416     int s = llx >> 63;
2417     unsigned cw;
2418     double y;
2419
2420     if (e >= 0x3ff+52)
2421         return x;
2422     cw = _controlfp(0, 0);
2423     if ((cw & _MCW_PC) != _PC_53)
2424         _controlfp(_PC_53, _MCW_PC);
2425     if (s)
2426         y = fp_barrier(x - toint) + toint;
2427     else
2428         y = fp_barrier(x + toint) - toint;
2429     if ((cw & _MCW_PC) != _PC_53)
2430         _controlfp(cw, _MCW_PC);
2431     if (y == 0)
2432         return s ? -0.0 : 0;
2433     return y;
2434 }
2435
/* Copied from musl: src/math/__rem_pio2.c
 *
 * Argument reduction for the trigonometric functions: subtracts a
 * multiple n of pi/2 from x, stores the remainder as the pair
 * y[0] (leading part) and y[1] (correction term), and returns n.
 * cos() selects the quadrant from n & 3.
 *
 * For inf or nan both y[0] and y[1] are set to x - x and 0 is returned.
 *
 * pi/2 is represented by the pairs pio2_1 + pio2_1t, pio2_2 + pio2_2t
 * and pio2_3 + pio2_3t, which are used for the first, second and third
 * round of the subtraction respectively.
 */
static int __rem_pio2(double x, double *y)
{
    static const double pio4    = 0x1.921fb54442d18p-1,
                 invpio2 = 6.36619772367581382433e-01,
                 pio2_1  = 1.57079632673412561417e+00,
                 pio2_1t = 6.07710050650619224932e-11,
                 pio2_2  = 6.07710050630396597660e-11,
                 pio2_2t = 2.02226624879595063154e-21,
                 pio2_3  = 2.02226624871116645580e-21,
                 pio2_3t = 8.47842766036889956997e-32;

    union {double f; UINT64 i;} u = {x};
    double z, w, t, r, fn, tx[3], ty[2];
    UINT32 ix;
    int sign, n, ex, ey, i;

    sign = u.i >> 63;
    ix = u.i >> 32 & 0x7fffffff; /* high word of |x| */
    /* small arguments: subtract 1 or 2 times pi/2 directly */
    if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */
        if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */
            goto medium; /* cancellation -- use medium case */
        if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */
            if (!sign) {
                z = x - pio2_1; /* one round good to 85 bits */
                y[0] = z - pio2_1t;
                y[1] = (z - y[0]) - pio2_1t;
                return 1;
            } else {
                z = x + pio2_1;
                y[0] = z + pio2_1t;
                y[1] = (z - y[0]) + pio2_1t;
                return -1;
            }
        } else {
            if (!sign) {
                z = x - 2 * pio2_1;
                y[0] = z - 2 * pio2_1t;
                y[1] = (z - y[0]) - 2 * pio2_1t;
                return 2;
            } else {
                z = x + 2 * pio2_1;
                y[0] = z + 2 * pio2_1t;
                y[1] = (z - y[0]) + 2 * pio2_1t;
                return -2;
            }
        }
    }
    /* same scheme with 3 or 4 times pi/2 */
    if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */
        if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */
            if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */
                goto medium;
            if (!sign) {
                z = x - 3 * pio2_1;
                y[0] = z - 3 * pio2_1t;
                y[1] = (z - y[0]) - 3 * pio2_1t;
                return 3;
            } else {
                z = x + 3 * pio2_1;
                y[0] = z + 3 * pio2_1t;
                y[1] = (z - y[0]) + 3 * pio2_1t;
                return -3;
            }
        } else {
            if (ix == 0x401921fb) /* |x| ~= 4pi/2 */
                goto medium;
            if (!sign) {
                z = x - 4 * pio2_1;
                y[0] = z - 4 * pio2_1t;
                y[1] = (z - y[0]) - 4 * pio2_1t;
                return 4;
            } else {
                z = x + 4 * pio2_1;
                y[0] = z + 4 * pio2_1t;
                y[1] = (z - y[0]) + 4 * pio2_1t;
                return -4;
            }
        }
    }
    if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
medium:
        /* also entered by goto from the small cases above */
        fn = __rint(x * invpio2); /* multiple of pi/2 to remove */
        n = (INT32)fn;
        r = x - fn * pio2_1;
        w = fn * pio2_1t; /* 1st round, good to 85 bits */
        /* Matters with directed rounding. */
        if (r - w < -pio4) {
            n--;
            fn--;
            r = x - fn * pio2_1;
            w = fn * pio2_1t;
        } else if (r - w > pio4) {
            n++;
            fn++;
            r = x - fn * pio2_1;
            w = fn * pio2_1t;
        }
        y[0] = r - w;
        u.f = y[0];
        /* compare the exponent of the remainder (ey) with that of x (ex)
         * to see how many leading bits cancelled */
        ey = u.i >> 52 & 0x7ff;
        ex = ix >> 20;
        if (ex - ey > 16) { /* 2nd round, good to 118 bits */
            t = r;
            w = fn * pio2_2;
            r = t - w;
            w = fn * pio2_2t - ((t - r) - w);
            y[0] = r - w;
            u.f = y[0];
            ey = u.i >> 52 & 0x7ff;
            if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */
                t = r;
                w = fn * pio2_3;
                r = t - w;
                w = fn * pio2_3t - ((t - r) - w);
                y[0] = r - w;
            }
        }
        y[1] = (r - y[0]) - w;
        return n;
    }
    /*
     * all other (large) arguments
     */
    if (ix >= 0x7ff00000) {  /* x is inf or NaN */
        y[0] = y[1] = x - x;
        return 0;
    }
    /* set z = scalbn(|x|,-ilogb(x)+23) */
    u.f = x;
    u.i &= (UINT64)-1 >> 12;
    u.i |= (UINT64)(0x3ff + 23) << 52;
    z = u.f;
    /* split z into up to three pieces of 24 bits each */
    for (i = 0; i < 2; i++) {
        tx[i] = (double)(INT32)z;
        z = (z - tx[i]) * 0x1p24;
    }
    tx[i] = z;
    /* skip zero terms, first term is non-zero */
    while (tx[i] == 0.0)
        i--;
    /* __rem_pio2_large() is defined elsewhere; it receives the pieces,
     * the exponent of x relative to the scaling above and the number of
     * pieces, and fills ty[] for |x| */
    n = __rem_pio2_large(tx, ty, (int)(ix >> 20) - (0x3ff + 23), i + 1, 1);
    if (sign) {
        y[0] = -ty[0];
        y[1] = -ty[1];
        return -n;
    }
    y[0] = ty[0];
    y[1] = ty[1];
    return n;
}
2586
/* Copied from musl: src/math/__sin.c
 *
 * Polynomial sine kernel for an argument that was already reduced.
 * x is the leading part of the argument and y its tail; the tail only
 * takes part in the computation when iy is non-zero.
 */
static double __sin(double x, double y, int iy)
{
    /* S[0] .. S[5] are the coefficients S1 .. S6 of the musl original */
    static const double S[] = {
        -1.66666666666666324348e-01,
         8.33333333332248946124e-03,
        -1.98412698298579493134e-04,
         2.75573137070700676789e-06,
        -2.50507602534068634195e-08,
         1.58969099521155010221e-10,
    };
    double x2, x4, tail_poly, x3;

    x2 = x * x;
    x4 = x2 * x2;
    tail_poly = S[1] + x2 * (S[2] + x2 * S[3]) + x2 * x4 * (S[4] + x2 * S[5]);
    x3 = x2 * x;

    if (iy != 0)
        return x - ((x2 * (0.5 * y - x3 * tail_poly) - y) - x3 * S[0]);
    return x + x3 * (S[0] + x2 * tail_poly);
}
2608
/* Copied from musl: src/math/__cos.c
 *
 * Polynomial cosine kernel for an argument that was already reduced.
 * x is the leading part of the argument and y its tail.
 */
static double __cos(double x, double y)
{
    /* C[0] .. C[5] are the coefficients C1 .. C6 of the musl original */
    static const double C[] = {
         4.16666666666666019037e-02,
        -1.38888888888741095749e-03,
         2.48015872894767294178e-05,
        -2.75573143513906633035e-07,
         2.08757232129817482790e-09,
        -1.13596475577881948265e-11,
    };
    double half_x2, x2, poly, x4, head;

    x2 = x * x;
    x4 = x2 * x2;
    poly = x2 * (C[0] + x2 * (C[1] + x2 * C[2])) + x4 * x4 * (C[3] + x2 * (C[4] + x2 * C[5]));
    half_x2 = 0.5 * x2;
    /* head is 1 - x^2/2 as rounded; the return expression adds back what
     * that subtraction lost together with the remaining terms */
    head = 1.0 - half_x2;
    return head + (((1.0 - head) - half_x2) + (x2 * poly - x * y));
}
2627
2628 /*********************************************************************
2629  *              cos (MSVCRT.@)
2630  *
2631  * Copied from musl: src/math/cos.c
2632  */
2633 double CDECL cos( double x )
2634 {
2635     double y[2];
2636     UINT32 ix;
2637     unsigned n;
2638
2639     ix = *(ULONGLONG*)&x >> 32;
2640     ix &= 0x7fffffff;
2641
2642     /* |x| ~< pi/4 */
2643     if (ix <= 0x3fe921fb) {
2644         if (ix < 0x3e46a09e) { /* |x| < 2**-27 * sqrt(2) */
2645             /* raise inexact if x!=0 */
2646             fp_barrier(x + 0x1p120f);
2647             return 1.0;
2648         }
2649         return __cos(x, 0);
2650     }
2651
2652     /* cos(Inf or NaN) is NaN */
2653     if (isinf(x)) return math_error(_DOMAIN, "cos", x, 0, x - x);
2654     if (ix >= 0x7ff00000)
2655         return x - x;
2656
2657     /* argument reduction */
2658     n = __rem_pio2(x, y);
2659     switch (n & 3) {
2660     case 0: return __cos(y[0], y[1]);
2661     case 1: return -__sin(y[0], y[1], 1);
2662     case 2: return -__cos(y[0], y[1]);
2663     default: return __sin(y[0], y[1], 1);
2664     }
2665 }
2666
/* Copied from musl: src/math/expm1.c
 *
 * Computes exp(x) - 1.
 *
 * The argument is reduced to x = k*ln2 + r with ln2 split into
 * ln2_hi + ln2_lo, a rational approximation is evaluated on r, and
 * the result is scaled by 2^k.
 *
 * Special cases, in the order they are tested for |x| >= 56*ln2:
 * nan is returned unchanged, +inf is returned unchanged, -inf gives -1,
 * any other negative x is passed to math_error() as _UNDERFLOW with -1
 * as the proposed result, and x > o_threshold is passed to math_error()
 * as _OVERFLOW.  Both math_error() calls name the function "exp".
 * For |x| < 2**-54 the argument itself is returned.
 */
static double CDECL __expm1(double x)
{
    static const double o_threshold = 7.09782712893383973096e+02,
        ln2_hi = 6.93147180369123816490e-01,
        ln2_lo = 1.90821492927058770002e-10,
        invln2 = 1.44269504088896338700e+00,
        Q1 = -3.33333333333331316428e-02,
        Q2 = 1.58730158725481460165e-03,
        Q3 = -7.93650757867487942473e-05,
        Q4 = 4.00821782732936239552e-06,
        Q5 = -2.01099218183624371326e-07;

    double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
    union {double f; UINT64 i;} u = {x};
    UINT32 hx = u.i >> 32 & 0x7fffffff; /* high word of |x| */
    int k, sign = u.i >> 63;

    /* filter out huge and non-finite argument */
    if (hx >= 0x4043687A) { /* if |x|>=56*ln2 */
        if (isnan(x))
            return x;
        if (isinf(x))
            return sign ? -1 : x;
        if (sign)
            return math_error(_UNDERFLOW, "exp", x, 0, -1);
        if (x > o_threshold)
            return math_error(_OVERFLOW, "exp", x, 0, x * 0x1p1023);
    }

    /* argument reduction */
    if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
        if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
            /* k is +-1 here, so the multiplication by k is skipped */
            if (!sign) {
                hi = x - ln2_hi;
                lo = ln2_lo;
                k = 1;
            } else {
                hi = x + ln2_hi;
                lo = -ln2_lo;
                k = -1;
            }
        } else {
            /* k = x / ln2 rounded to the nearest integer */
            k = invln2 * x + (sign ? -0.5 : 0.5);
            t = k;
            hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
            lo = t * ln2_lo;
        }
        x = hi - lo;
        /* c is the rounding error of the subtraction above */
        c = (hi - x) - lo;
    } else if (hx < 0x3c900000) { /* |x| < 2**-54, return x */
        fp_barrier(x + 0x1p120f);
        if (hx < 0x00100000)
            fp_barrier((float)x);
        return x;
    } else
        k = 0; /* no reduction needed; c is not used in this case */

    /* x is now in primary range */
    hfx = 0.5 * x;
    hxs = x * hfx;
    r1 = 1.0 + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
    t = 3.0 - r1 * hfx;
    e = hxs * ((r1 - t) / (6.0 - x * t));
    if (k == 0) /* c is 0 */
        return x - (x * e - hxs);
    e = x * (e - c) - c;
    e -= hxs;
    /* exp(x) ~ 2^k (x_reduced - e + 1) */
    if (k == -1)
        return 0.5 * (x - e) - 0.5;
    if (k == 1) {
        if (x < -0.25)
            return -2.0 * (e - (x + 0.5));
        return 1.0 + 2.0 * (x - e);
    }
    u.i = (UINT64)(0x3ff + k) << 52; /* 2^k */
    twopk = u.f;
    if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
        y = x - e + 1.0;
        /* for k == 1024 the exponent field built above is not that of a
         * finite number, so the scaling is done in two steps instead */
        if (k == 1024)
            y = y * 2.0 * 0x1p1023;
        else
            y = y * twopk;
        return y - 1.0;
    }
    u.i = (UINT64)(0x3ff - k) << 52; /* 2^-k */
    if (k < 20)
        y = (x - e + (1 - u.f)) * twopk;
    else
        y = (x - (e + u.f) + 1) * twopk;
    return y;
}
2760
2761 static double __expo2(double x, double sign)
2762 {
2763     static const int k = 2043;
2764     static const double kln2 = 0x1.62066151add8bp+10;
2765     double scale;
2766
2767     *(UINT64*)&scale = (UINT64)(0x3ff + k / 2) << 52;
2768     return exp(x - kln2) * (sign * scale) * scale;
2769 }
2770
2771 /*********************************************************************
2772  *              cosh (MSVCRT.@)
2773  *
2774  * Copied from musl: src/math/cosh.c
2775  */
2776 double CDECL cosh( double x )
2777 {
2778     UINT64 ux = *(UINT64*)&x;
2779     UINT32 w;
2780     double t;
2781
2782     /* |x| */
2783     ux &= (uint64_t)-1 / 2;
2784     x = *(double*)&ux;
2785     w = ux >> 32;
2786
2787     /* |x| < log(2) */
2788     if (w < 0x3fe62e42) {
2789         if (w < 0x3ff00000 - (26 << 20)) {
2790             fp_barrier(x + 0x1p120f);
2791             return 1;
2792         }
2793         t = __expm1(x);
2794         return 1 + t * t / (2 * (1 + t));
2795     }
2796
2797     /* |x| < log(DBL_MAX) */
2798     if (w < 0x40862e42) {
2799         t = exp(x);
2800         /* note: if x>log(0x1p26) then the 1/t is not needed */
2801         return 0.5 * (t + 1 / t);
2802     }
2803
2804     /* |x| > log(DBL_MAX) or nan */
2805     /* note: the result is stored to handle overflow */
2806     t = __expo2(x, 1.0);
2807     return t;
2808 }
2809
2810 /* Copied from musl: src/math/exp_data.c */
2811 static const UINT64 exp_T[] = {
2812     0x0ULL, 0x3ff0000000000000ULL,
2813     0x3c9b3b4f1a88bf6eULL, 0x3feff63da9fb3335ULL,
2814     0xbc7160139cd8dc5dULL, 0x3fefec9a3e778061ULL,
2815     0xbc905e7a108766d1ULL, 0x3fefe315e86e7f85ULL,
2816     0x3c8cd2523567f613ULL, 0x3fefd9b0d3158574ULL,
2817     0xbc8bce8023f98efaULL, 0x3fefd06b29ddf6deULL,
2818     0x3c60f74e61e6c861ULL, 0x3fefc74518759bc8ULL,
2819     0x3c90a3e45b33d399ULL, 0x3fefbe3ecac6f383ULL,
2820     0x3c979aa65d837b6dULL, 0x3fefb5586cf9890fULL,
2821     0x3c8eb51a92fdeffcULL, 0x3fefac922b7247f7ULL,
2822     0x3c3ebe3d702f9cd1ULL, 0x3fefa3ec32d3d1a2ULL,
2823     0xbc6a033489906e0bULL, 0x3fef9b66affed31bULL,
2824     0xbc9556522a2fbd0eULL, 0x3fef9301d0125b51ULL,
2825     0xbc5080ef8c4eea55ULL, 0x3fef8abdc06c31ccULL,
2826     0xbc91c923b9d5f416ULL, 0x3fef829aaea92de0ULL,
2827     0x3c80d3e3e95c55afULL, 0x3fef7a98c8a58e51ULL,
2828     0xbc801b15eaa59348ULL, 0x3fef72b83c7d517bULL,
2829     0xbc8f1ff055de323dULL, 0x3fef6af9388c8deaULL,
2830     0x3c8b898c3f1353bfULL, 0x3fef635beb6fcb75ULL,
2831     0xbc96d99c7611eb26ULL, 0x3fef5be084045cd4ULL,
2832     0x3c9aecf73e3a2f60ULL, 0x3fef54873168b9aaULL,
2833     0xbc8fe782cb86389dULL, 0x3fef4d5022fcd91dULL,
2834     0x3c8a6f4144a6c38dULL, 0x3fef463b88628cd6ULL,
2835     0x3c807a05b0e4047dULL, 0x3fef3f49917ddc96ULL,
2836     0x3c968efde3a8a894ULL, 0x3fef387a6e756238ULL,
2837     0x3c875e18f274487dULL, 0x3fef31ce4fb2a63fULL,
2838     0x3c80472b981fe7f2ULL, 0x3fef2b4565e27cddULL,
2839     0xbc96b87b3f71085eULL, 0x3fef24dfe1f56381ULL,
2840     0x3c82f7e16d09ab31ULL, 0x3fef1e9df51fdee1ULL,
2841     0xbc3d219b1a6fbffaULL, 0x3fef187fd0dad990ULL,
2842     0x3c8b3782720c0ab4ULL, 0x3fef1285a6e4030bULL,
2843     0x3c6e149289cecb8fULL, 0x3fef0cafa93e2f56ULL,
2844     0x3c834d754db0abb6ULL, 0x3fef06fe0a31b715ULL,
2845     0x3c864201e2ac744cULL, 0x3fef0170fc4cd831ULL,
2846     0x3c8fdd395dd3f84aULL, 0x3feefc08b26416ffULL,
2847     0xbc86a3803b8e5b04ULL, 0x3feef6c55f929ff1ULL,
2848     0xbc924aedcc4b5068ULL, 0x3feef1a7373aa9cbULL,
2849     0xbc9907f81b512d8eULL, 0x3feeecae6d05d866ULL,
2850     0xbc71d1e83e9436d2ULL, 0x3feee7db34e59ff7ULL,
2851     0xbc991919b3ce1b15ULL, 0x3feee32dc313a8e5ULL,
2852     0x3c859f48a72a4c6dULL, 0x3feedea64c123422ULL,
2853     0xbc9312607a28698aULL, 0x3feeda4504ac801cULL,
2854     0xbc58a78f4817895bULL, 0x3feed60a21f72e2aULL,
2855     0xbc7c2c9b67499a1bULL, 0x3feed1f5d950a897ULL,
2856     0x3c4363ed60c2ac11ULL, 0x3feece086061892dULL,
2857     0x3c9666093b0664efULL, 0x3feeca41ed1d0057ULL,
2858     0x3c6ecce1daa10379ULL, 0x3feec6a2b5c13cd0ULL,
2859     0x3c93ff8e3f0f1230ULL, 0x3feec32af0d7d3deULL,
2860     0x3c7690cebb7aafb0ULL, 0x3feebfdad5362a27ULL,
2861     0x3c931dbdeb54e077ULL, 0x3feebcb299fddd0dULL,
2862     0xbc8f94340071a38eULL, 0x3feeb9b2769d2ca7ULL,
2863     0xbc87deccdc93a349ULL, 0x3feeb6daa2cf6642ULL,
2864     0xbc78dec6bd0f385fULL, 0x3feeb42b569d4f82ULL,
2865     0xbc861246ec7b5cf6ULL, 0x3feeb1a4ca5d920fULL,
2866     0x3c93350518fdd78eULL, 0x3feeaf4736b527daULL,
2867     0x3c7b98b72f8a9b05ULL, 0x3feead12d497c7fdULL,
2868     0x3c9063e1e21c5409ULL, 0x3feeab07dd485429ULL,
2869     0x3c34c7855019c6eaULL, 0x3feea9268a5946b7ULL,
2870     0x3c9432e62b64c035ULL, 0x3feea76f15ad2148ULL,
2871     0xbc8ce44a6199769fULL, 0x3feea5e1b976dc09ULL,
2872     0xbc8c33c53bef4da8ULL, 0x3feea47eb03a5585ULL,
2873     0xbc845378892be9aeULL, 0x3feea34634ccc320ULL,
2874     0xbc93cedd78565858ULL, 0x3feea23882552225ULL,
2875     0x3c5710aa807e1964ULL, 0x3feea155d44ca973ULL,
2876     0xbc93b3efbf5e2228ULL, 0x3feea09e667f3bcdULL,
2877     0xbc6a12ad8734b982ULL, 0x3feea012750bdabfULL,
2878     0xbc6367efb86da9eeULL, 0x3fee9fb23c651a2fULL,
2879     0xbc80dc3d54e08851ULL, 0x3fee9f7df9519484ULL,
2880     0xbc781f647e5a3ecfULL, 0x3fee9f75e8ec5f74ULL,
2881     0xbc86ee4ac08b7db0ULL, 0x3fee9f9a48a58174ULL,
2882     0xbc8619321e55e68aULL, 0x3fee9feb564267c9ULL,
2883     0x3c909ccb5e09d4d3ULL, 0x3feea0694fde5d3fULL,
2884     0xbc7b32dcb94da51dULL, 0x3feea11473eb0187ULL,
2885     0x3c94ecfd5467c06bULL, 0x3feea1ed0130c132ULL,
2886     0x3c65ebe1abd66c55ULL, 0x3feea2f336cf4e62ULL,
2887     0xbc88a1c52fb3cf42ULL, 0x3feea427543e1a12ULL,
2888     0xbc9369b6f13b3734ULL, 0x3feea589994cce13ULL,
2889     0xbc805e843a19ff1eULL, 0x3feea71a4623c7adULL,
2890     0xbc94d450d872576eULL, 0x3feea8d99b4492edULL,
2891     0x3c90ad675b0e8a00ULL, 0x3feeaac7d98a6699ULL,
2892     0x3c8db72fc1f0eab4ULL, 0x3feeace5422aa0dbULL,
2893     0xbc65b6609cc5e7ffULL, 0x3feeaf3216b5448cULL,
2894     0x3c7bf68359f35f44ULL, 0x3feeb1ae99157736ULL,
2895     0xbc93091fa71e3d83ULL, 0x3feeb45b0b91ffc6ULL,
2896     0xbc5da9b88b6c1e29ULL, 0x3feeb737b0cdc5e5ULL,
2897     0xbc6c23f97c90b959ULL, 0x3feeba44cbc8520fULL,
2898     0xbc92434322f4f9aaULL, 0x3feebd829fde4e50ULL,
2899     0xbc85ca6cd7668e4bULL, 0x3feec0f170ca07baULL,
2900     0x3c71affc2b91ce27ULL, 0x3feec49182a3f090ULL,
2901     0x3c6dd235e10a73bbULL, 0x3feec86319e32323ULL,
2902     0xbc87c50422622263ULL, 0x3feecc667b5de565ULL,
2903     0x3c8b1c86e3e231d5ULL, 0x3feed09bec4a2d33ULL,
2904     0xbc91bbd1d3bcbb15ULL, 0x3feed503b23e255dULL,
2905     0x3c90cc319cee31d2ULL, 0x3feed99e1330b358ULL,
2906     0x3c8469846e735ab3ULL, 0x3feede6b5579fdbfULL,
2907     0xbc82dfcd978e9db4ULL, 0x3feee36bbfd3f37aULL,
2908     0x3c8c1a7792cb3387ULL, 0x3feee89f995ad3adULL,
2909     0xbc907b8f4ad1d9faULL, 0x3feeee07298db666ULL,
2910     0xbc55c3d956dcaebaULL, 0x3feef3a2b84f15fbULL,
2911     0xbc90a40e3da6f640ULL, 0x3feef9728de5593aULL,
2912     0xbc68d6f438ad9334ULL, 0x3feeff76f2fb5e47ULL,
2913     0xbc91eee26b588a35ULL, 0x3fef05b030a1064aULL,
2914     0x3c74ffd70a5fddcdULL, 0x3fef0c1e904bc1d2ULL,
2915     0xbc91bdfbfa9298acULL, 0x3fef12c25bd71e09ULL,
2916     0x3c736eae30af0cb3ULL, 0x3fef199bdd85529cULL,
2917     0x3c8ee3325c9ffd94ULL, 0x3fef20ab5fffd07aULL,
2918     0x3c84e08fd10959acULL, 0x3fef27f12e57d14bULL,
2919     0x3c63cdaf384e1a67ULL, 0x3fef2f6d9406e7b5ULL,
2920     0x3c676b2c6c921968ULL, 0x3fef3720dcef9069ULL,
2921     0xbc808a1883ccb5d2ULL, 0x3fef3f0b555dc3faULL,
2922     0xbc8fad5d3ffffa6fULL, 0x3fef472d4a07897cULL,
2923     0xbc900dae3875a949ULL, 0x3fef4f87080d89f2ULL,
2924     0x3c74a385a63d07a7ULL, 0x3fef5818dcfba487ULL,
2925     0xbc82919e2040220fULL, 0x3fef60e316c98398ULL,
2926     0x3c8e5a50d5c192acULL, 0x3fef69e603db3285ULL,
2927     0x3c843a59ac016b4bULL, 0x3fef7321f301b460ULL,
2928     0xbc82d52107b43e1fULL, 0x3fef7c97337b9b5fULL,
2929     0xbc892ab93b470dc9ULL, 0x3fef864614f5a129ULL,
2930     0x3c74b604603a88d3ULL, 0x3fef902ee78b3ff6ULL,
2931     0x3c83c5ec519d7271ULL, 0x3fef9a51fbc74c83ULL,
2932     0xbc8ff7128fd391f0ULL, 0x3fefa4afa2a490daULL,
2933     0xbc8dae98e223747dULL, 0x3fefaf482d8e67f1ULL,
2934     0x3c8ec3bc41aa2008ULL, 0x3fefba1bee615a27ULL,
2935     0x3c842b94c3a9eb32ULL, 0x3fefc52b376bba97ULL,
2936     0x3c8a64a931d185eeULL, 0x3fefd0765b6e4540ULL,
2937     0xbc8e37bae43be3edULL, 0x3fefdbfdad9cbe14ULL,
2938     0x3c77893b4d91cd9dULL, 0x3fefe7c1819e90d8ULL,
2939     0x3c5305c14160cc89ULL, 0x3feff3c22b8f71f1ULL
2940 };
2941
2942 /*********************************************************************
2943  *              exp (MSVCRT.@)
2944  *
2945  * Copied from musl: src/math/exp.c
      *
      * Computes e^x.
      *
      * The argument is reduced as x = k*ln2/N + r with N = 1 << 7; the table
      * exp_T supplies 2^(k/N) as a (tail, scale bits) pair and a short
      * polynomial in r (coefficients C[]) approximates exp(r) - 1.
      *
      * Special cases, as coded below:
      *  - exponent field of x below 0x3c9 (tiny x, including 0): returns 1.0 + x.
      *  - x == -inf: returns 0.0.
      *  - remaining inputs with exponent field 0x7ff (inf/NaN): returns 1.0 + x.
      *  - finite x with exponent field >= 0x409: reported through math_error()
      *    as _UNDERFLOW (sign bit set) or _OVERFLOW (sign bit clear).
      *  - results that become infinite, or that fall below 0x1p-1022, in the
      *    careful path at the end are likewise reported through math_error().
      *  - when _MSVCR_VER == 0 an extra check routes some inputs to _DOMAIN
      *    (see the review note next to it).
2946  */
2947 double CDECL exp( double x )
2948 {
         /* Polynomial coefficients applied to r2, r2*r, r2*r2 and r2*r2*r below. */
2949     static const double C[] = {
2950         0x1.ffffffffffdbdp-2,
2951         0x1.555555555543cp-3,
2952         0x1.55555cf172b91p-5,
2953         0x1.1111167a4d017p-7
2954     };
         /* N/ln2, and -ln2/N split into a high and a low part (N = 1 << 7). */
2955     static const double invln2N = 0x1.71547652b82fep0 * (1 << 7),
2956         negln2hiN = -0x1.62e42fefa0000p-8,
2957         negln2loN = -0x1.cf79abc9e3b3ap-47;
2958
2959     UINT32 abstop;
2960     UINT64 ki, idx, top, sbits;
2961     double kd, z, r, r2, scale, tail, tmp;
2962
         /* abstop = 11-bit exponent field of x (the sign bit is masked off). */
2963     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
         /* abstop is unsigned, so the subtraction wraps: this is true when
            abstop < 0x3c9 or abstop >= 0x408. */
2964     if (abstop -  0x3c9 >= 0x408 - 0x3c9) {
             /* Only the wrapped case (abstop < 0x3c9) can reach 0x80000000. */
2965         if (abstop - 0x3c9 >= 0x80000000)
2966             /* Avoid spurious underflow for tiny x. */
2967             /* Note: 0 is common input. */
2968             return 1.0 + x;
2969         if (abstop >= 0x409) {
                 /* x == -inf */
2970             if (*(UINT64*)&x == 0xfff0000000000000ULL)
2971                 return 0.0;
2972 #if _MSVCR_VER == 0
                 /* NOTE(review): this is an unsigned compare, so besides
                    positive NaNs it is also true for every bit pattern with
                    the sign bit set that reaches this point (negative finite
                    x with exponent field >= 0x409, negative NaNs) - confirm
                    that routing those to _DOMAIN is intended. */
2973             if (*(UINT64*)&x > 0x7ff0000000000000ULL)
2974                 return math_error(_DOMAIN, "exp", x, 0, 1.0 + x);
2975 #endif
                 /* Exponent field all ones: inf or NaN (-inf was handled above). */
2976             if (abstop >= 0x7ff)
2977                 return 1.0 + x;
                 /* Finite but huge |x|: the sign bit selects underflow or overflow. */
2978             if (*(UINT64*)&x >> 63)
2979                 return math_error(_UNDERFLOW, "exp", x, 0, fp_barrier(DBL_MIN) * DBL_MIN);
2980             else
2981                 return math_error(_OVERFLOW, "exp", x, 0, fp_barrier(DBL_MAX) * DBL_MAX);
2982         }
2983         /* Large x is special cased below. */
             /* abstop == 0 is used as the flag for that special case. */
2984         abstop = 0;
2985     }
2986
2987     /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
2988     /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
2989     z = invln2N * x;
2990     kd = __round(z);
2991     ki = (INT64)kd;
2992
2993     r = x + kd * negln2hiN + kd * negln2loN;
2994     /* 2^(k/N) ~= scale * (1 + tail). */
         /* exp_T holds pairs: entry idx is the tail, entry idx + 1 the scale bits. */
2995     idx = 2 * (ki % (1 << 7));
2996     top = ki << (52 - 7);
2997     tail = *(double*)&exp_T[idx];
2998     /* This is only a valid scale when -1023*N < k < 1024*N. */
2999     sbits = exp_T[idx + 1] + top;
3000     /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
3001     /* Evaluation is optimized assuming superscalar pipelined execution. */
3002     r2 = r * r;
3003     /* Without fma the worst case error is 0.25/N ulp larger. */
3004     /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
3005     tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]);
3006     if (abstop == 0) {
3007         /* Handle cases that may overflow or underflow when computing the result that
3008            is scale*(1+TMP) without intermediate rounding. The bit representation of
3009            scale is in SBITS, however it has a computed exponent that may have
3010            overflown into the sign bit so that needs to be adjusted before using it as
3011            a double. (int32_t)KI is the k used in the argument reduction and exponent
3012            adjustment of scale, positive k here means the result may overflow and
3013            negative k means the result may underflow. */
             /* Note: this 'scale' shadows the function-level variable of the same name. */
3014         double scale, y;
3015
             /* Bit 31 of ki clear is used as the "k is positive" test. */
3016         if ((ki & 0x80000000) == 0) {
3017             /* k > 0, the exponent of scale might have overflowed by <= 460. */
                 /* Lower the exponent by 1009 here and multiply it back in below. */
3018             sbits -= 1009ull << 52;
3019             scale = *(double*)&sbits;
3020             y = 0x1p1009 * (scale + scale * tmp);
3021             if (isinf(y))
3022                 return math_error(_OVERFLOW, "exp", x, 0, y);
3023             return y;
3024         }
3025         /* k < 0, need special care in the subnormal range. */
             /* Raise the exponent by 1022 here; 0x1p-1022 is multiplied back in below. */
3026         sbits += 1022ull << 52;
3027         scale = *(double*)&sbits;
3028         y = scale + scale * tmp;
3029         if (y < 1.0) {
3030             /* Round y to the right precision before scaling it into the subnormal
3031                range to avoid double rounding that can cause 0.5+E/2 ulp error where
3032                E is the worst-case ulp error outside the subnormal range. So this
3033                is only useful if the goal is better than 1 ulp worst-case error. */
3034             double hi, lo;
3035             lo = scale - y + scale * tmp;
3036             hi = 1.0 + y;
3037             lo = 1.0 - hi + y + lo;
3038             y = hi + lo - 1.0;
3039             /* Avoid -0.0 with downward rounding. */
3040             if (y == 0.0)
3041                 y = 0.0;
3042             /* The underflow exception needs to be signaled explicitly. */
3043             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
3044             y = 0x1p-1022 * y;
3045             return math_error(_UNDERFLOW, "exp", x, 0, y);
3046         }
3047         y = 0x1p-1022 * y;
3048         return y;
3049     }
         /* Common case: sbits is directly usable as the bit pattern of scale. */
3050     scale = *(double*)&sbits;
3051     /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
3052        is no spurious underflow here even without fma. */
3053     return scale + scale * tmp;
3054 }
3055
3056 /*********************************************************************
3057  *              fmod (MSVCRT.@)
3058  *
3059  * Copied from musl: src/math/fmod.c
3060  */
3061 double CDECL fmod( double x, double y )
3062 {
3063     UINT64 xi = *(UINT64*)&x;
3064     UINT64 yi = *(UINT64*)&y;
3065     int ex = xi >> 52 & 0x7ff;
3066     int ey = yi >> 52 & 0x7ff;
3067     int sx = xi >> 63;
3068     UINT64 i;
3069
3070     if (isinf(x)) return math_error(_DOMAIN, "fmod", x, y, (x * y) / (x * y));
3071     if (yi << 1 == 0 || isnan(y) || ex == 0x7ff)
3072         return (x * y) / (x * y);
3073     if (xi << 1 <= yi << 1) {
3074         if (xi << 1 == yi << 1)
3075             return 0 * x;
3076         return x;
3077     }
3078
3079     /* normalize x and y */
3080     if (!ex) {
3081         for (i = xi << 12; i >> 63 == 0; ex--, i <<= 1);
3082         xi <<= -ex + 1;
3083     } else {
3084         xi &= -1ULL >> 12;
3085         xi |= 1ULL << 52;
3086     }
3087     if (!ey) {
3088         for (i = yi << 12; i >> 63 == 0; ey--, i <<= 1);
3089         yi <<= -ey + 1;
3090     } else {
3091         yi &= -1ULL >> 12;
3092         yi |= 1ULL << 52;
3093     }
3094
3095     /* x mod y */
3096     for (; ex > ey; ex--) {
3097         i = xi - yi;
3098         if (i >> 63 == 0) {
3099             if (i == 0)
3100                 return 0 * x;
3101             xi = i;
3102         }
3103         xi <<= 1;
3104     }
3105     i = xi - yi;
3106     if (i >> 63 == 0) {
3107         if (i == 0)
3108             return 0 * x;
3109         xi = i;
3110     }
3111     for (; xi >> 52 == 0; xi <<= 1, ex--);
3112
3113     /* scale result */
3114     if (ex > 0) {
3115         xi -= 1ULL << 52;
3116         xi |= (UINT64)ex << 52;
3117     } else {
3118         xi >>= -ex + 1;
3119     }
3120     xi |= (UINT64)sx << 63;
3121     return *(double*)&xi;
3122 }
3123
3124 /*********************************************************************
3125  *              log (MSVCRT.@)
3126  *
3127  * Copied from musl: src/math/log.c src/math/log_data.c
      *
      * Natural logarithm of x.
      *
      * Inputs close to 1 use a dedicated polynomial (coefficients B[]); all
      * other inputs are written as x = 2^k * z, a table entry c near z is
      * looked up (T[] holds invc/logc, T2[] holds c split into chi + clo) and
      * log(x) is assembled from log1p(z/c - 1), log(c) and k*Ln2.
      *
      * Special cases, as coded below:
      *  - x == 1: returns 0.
      *  - x == +-0: reported through math_error(_SING); the value passed is
      *    (top & 0x8000 ? 1.0 : -1.0) / x.
      *  - x == +inf and NaN inputs: returned unchanged.
      *  - any other x with the sign bit set (including -inf): reported through
      *    math_error(_DOMAIN) with (x - x) / (x - x).
      *  - positive subnormals are rescaled by 2^52 and then take the general path.
3128  */
3129 double CDECL log( double x )
3130 {
3131     static const double Ln2hi = 0x1.62e42fefa3800p-1,
3132         Ln2lo = 0x1.ef35793c76730p-45;
         /* Polynomial coefficients for the general (table-driven) path. */
3133     static const double A[] = {
3134         -0x1.0000000000001p-1,
3135         0x1.555555551305bp-2,
3136         -0x1.fffffffeb459p-3,
3137         0x1.999b324f10111p-3,
3138         -0x1.55575e506c89fp-3
3139     };
         /* Polynomial coefficients for the close-to-1 path. */
3140     static const double B[] = {
3141         -0x1p-1,
3142         0x1.5555555555577p-2,
3143         -0x1.ffffffffffdcbp-3,
3144         0x1.999999995dd0cp-3,
3145         -0x1.55555556745a7p-3,
3146         0x1.24924a344de3p-3,
3147         -0x1.fffffa4423d65p-4,
3148         0x1.c7184282ad6cap-4,
3149         -0x1.999eb43b068ffp-4,
3150         0x1.78182f7afd085p-4,
3151         -0x1.5521375d145cdp-4
3152     };
         /* Per-subinterval data, indexed by i below: invc multiplies (z - c),
            logc is added as the log(c) term. */
3153     static const struct {
3154         double invc, logc;
3155     } T[] = {
3156         {0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
3157         {0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
3158         {0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
3159         {0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
3160         {0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
3161         {0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
3162         {0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
3163         {0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
3164         {0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
3165         {0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
3166         {0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
3167         {0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
3168         {0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
3169         {0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
3170         {0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
3171         {0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
3172         {0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
3173         {0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
3174         {0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
3175         {0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
3176         {0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
3177         {0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
3178         {0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
3179         {0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
3180         {0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
3181         {0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
3182         {0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
3183         {0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
3184         {0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
3185         {0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
3186         {0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
3187         {0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
3188         {0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
3189         {0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
3190         {0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
3191         {0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
3192         {0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
3193         {0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
3194         {0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
3195         {0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
3196         {0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
3197         {0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
3198         {0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
3199         {0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
3200         {0x1.293726014b530p+0, -0x1.31b996b490000p-3},
3201         {0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
3202         {0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
3203         {0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
3204         {0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
3205         {0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
3206         {0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
3207         {0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
3208         {0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
3209         {0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
3210         {0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
3211         {0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
3212         {0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
3213         {0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
3214         {0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
3215         {0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
3216         {0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
3217         {0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
3218         {0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
3219         {0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
3220         {0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
3221         {0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
3222         {0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
3223         {0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
3224         {0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
3225         {0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
3226         {0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
3227         {0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
3228         {0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
3229         {0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
3230         {0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
3231         {0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
3232         {0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
3233         {0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
3234         {0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
3235         {0x1.008040614b195p+0, -0x1.0040979240000p-9},
3236         {0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
3237         {0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
3238         {0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
3239         {0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
3240         {0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
3241         {0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
3242         {0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
3243         {0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
3244         {0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
3245         {0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
3246         {0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
3247         {0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
3248         {0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
3249         {0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
3250         {0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
3251         {0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
3252         {0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
3253         {0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
3254         {0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
3255         {0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
3256         {0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
3257         {0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
3258         {0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
3259         {0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
3260         {0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
3261         {0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
3262         {0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
3263         {0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
3264         {0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
3265         {0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
3266         {0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
3267         {0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
3268         {0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
3269         {0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
3270         {0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
3271         {0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
3272         {0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
3273         {0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
3274         {0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
3275         {0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
3276         {0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
3277         {0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
3278         {0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
3279         {0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
3280         {0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
3281         {0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
3282         {0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
3283         {0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2}
3284     };
         /* Per-subinterval value c, stored as chi + clo; both parts are
            subtracted from z below. */
3285     static const struct {
3286         double chi, clo;
3287     } T2[] = {
3288         {0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
3289         {0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
3290         {0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
3291         {0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
3292         {0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
3293         {0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
3294         {0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
3295         {0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
3296         {0x1.710000e86978p-1, 0x1.bff6671097952p-56},
3297         {0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
3298         {0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
3299         {0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
3300         {0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
3301         {0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
3302         {0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
3303         {0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
3304         {0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
3305         {0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
3306         {0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
3307         {0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
3308         {0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
3309         {0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
3310         {0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
3311         {0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
3312         {0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
3313         {0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
3314         {0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
3315         {0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
3316         {0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
3317         {0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
3318         {0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
3319         {0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
3320         {0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
3321         {0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
3322         {0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
3323         {0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
3324         {0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
3325         {0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
3326         {0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
3327         {0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
3328         {0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
3329         {0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
3330         {0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
3331         {0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
3332         {0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
3333         {0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
3334         {0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
3335         {0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
3336         {0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
3337         {0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
3338         {0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
3339         {0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
3340         {0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
3341         {0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
3342         {0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
3343         {0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
3344         {0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
3345         {0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
3346         {0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
3347         {0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
3348         {0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
3349         {0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
3350         {0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
3351         {0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
3352         {0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
3353         {0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
3354         {0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
3355         {0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
3356         {0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
3357         {0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
3358         {0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
3359         {0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
3360         {0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
3361         {0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
3362         {0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
3363         {0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
3364         {0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
3365         {0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
3366         {0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
3367         {0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
3368         {0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
3369         {0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
3370         {0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
3371         {0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
3372         {0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
3373         {0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
3374         {0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
3375         {0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
3376         {0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
3377         {0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
3378         {0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
3379         {0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
3380         {0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
3381         {0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
3382         {0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
3383         {0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
3384         {0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
3385         {0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
3386         {0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
3387         {0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
3388         {0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
3389         {0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
3390         {0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
3391         {0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
3392         {0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
3393         {0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
3394         {0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
3395         {0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
3396         {0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
3397         {0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
3398         {0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
3399         {0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
3400         {0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
3401         {0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
3402         {0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
3403         {0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
3404         {0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
3405         {0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
3406         {0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
3407         {0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
3408         {0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
3409         {0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
3410         {0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
3411         {0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
3412         {0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
3413         {0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
3414         {0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
3415         {0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54}
3416     };
3417
3418     double w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
3419     UINT64 ix, iz, tmp;
3420     UINT32 top;
3421     int k, i;
3422
3423     ix = *(UINT64*)&x;
         /* top = sign bit, exponent field and the 4 highest mantissa bits. */
3424     top = ix >> 48;
         /* Unsigned range check on the bit pattern: true for x in
            [1 - 0x1p-4, 1 + 0x1.09p-4). */
3425     if (ix - 0x3fee000000000000ULL < 0x3090000000000ULL) {
3426         double rhi, rlo;
3427
3428         /* Handle close to 1.0 inputs separately. */
3429         /* Fix sign of zero with downward rounding when x==1. */
3430         if (ix == 0x3ff0000000000000ULL)
3431             return 0;
3432         r = x - 1.0;
3433         r2 = r * r;
3434         r3 = r * r2;
3435         y = r3 * (B[1] + r * B[2] + r2 * B[3] + r3 * (B[4] + r * B[5] + r2 * B[6] +
3436                     r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
3437         /* Worst-case error is around 0.507 ULP. */
             /* rhi is r with its low-order bits rounded away (via the 2^27
                multiple w); rlo is what is left of r. */
3438         w = r * 0x1p27;
3439         rhi = r + w - w;
3440         rlo = r - rhi;
3441         w = rhi * rhi * B[0]; /* B[0] == -0.5. */
3442         hi = r + w;
3443         lo = r - hi + w;
3444         lo += B[0] * rlo * (rhi + r);
3445         y += lo;
3446         y += hi;
3447         return y;
3448     }
3449     if (top - 0x0010 >= 0x7ff0 - 0x0010) {
3450         /* x < 0x1p-1022 or inf or nan. */
             /* ix * 2 drops the sign bit, so this matches both +0 and -0. */
3451         if (ix * 2 == 0)
3452             return math_error(_SING, "log", x, 0, (top & 0x8000 ? 1.0 : -1.0) / x);
3453         if (ix == 0x7ff0000000000000ULL) /* log(inf) == inf. */
3454             return x;
             /* Exponent field all ones with a non-zero mantissa: NaN. */
3455         if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL))
3456             return x;
             /* Sign bit set: negative argument. */
3457         if (top & 0x8000)
3458             return math_error(_DOMAIN, "log", x, 0, (x - x) / (x - x));
3459         /* x is subnormal, normalize it. */
3460         x *= 0x1p52;
3461         ix = *(UINT64*)&x;
             /* Take the factor 2^52 back out of the exponent field. */
3462         ix -= 52ULL << 52;
3463     }
3464
3465     /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
3466        The range is split into N subintervals.
3467        The ith subinterval contains z and c is near its center. */
         /* The constant subtracted here plays the role of OFF; i is the
            7-bit table index (N = 1 << 7). */
3468     tmp = ix - 0x3fe6000000000000ULL;
3469     i = (tmp >> (52 - 7)) % (1 << 7);
3470     k = (INT64)tmp >> 52; /* arithmetic shift */
3471     iz = ix - (tmp & 0xfffULL << 52);
3472     invc = T[i].invc;
3473     logc = T[i].logc;
3474     z = *(double*)&iz;
3475
3476     /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
3477     /* r ~= z/c - 1, |r| < 1/(2*N). */
3478     r = (z - T2[i].chi - T2[i].clo) * invc;
3479     kd = (double)k;
3480
3481     /* hi + lo = r + log(c) + k*Ln2. */
3482     w = kd * Ln2hi + logc;
3483     hi = w + r;
3484     lo = w - hi + r + kd * Ln2lo;
3485
3486     /* log(x) = lo + (log1p(r) - r) + hi. */
3487     r2 = r * r; /* rounding error: 0x1p-54/N^2. */
3488     /* Worst case error if |y| > 0x1p-5:
3489        0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
3490        Worst case error if |y| > 0x1p-4:
3491        0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
3492     y = lo + r2 * A[0] +
3493         r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
3494     return y;
3495 }
3496
3497 /*********************************************************************
3498  *              log10 (MSVCRT.@)
      *
      * Base-10 logarithm of x.
      *
      * x is reduced to 2^k times a value in [sqrt(2)/2, sqrt(2)], log(1+f) is
      * evaluated as hi + lo, and the result is assembled as
      * log10(1+f) + k*log10(2) using the hi/lo constant pairs ivln10hi/lo
      * and log10_2hi/lo.
      *
      * Special cases, as coded below:
      *  - x == +-0: reported through math_error(_SING) with -1 / (x * x).
      *  - NaN inputs and x == +inf: returned unchanged.
      *  - any other x with the sign bit set: reported through
      *    math_error(_DOMAIN) with (x - x) / (x - x).
      *  - x == 1: returns 0.
      *  - positive subnormals are scaled by 2^54 (k starts at -54 for them).
      *
      * NOTE(review): the structure looks like musl's src/math/log10.c, but
      * unlike the neighbouring functions this header does not say so - confirm.
3499  */
3500 double CDECL log10( double x )
3501 {
3502     static const double ivln10hi = 4.34294481878168880939e-01,
3503         ivln10lo = 2.50829467116452752298e-11,
3504         log10_2hi = 3.01029995663611771306e-01,
3505         log10_2lo = 3.69423907715893078616e-13,
3506         Lg1 = 6.666666666666735130e-01,
3507         Lg2 = 3.999999999940941908e-01,
3508         Lg3 = 2.857142874366239149e-01,
3509         Lg4 = 2.222219843214978396e-01,
3510         Lg5 = 1.818357216161805012e-01,
3511         Lg6 = 1.531383769920937332e-01,
3512         Lg7 = 1.479819860511658591e-01;
3513
3514     union {double f; UINT64 i;} u = {x};
3515     double hfsq, f, s, z, R, w, t1, t2, dk, y, hi, lo, val_hi, val_lo;
3516     UINT32 hx;
3517     int k;
3518
         /* hx = high 32 bits of x: sign, exponent and the top mantissa bits. */
3519     hx = u.i >> 32;
3520     k = 0;
         /* Exponent field zero (zero or subnormal), or sign bit set. */
3521     if (hx < 0x00100000 || hx >> 31) {
             /* +0 or -0 */
3522         if (u.i << 1 == 0)
3523             return math_error(_SING, "log10", x, 0, -1 / (x * x));
             /* NaN (checked with the sign bit masked off) */
3524         if ((u.i & ~(1ULL << 63)) > 0x7ff0000000000000ULL)
3525             return x;
3526         if (hx >> 31)
3527             return math_error(_DOMAIN, "log10", x, 0, (x - x) / (x - x));
3528         /* subnormal number, scale x up */
3529         k -= 54;
3530         x *= 0x1p54;
3531         u.f = x;
3532         hx = u.i >> 32;
3533     } else if (hx >= 0x7ff00000) {
             /* sign bit is clear here: +inf or a positive NaN */
3534         return x;
3535     } else if (hx == 0x3ff00000 && u.i<<32 == 0)
             /* x == 1.0 exactly */
3536         return 0;
3537
3538     /* reduce x into [sqrt(2)/2, sqrt(2)] */
3539     hx += 0x3ff00000 - 0x3fe6a09e;
3540     k += (int)(hx >> 20) - 0x3ff;
3541     hx = (hx & 0x000fffff) + 0x3fe6a09e;
         /* Rebuild x from the adjusted high word and the original low word. */
3542     u.i = (UINT64)hx << 32 | (u.i & 0xffffffff);
3543     x = u.f;
3544
3545     f = x - 1.0;
3546     hfsq = 0.5 * f * f;
3547     s = f / (2.0 + f);
3548     z = s * s;
3549     w = z * z;
         /* Polynomial in z = s*s, split into two interleaved sums t1 and t2. */
3550     t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
3551     t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
3552     R = t2 + t1;
3553
3554     /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */
3555     hi = f - hfsq;
3556     u.f = hi;
         /* Clear the low 32 bits of hi; lo below picks up the difference. */
3557     u.i &= (UINT64)-1 << 32;
3558     hi = u.f;
3559     lo = f - hi - hfsq + s * (hfsq + R);
3560
3561     /* val_hi+val_lo ~ log10(1+f) + k*log10(2) */
3562     val_hi = hi * ivln10hi;
3563     dk = k;
3564     y = dk * log10_2hi;
3565     val_lo = dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi;
3566
3567     /*
3568      * Extra precision for adding y is not strictly needed
3569      * since there is no very large cancellation near x = sqrt(2) or
3570      * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs
3571      * with some parallelism and it reduces the error for many args.
3572      */
3573     w = y + val_hi;
3574     val_lo += (y - w) + val_hi;
3575     val_hi = w;
3576
3577     return val_lo + val_hi;
3578 }
3579
3580 /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
3581    additional 15 bits precision. IX is the bit representation of x, but
3582    normalized in the subnormal range using the sign bit for the exponent. */
     /* ix:   bit pattern of the argument, prepared by the caller as described
              above.
        tail: output; receives hi - y + lo, i.e. what was dropped when hi + lo
              was rounded to the returned y.
        Returns y. */
3583 static double pow_log(UINT64 ix, double *tail)
3584 {
         /* 128-row table indexed by i (computed below).  invc scales z to form
            r = z*invc - 1, logc is added into the high sum t1 and logctail
            into the low sum lo1. */
3585     static const struct {
3586         double invc, logc, logctail;
3587     } T[] = {
3588         {0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48},
3589         {0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46},
3590         {0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45},
3591         {0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49},
3592         {0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47},
3593         {0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46},
3594         {0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50},
3595         {0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45},
3596         {0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45},
3597         {0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45},
3598         {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46},
3599         {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46},
3600         {0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46},
3601         {0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46},
3602         {0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46},
3603         {0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45},
3604         {0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47},
3605         {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48},
3606         {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48},
3607         {0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47},
3608         {0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45},
3609         {0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46},
3610         {0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45},
3611         {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45},
3612         {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45},
3613         {0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46},
3614         {0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52},
3615         {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45},
3616         {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45},
3617         {0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45},
3618         {0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45},
3619         {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45},
3620         {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45},
3621         {0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46},
3622         {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46},
3623         {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46},
3624         {0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45},
3625         {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46},
3626         {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46},
3627         {0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48},
3628         {0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45},
3629         {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45},
3630         {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45},
3631         {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47},
3632         {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47},
3633         {0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45},
3634         {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45},
3635         {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45},
3636         {0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46},
3637         {0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45},
3638         {0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45},
3639         {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46},
3640         {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46},
3641         {0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45},
3642         {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46},
3643         {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46},
3644         {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45},
3645         {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45},
3646         {0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46},
3647         {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45},
3648         {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45},
3649         {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46},
3650         {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46},
3651         {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45},
3652         {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45},
3653         {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48},
3654         {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48},
3655         {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45},
3656         {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45},
3657         {0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45},
3658         {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50},
3659         {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50},
3660         {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46},
3661         {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46},
3662         {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0},
3663         {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0},
3664         {0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46},
3665         {0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45},
3666         {0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45},
3667         {0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47},
3668         {0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45},
3669         {0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46},
3670         {0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46},
3671         {0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47},
3672         {0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45},
3673         {0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45},
3674         {0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45},
3675         {0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49},
3676         {0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45},
3677         {0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46},
3678         {0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45},
3679         {0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45},
3680         {0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45},
3681         {0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45},
3682         {0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45},
3683         {0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47},
3684         {0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51},
3685         {0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45},
3686         {0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45},
3687         {0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46},
3688         {0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45},
3689         {0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46},
3690         {0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47},
3691         {0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47},
3692         {0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45},
3693         {0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47},
3694         {0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45},
3695         {0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48},
3696         {0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45},
3697         {0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51},
3698         {0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51},
3699         {0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46},
3700         {0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48},
3701         {0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45},
3702         {0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45},
3703         {0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45},
3704         {0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45},
3705         {0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47},
3706         {0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45},
3707         {0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45},
3708         {0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46},
3709         {0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46},
3710         {0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47},
3711         {0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45},
3712         {0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45},
3713         {0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45},
3714         {0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46},
3715         {0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47},
3716     };
         /* A[0] scales the r*r term; A[1]..A[6] are the coefficients of the
            polynomial p evaluated at the end of the function. */
3717     static const double A[] = {
3718         -0x1p-1,
3719         0x1.555555555556p-2 * -2,
3720         -0x1.0000000000006p-2 * -2,
3721         0x1.999999959554ep-3 * 4,
3722         -0x1.555555529a47ap-3 * 4,
3723         0x1.2495b9b4845e9p-3 * -8,
3724         -0x1.0002b8b263fc3p-3 * -8
3725     };
         /* Two-part multiplier for kd: ln2hi feeds the high sum t1 and ln2lo
            the low sum lo1. */
3726     static const double ln2hi = 0x1.62e42fefa3800p-1,
3727         ln2lo = 0x1.ef35793c76730p-45;
3728
3729     double z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
3730     double zhi, zlo, rhi, rlo, ar, ar2, ar3, lo3, lo4, arhi, arhi2;
3731     UINT64 iz, tmp;
3732     int k, i;
3733
3734     /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
3735        The range is split into N subintervals.
3736        The ith subinterval contains z and c is near its center. */
3737     tmp = ix - 0x3fe6955500000000ULL;
         /* table index: the 7 bits of tmp just below bit 52 */
3738     i = (tmp >> (52 - 7)) % (1 << 7);
3739     k = (INT64)tmp >> 52; /* arithmetic shift */
         /* subtract the top 12 bits of tmp from ix, leaving the bits of z */
3740     iz = ix - (tmp & 0xfffULL << 52);
3741     z = *(double*)&iz;
3742     kd = k;
3743
3744     /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
3745     invc = T[i].invc;
3746     logc = T[i].logc;
3747     logctail = T[i].logctail;
3748
3749     /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
3750      |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */
3751     /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
         /* zhi is z rounded to its upper 32 bits (add half, then mask) */
3752     iz = (iz + (1ULL << 31)) & (-1ULL << 32);
3753     zhi = *(double*)&iz;
3754     zlo = z - zhi;
3755     rhi = zhi * invc - 1.0;
3756     rlo = zlo * invc;
3757     r = rhi + rlo;
3758
3759     /* k*Ln2 + log(c) + r. */
3760     t1 = kd * ln2hi + logc;
3761     t2 = t1 + r;
3762     lo1 = kd * ln2lo + logctail;
         /* lo2 recovers what the addition t1 + r rounded away */
3763     lo2 = t1 - t2 + r;
3764
3765     /* Evaluation is optimized assuming superscalar pipelined execution. */
3766     ar = A[0] * r; /* A[0] = -0.5. */
3767     ar2 = r * ar;
3768     ar3 = r * ar2;
3769     /* k*Ln2 + log(c) + r + A[0]*r*r. */
3770     arhi = A[0] * rhi;
3771     arhi2 = rhi * arhi;
3772     hi = t2 + arhi2;
3773     lo3 = rlo * (ar + arhi);
         /* lo4 recovers what the addition t2 + arhi2 rounded away */
3774     lo4 = t2 - hi + arhi2;
3775     /* p = log1p(r) - r - A[0]*r*r. */
3776     p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
3777     lo = lo1 + lo2 + lo3 + lo4 + p;
         /* y is the rounded sum hi + lo; *tail keeps the part y could not hold */
3778     y = hi + lo;
3779     *tail = hi - y + lo;
3780     return y;
3781 }
3782
3783 /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
3784    The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
3785 static double pow_exp(double argx, double argy, double x, double xtail, UINT32 sign_bias)
3786 {
3787     static const double C[] = {
3788         0x1.ffffffffffdbdp-2,
3789         0x1.555555555543cp-3,
3790         0x1.55555cf172b91p-5,
3791         0x1.1111167a4d017p-7
3792     };
3793     static const double invln2N = 0x1.71547652b82fep0 * (1 << 7),
3794         negln2hiN = -0x1.62e42fefa0000p-8,
3795         negln2loN = -0x1.cf79abc9e3b3ap-47;
3796
3797     UINT32 abstop;
3798     UINT64 ki, idx, top, sbits;
3799     double kd, z, r, r2, scale, tail, tmp;
3800
3801     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
3802     if (abstop - 0x3c9 >= 0x408 - 0x3c9) {
3803         if (abstop - 0x3c9 >= 0x80000000) {
3804             /* Avoid spurious underflow for tiny x. */
3805             /* Note: 0 is common input. */
3806             double one = 1.0 + x;
3807             return sign_bias ? -one : one;
3808         }
3809         if (abstop >= 0x409) {
3810             /* Note: inf and nan are already handled. */
3811             if (*(UINT64*)&x >> 63)
3812                 return math_error(_UNDERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MIN : DBL_MIN) * DBL_MIN);
3813             return math_error(_OVERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MAX : DBL_MAX) * DBL_MAX);
3814         }
3815         /* Large x is special cased below. */
3816         abstop = 0;
3817     }
3818
3819     /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
3820     /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
3821     z = invln2N * x;
3822     kd = __round(z);
3823     ki = kd;
3824     r = x + kd * negln2hiN + kd * negln2loN;
3825     /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
3826     r += xtail;
3827     /* 2^(k/N) ~= scale * (1 + tail). */
3828     idx = 2 * (ki % (1 << 7));
3829     top = (ki + sign_bias) << (52 - 7);
3830     tail = *(double*)&exp_T[idx];
3831     /* This is only a valid scale when -1023*N < k < 1024*N. */
3832     sbits = exp_T[idx + 1] + top;
3833     /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
3834     /* Evaluation is optimized assuming superscalar pipelined execution. */
3835     r2 = r * r;
3836     /* Without fma the worst case error is 0.25/N ulp larger. */
3837     /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
3838     tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]);
3839     if (abstop == 0) {
3840         /* Handle cases that may overflow or underflow when computing the result that
3841            is scale*(1+TMP) without intermediate rounding. The bit representation of
3842            scale is in SBITS, however it has a computed exponent that may have
3843            overflown into the sign bit so that needs to be adjusted before using it as
3844            a double. (int32_t)KI is the k used in the argument reduction and exponent
3845            adjustment of scale, positive k here means the result may overflow and
3846            negative k means the result may underflow. */
3847         double scale, y;
3848
3849         if ((ki & 0x80000000) == 0) {
3850             /* k > 0, the exponent of scale might have overflowed by <= 460. */
3851             sbits -= 1009ull << 52;
3852             scale = *(double*)&sbits;
3853             y = 0x1p1009 * (scale + scale * tmp);
3854             if (isinf(y))
3855                 return math_error(_OVERFLOW, "pow", argx, argy, y);
3856             return y;
3857         }
3858         /* k < 0, need special care in the subnormal range. */
3859         sbits += 1022ull << 52;
3860         /* Note: sbits is signed scale. */
3861         scale = *(double*)&sbits;
3862         y = scale + scale * tmp;
3863         if (fabs(y) < 1.0) {
3864             /* Round y to the right precision before scaling it into the subnormal
3865                range to avoid double rounding that can cause 0.5+E/2 ulp error where
3866                E is the worst-case ulp error outside the subnormal range. So this
3867                is only useful if the goal is better than 1 ulp worst-case error. */
3868             double hi, lo, one = 1.0;
3869             if (y < 0.0)
3870                 one = -1.0;
3871             lo = scale - y + scale * tmp;
3872             hi = one + y;
3873             lo = one - hi + y + lo;
3874             y = hi + lo - one;
3875             /* Fix the sign of 0. */
3876             if (y == 0.0) {
3877                 sbits &= 0x8000000000000000ULL;
3878                 y = *(double*)&sbits;
3879             }
3880             /* The underflow exception needs to be signaled explicitly. */
3881             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
3882             y = 0x1p-1022 * y;
3883             return math_error(_UNDERFLOW, "pow", argx, argy, y);
3884         }
3885         y = 0x1p-1022 * y;
3886         return y;
3887     }
3888     scale = *(double*)&sbits;
3889     /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
3890        is no spurious underflow here even without fma. */
3891     return scale + scale * tmp;
3892 }
3893
3894 /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
3895    the bit representation of a non-zero finite floating-point value. */
3896 static inline int pow_checkint(UINT64 iy)
3897 {
3898     int e = iy >> 52 & 0x7ff;
3899     if (e < 0x3ff)
3900         return 0;
3901     if (e > 0x3ff + 52)
3902         return 2;
3903     if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
3904         return 0;
3905     if (iy & (1ULL << (0x3ff + 52 - e)))
3906         return 1;
3907     return 2;
3908 }
3909
3910 /*********************************************************************
3911  *              pow (MSVCRT.@)
3912  *
3913  * Copied from musl: src/math/pow.c
      *
      * Computes x raised to the power y.  Operands that are zero, infinite,
      * NaN, negative, subnormal, or whose exponent falls outside the range
      * handled by the fast path are dealt with first; the remaining cases are
      * evaluated as pow_exp(y * pow_log(x)) using a split of y and of the
      * logarithm into high and low parts.  Pole, domain, overflow and
      * underflow conditions are reported through math_error().
3914  */
3915 double CDECL pow( double x, double y )
3916 {
3917     UINT32 sign_bias = 0;
3918     UINT64 ix, iy;
3919     UINT32 topx, topy;
3920     double lo, hi, ehi, elo, yhi, ylo, lhi, llo;
3921
3922     ix = *(UINT64*)&x;
3923     iy = *(UINT64*)&y;
         /* sign + exponent fields (top 12 bits) of x and y */
3924     topx = ix >> 52;
3925     topy = iy >> 52;
3926     if (topx - 0x001 >= 0x7ff - 0x001 ||
3927             (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
3928         /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
3929            and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
3930         /* Special cases: (x < 0x1p-126 or inf or nan) or
3931            (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
             /* y is +-0, +-inf or NaN (2*iy drops the sign; 0 wraps around) */
3932         if (2 * iy - 1 >= 2 * 0x7ff0000000000000ULL - 1) {
3933             if (2 * iy == 0)
3934                 return 1.0;
3935             if (ix == 0x3ff0000000000000ULL)
3936                 return 1.0;
3937             if (2 * ix > 2 * 0x7ff0000000000000ULL ||
3938                     2 * iy > 2 * 0x7ff0000000000000ULL)
3939                 return x + y;
3940             if (2 * ix == 2 * 0x3ff0000000000000ULL)
3941                 return 1.0;
3942             if ((2 * ix < 2 * 0x3ff0000000000000ULL) == !(iy >> 63))
3943                 return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
3944             return y * y;
3945         }
             /* x is +-0, +-inf or NaN */
3946         if (2 * ix - 1 >= 2 * 0x7ff0000000000000ULL - 1) {
3947             double x2 = x * x;
                 /* negative x with odd integer y keeps the sign */
3948             if (ix >> 63 && pow_checkint(iy) == 1)
3949                 x2 = -x2;
                 /* zero raised to a negative power is reported as _SING */
3950             if (iy & 0x8000000000000000ULL && x2 == 0.0)
3951                 return math_error(_SING, "pow", x, y, 1 / x2);
3952             /* Without the barrier some versions of clang hoist the 1/x2 and
3953                thus division by zero exception can be signaled spuriously. */
3954             return iy >> 63 ? fp_barrier(1 / x2) : x2;
3955         }
3956         /* Here x and y are non-zero finite. */
3957         if (ix >> 63) {
3958             /* Finite x < 0. */
3959             int yint = pow_checkint(iy);
3960             if (yint == 0)
3961                 return math_error(_DOMAIN, "pow", x, y, 0 / (x - x));
                 /* pow_exp() shifts this left by 52 - 7, which puts it on bit 63 */
3962             if (yint == 1)
3963                 sign_bias = 0x800 << 7;
3964             ix &= 0x7fffffffffffffff;
3965             topx &= 0x7ff;
3966         }
3967         if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
3968             /* Note: sign_bias == 0 here because y is not odd. */
3969             if (ix == 0x3ff0000000000000ULL)
3970                 return 1.0;
3971             if ((topy & 0x7ff) < 0x3be) {
3972                 /* |y| < 2^-65, x^y ~= 1 + y*log(x). */
3973                 return ix > 0x3ff0000000000000ULL ? 1.0 + y : 1.0 - y;
3974             }
                 /* huge |y|: overflow when (|x| > 1) matches (y > 0), else underflow */
3975             if ((ix > 0x3ff0000000000000ULL) == (topy < 0x800))
3976                 return math_error(_OVERFLOW, "pow", x, y, fp_barrier(DBL_MAX) * DBL_MAX);
3977             return math_error(_UNDERFLOW, "pow", x, y, fp_barrier(DBL_MIN) * DBL_MIN);
3978         }
3979         if (topx == 0) {
3980             /* Normalize subnormal x so exponent becomes negative. */
                 /* NOTE(review): x itself is rescaled here and is later passed
                    to pow_exp() as argx - confirm that is intended. */
3981             x *= 0x1p52;
3982             ix = *(UINT64*)&x;
3983             ix &= 0x7fffffffffffffff;
3984             ix -= 52ULL << 52;
3985         }
3986     }
3987
         /* hi + lo ~= log(|x|) */
3988     hi = pow_log(ix, &lo);
         /* yhi: y with its low 27 bits cleared; ylo: the remainder */
3989     iy &= -1ULL << 27;
3990     yhi = *(double*)&iy;
3991     ylo = y - yhi;
         /* lhi: hi with its low 27 bits cleared; llo: remainder plus lo */
3992     *(UINT64*)&lhi = *(UINT64*)&hi & -1ULL << 27;
3993     llo = fp_barrier(hi - lhi + lo);
3994     ehi = yhi * lhi;
3995     elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
3996     return pow_exp(x, y, ehi, elo, sign_bias);
3997 }
3998
3999 /*********************************************************************
4000  *              sin (MSVCRT.@)
4001  *
4002  * Copied from musl: src/math/sin.c
4003  */
4004 double CDECL sin( double x )
4005 {
4006     double y[2];
4007     UINT32 ix;
4008     unsigned n;
4009
4010     ix = *(ULONGLONG*)&x >> 32;
4011     ix &= 0x7fffffff;
4012
4013     /* |x| ~< pi/4 */
4014     if (ix <= 0x3fe921fb) {
4015         if (ix < 0x3e500000) { /* |x| < 2**-26 */
4016             /* raise inexact if x != 0 and underflow if subnormal*/
4017             fp_barrier(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
4018             return x;
4019         }
4020         return __sin(x, 0.0, 0);
4021     }
4022
4023     /* sin(Inf or NaN) is NaN */
4024     if (isinf(x))
4025         return math_error(_DOMAIN, "sin", x, 0, x - x);
4026     if (ix >= 0x7ff00000)
4027         return x - x;
4028
4029     /* argument reduction needed */
4030     n = __rem_pio2(x, y);
4031     switch (n&3) {
4032     case 0: return  __sin(y[0], y[1], 1);
4033     case 1: return  __cos(y[0], y[1]);
4034     case 2: return -__sin(y[0], y[1], 1);
4035     default: return -__cos(y[0], y[1]);
4036     }
4037 }
4038
4039 /*********************************************************************
4040  *              sinh (MSVCRT.@)
4041  */
4042 double CDECL sinh( double x )
4043 {
4044     UINT64 ux = *(UINT64*)&x;
4045     UINT32 w;
4046     double t, h, absx;
4047
4048     h = 0.5;
4049     if (ux >> 63)
4050         h = -h;
4051     /* |x| */
4052     ux &= (UINT64)-1 / 2;
4053     absx = *(double*)&ux;
4054     w = ux >> 32;
4055
4056     /* |x| < log(DBL_MAX) */
4057     if (w < 0x40862e42) {
4058         t = __expm1(absx);
4059         if (w < 0x3ff00000) {
4060             if (w < 0x3ff00000 - (26 << 20))
4061                 return x;
4062             return h * (2 * t - t * t / (t + 1));
4063         }
4064         return h * (t + t / (t + 1));
4065     }
4066
4067     /* |x| > log(DBL_MAX) or nan */
4068     /* note: the result is stored to handle overflow */
4069     t = __expo2(absx, 2 * h);
4070     return t;
4071 }
4072
4073 static BOOL sqrt_validate( double *x, BOOL update_sw )
4074 {
4075     short c = _dclass(*x);
4076
4077     if (c == FP_ZERO) return FALSE;
4078     if (c == FP_NAN)
4079     {
4080 #ifdef __i386__
4081         if (update_sw)
4082             *x = math_error(_DOMAIN, "sqrt", *x, 0, *x);
4083 #else
4084         /* set signaling bit */
4085         *(ULONGLONG*)x |= 0x8000000000000ULL;
4086 #endif
4087         return FALSE;
4088     }
4089     if (signbit(*x))
4090     {
4091         *x = math_error(_DOMAIN, "sqrt", *x, 0, ret_nan(update_sw));
4092         return FALSE;
4093     }
4094     if (c == FP_INFINITE) return FALSE;
4095     return TRUE;
4096 }
4097
4098 #if defined(__x86_64__) || defined(__i386__)
     /* Assembly helper, built for x86_64 and i386 only: executes a single
        sqrtsd on %xmm0 (source and destination) and returns. */
4099 double CDECL sse2_sqrt(double);
4100 __ASM_GLOBAL_FUNC( sse2_sqrt,
4101         "sqrtsd %xmm0, %xmm0\n\t"
4102         "ret" )
4103 #endif
4104
4105 #ifdef __i386__
     /* Assembly helper, built for i386 only: loads the double found at
        4(%esp) onto the x87 stack and executes fsqrt on it.  The fsqrt is
        bracketed by SET_X87_CW(0xc00) / RESET_X87_CW, which are defined
        elsewhere - presumably they adjust and then restore the x87 control
        word; confirm against the macro definitions. */
4106 double CDECL x87_sqrt(double);
4107 __ASM_GLOBAL_FUNC( x87_sqrt,
4108         "fldl 4(%esp)\n\t"
4109         SET_X87_CW(0xc00)
4110         "fsqrt\n\t"
4111         RESET_X87_CW
4112         "ret" )
4113 #endif
4114
4115 /*********************************************************************
4116  *              sqrt (MSVCRT.@)
4117  *
4118  * Copied from musl: src/math/sqrt.c
      *
      * Returns the square root of x.  sqrt_validate() is consulted first on
      * every path; when it returns FALSE, x (possibly rewritten by it) is
      * returned as is.  Otherwise x86_64 builds call sse2_sqrt(), i386 builds
      * call x87_sqrt(), and all other builds run the integer bit-by-bit
      * algorithm below.
4119  */
4120 double CDECL sqrt( double x )
4121 {
4122 #ifdef __x86_64__
4123     if (!sqrt_validate(&x, TRUE))
4124         return x;
4125
4126     return sse2_sqrt(x);
4127 #elif defined( __i386__ )
4128     if (!sqrt_validate(&x, TRUE))
4129         return x;
4130
4131     return x87_sqrt(x);
4132 #else
4133     static const double tiny = 1.0e-300;
4134
4135     double z;
         /* mask for the top bit of a 32-bit word */
4136     int sign = 0x80000000;
4137     int ix0,s0,q,m,t,i;
4138     unsigned int r,t1,s1,ix1,q1;
4139     ULONGLONG ix;
4140
4141     if (!sqrt_validate(&x, TRUE))
4142         return x;
4143
         /* split the bit pattern of x into high (ix0) and low (ix1) words */
4144     ix = *(ULONGLONG*)&x;
4145     ix0 = ix >> 32;
4146     ix1 = ix;
4147
4148     /* normalize x */
4149     m = ix0 >> 20;
4150     if (m == 0) {  /* subnormal x */
4151         while (ix0 == 0) {
4152             m -= 21;
4153             ix0 |= (ix1 >> 11);
4154             ix1 <<= 21;
4155         }
4156         for (i=0; (ix0 & 0x00100000) == 0; i++)
4157             ix0 <<= 1;
4158         m -= i - 1;
4159         ix0 |= ix1 >> (32 - i);
4160         ix1 <<= i;
4161     }
4162     m -= 1023;    /* unbias exponent */
4163     ix0 = (ix0 & 0x000fffff) | 0x00100000;
4164     if (m & 1) {  /* odd m, double x to make it even */
4165         ix0 += ix0 + ((ix1 & sign) >> 31);
4166         ix1 += ix1;
4167     }
4168     m >>= 1;      /* m = [m/2] */
4169
4170     /* generate sqrt(x) bit by bit */
4171     ix0 += ix0 + ((ix1 & sign) >> 31);
4172     ix1 += ix1;
4173     q = q1 = s0 = s1 = 0;  /* [q,q1] = sqrt(x) */
4174     r = 0x00200000;        /* r = moving bit from right to left */
4175
         /* first pass: produce the bits collected in q */
4176     while (r != 0) {
4177         t = s0 + r;
4178         if (t <= ix0) {
4179             s0   = t + r;
4180             ix0 -= t;
4181             q   += r;
4182         }
4183         ix0 += ix0 + ((ix1 & sign) >> 31);
4184         ix1 += ix1;
4185         r >>= 1;
4186     }
4187
         /* second pass: produce the bits collected in q1, with carries and
            borrows propagated between the low and high words */
4188     r = sign;
4189     while (r != 0) {
4190         t1 = s1 + r;
4191         t  = s0;
4192         if (t < ix0 || (t == ix0 && t1 <= ix1)) {
4193             s1 = t1 + r;
4194             if ((t1&sign) == sign && (s1 & sign) == 0)
4195                 s0++;
4196             ix0 -= t;
4197             if (ix1 < t1)
4198                 ix0--;
4199             ix1 -= t1;
4200             q1 += r;
4201         }
4202         ix0 += ix0 + ((ix1 & sign) >> 31);
4203         ix1 += ix1;
4204         r >>= 1;
4205     }
4206
4207     /* use floating add to find out rounding direction */
         /* a non-zero remainder means the result is inexact */
4208     if ((ix0 | ix1) != 0) {
4209         z = 1.0 - tiny; /* raise inexact flag */
4210         if (z >= 1.0) {
4211             z = 1.0 + tiny;
4212             if (q1 == (unsigned int)0xffffffff) {
4213                 q1 = 0;
4214                 q++;
4215             } else if (z > 1.0) {
4216                 if (q1 == (unsigned int)0xfffffffe)
4217                     q++;
4218                 q1 += 2;
4219             } else
4220                 q1 += q1 & 1;
4221         }
4222     }
         /* reassemble the result: halve [q,q1], move the low bit of q into
            the top of the low word and add the halved exponent m */
4223     ix0 = (q >> 1) + 0x3fe00000;
4224     ix1 = q1 >> 1;
4225     if (q & 1)
4226         ix1 |= sign;
4227     ix = ix0 + ((unsigned int)m << 20);
4228     ix <<= 32;
4229     ix |= ix1;
4230     return *(double*)&ix;
4231 #endif
4232 }
4233
4234 /* Copied from musl: src/math/__tan.c */
     /* Tangent kernel used by tan().
        x:   main part of the argument.
        y:   second argument part, combined with x in the evaluation below
             (tan() passes 0.0 or the second value produced by __rem_pio2()).
        odd: when zero the tangent approximation w = x + r is returned; when
             non-zero the function returns -1/w instead.  For |x| >= 0.6744
             both cases are handled by the "big" branch. */
4235 static double __tan(double x, double y, int odd)
4236 {
         /* polynomial coefficients; odd and even indices are evaluated as two
            separate chains (r and v) below */
4237     static const double T[] = {
4238         3.33333333333334091986e-01,
4239         1.33333333333201242699e-01,
4240         5.39682539762260521377e-02,
4241         2.18694882948595424599e-02,
4242         8.86323982359930005737e-03,
4243         3.59207910759131235356e-03,
4244         1.45620945432529025516e-03,
4245         5.88041240820264096874e-04,
4246         2.46463134818469906812e-04,
4247         7.81794442939557092300e-05,
4248         7.14072491382608190305e-05,
4249         -1.85586374855275456654e-05,
4250         2.59073051863633712884e-05,
4251     };
         /* two-part constant subtracted from in the "big" branch */
4252     static const double pio4 = 7.85398163397448278999e-01;
4253     static const double pio4lo = 3.06161699786838301793e-17;
4254
4255     double z, r, v, w, s, a, w0, a0;
4256     UINT32 hx;
4257     int big, sign;
4258
         /* upper 32 bits of x */
4259     hx = *(ULONGLONG*)&x >> 32;
4260     big = (hx & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */
4261     if (big) {
             /* work on |x| (negating y along with it) and replace the
                argument by (pio4 - x) + (pio4lo - y); sign is only set,
                and only read, when big is true */
4262         sign = hx >> 31;
4263         if (sign) {
4264             x = -x;
4265             y = -y;
4266         }
4267         x = (pio4 - x) + (pio4lo - y);
4268         y = 0.0;
4269     }
4270     z = x * x;
4271     w = z * z;
4272     r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11]))));
4273     v = z * (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12])))));
4274     s = z * x;
4275     r = y + z * (s * (r + v) + y) + s * T[0];
4276     w = x + r;
4277     if (big) {
             /* s is +1 for odd == 0 and -1 for odd == 1 */
4278         s = 1 - 2 * odd;
4279         v = s - 2.0 * (x + (r - w * w / (w + s)));
4280         return sign ? -v : v;
4281     }
4282     if (!odd)
4283         return w;
4284     /* -1.0/(x+r) has up to 2ulp error, so compute it accurately */
         /* w0 and a0 are w and a with their low 32 bits cleared */
4285     w0 = w;
4286     *(LONGLONG*)&w0 = *(LONGLONG*)&w0 & 0xffffffff00000000ULL;
4287     v = r - (w0 - x);       /* w0+v = r+x */
4288     a0 = a = -1.0 / w;
4289     *(LONGLONG*)&a0 = *(LONGLONG*)&a0 & 0xffffffff00000000ULL;
4290     return a0 + a * (1.0 + a0 * w0 + a0 * v);
4291 }
4292
4293 /*********************************************************************
4294  *              tan (MSVCRT.@)
4295  *
4296  * Copied from musl: src/math/tan.c
4297  */
4298 double CDECL tan( double x )
4299 {
4300     double y[2];
4301     UINT32 ix;
4302     unsigned n;
4303
4304     ix = *(ULONGLONG*)&x >> 32;
4305     ix &= 0x7fffffff;
4306
4307     if (ix <= 0x3fe921fb) { /* |x| ~< pi/4 */
4308         if (ix < 0x3e400000) { /* |x| < 2**-27 */
4309             /* raise inexact if x!=0 and underflow if subnormal */
4310             fp_barrier(ix < 0x00100000 ? x / 0x1p120f : x + 0x1p120f);
4311             return x;
4312         }
4313         return __tan(x, 0.0, 0);
4314     }
4315
4316     if (isinf(x))
4317         return math_error(_DOMAIN, "tan", x, 0, x - x);
4318     if (ix >= 0x7ff00000)
4319         return x - x;
4320
4321     n = __rem_pio2(x, y);
4322     return __tan(y[0], y[1], n & 1);
4323 }
4324
4325 /*********************************************************************
4326  *              tanh (MSVCRT.@)
4327  */
4328 double CDECL tanh( double x )
4329 {
4330     UINT64 ui = *(UINT64*)&x;
4331     UINT32 w;
4332     int sign;
4333     double t;
4334
4335     /* x = |x| */
4336     sign = ui >> 63;
4337     ui &= (UINT64)-1 / 2;
4338     x = *(double*)&ui;
4339     w = ui >> 32;
4340
4341     if (w > 0x3fe193ea) {
4342         /* |x| > log(3)/2 ~= 0.5493 or nan */
4343         if (w > 0x40340000) {
4344 #if _MSVCR_VER < 140
4345             if (isnan(x))
4346                 return math_error(_DOMAIN, "tanh", x, 0, x);
4347 #endif
4348             /* |x| > 20 or nan */
4349             /* note: this branch avoids raising overflow */
4350             fp_barrier(x + 0x1p120f);
4351             t = 1 - 0 / x;
4352         } else {
4353             t = __expm1(2 * x);
4354             t = 1 - 2 / (t + 2);
4355         }
4356     } else if (w > 0x3fd058ae) {
4357         /* |x| > log(5/3)/2 ~= 0.2554 */
4358         t = __expm1(2 * x);
4359         t = t / (t + 2);
4360     } else if (w >= 0x00100000) {
4361         /* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */
4362         t = __expm1(-2 * x);
4363         t = -t / (t + 2);
4364     } else {
4365         /* |x| is subnormal */
4366         /* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */
4367         fp_barrier((float)x);
4368         t = x;
4369     }
4370     return sign ? -t : t;
4371 }
4372
4373
4374 #if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
4375
/* Defines `name` as an assembly wrapper around the one-argument function
 * `call`.
 *
 * The wrapper pops its argument from the top of the x87 register stack
 * into the first 8 bytes of a local buffer, then pops every remaining x87
 * register into the following 8-byte slots (fxam reports an empty ST(0)
 * when the status word masked with 0x4500 equals 0x4100).  The number of
 * used slots is kept at -4(%ebp) across the call.  After `call` returns,
 * its result is stored in slot 0 and the slots are reloaded from the
 * highest one down to slot 0, so the result is the last value pushed.
 */
4376 #define CREATE_FPU_FUNC1(name, call) \
4377     __ASM_GLOBAL_FUNC(name, \
4378             "pushl   %ebp\n\t" \
4379             __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
4380             __ASM_CFI(".cfi_rel_offset %ebp,0\n\t") \
4381             "movl    %esp, %ebp\n\t" \
4382             __ASM_CFI(".cfi_def_cfa_register %ebp\n\t") \
4383             "subl    $68, %esp\n\t" /* sizeof(double)*8 + sizeof(int) */ \
4384             "fstpl   (%esp)\n\t"    /* store function argument */ \
4385             "fwait\n\t" \
4386             "movl    $1, %ecx\n\t"  /* empty FPU stack */ \
4387             "1:\n\t" \
4388             "fxam\n\t" \
4389             "fstsw   %ax\n\t" \
4390             "and     $0x4500, %ax\n\t" \
4391             "cmp     $0x4100, %ax\n\t" \
4392             "je      2f\n\t" \
4393             "fstpl    (%esp,%ecx,8)\n\t" \
4394             "fwait\n\t" \
4395             "incl    %ecx\n\t" \
4396             "jmp     1b\n\t" \
4397             "2:\n\t" \
4398             "movl    %ecx, -4(%ebp)\n\t" \
4399             "call    " __ASM_NAME( #call ) "\n\t" \
4400             "movl    -4(%ebp), %ecx\n\t" \
4401             "fstpl   (%esp)\n\t"    /* save result */ \
4402             "3:\n\t"                /* restore FPU stack */ \
4403             "decl    %ecx\n\t" \
4404             "fldl    (%esp,%ecx,8)\n\t" \
4405             "cmpl    $0, %ecx\n\t" \
4406             "jne     3b\n\t" \
4407             "leave\n\t" \
4408             __ASM_CFI(".cfi_def_cfa %esp,4\n\t") \
4409             __ASM_CFI(".cfi_same_value %ebp\n\t") \
4410             "ret")
4411
/* Defines `name` as an assembly wrapper around the two-argument function
 * `call`.
 *
 * Same scheme as the one-argument variant: the top of the x87 stack is
 * popped into the second 8-byte slot of the local buffer and the next
 * register into the first slot, so the value that was below the top
 * becomes the first argument of `call`.  The remaining x87 registers are
 * spilled to the slots from index 2 on.  After the call the result is
 * stored in slot 1 and the slots are reloaded from the highest one down
 * to slot 1, which leaves the result as the last value pushed.
 */
4412 #define CREATE_FPU_FUNC2(name, call) \
4413     __ASM_GLOBAL_FUNC(name, \
4414             "pushl   %ebp\n\t" \
4415             __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
4416             __ASM_CFI(".cfi_rel_offset %ebp,0\n\t") \
4417             "movl    %esp, %ebp\n\t" \
4418             __ASM_CFI(".cfi_def_cfa_register %ebp\n\t") \
4419             "subl    $68, %esp\n\t" /* sizeof(double)*8 + sizeof(int) */ \
4420             "fstpl   8(%esp)\n\t"   /* store function argument */ \
4421             "fwait\n\t" \
4422             "fstpl   (%esp)\n\t" \
4423             "fwait\n\t" \
4424             "movl    $2, %ecx\n\t"  /* empty FPU stack */ \
4425             "1:\n\t" \
4426             "fxam\n\t" \
4427             "fstsw   %ax\n\t" \
4428             "and     $0x4500, %ax\n\t" \
4429             "cmp     $0x4100, %ax\n\t" \
4430             "je      2f\n\t" \
4431             "fstpl    (%esp,%ecx,8)\n\t" \
4432             "fwait\n\t" \
4433             "incl    %ecx\n\t" \
4434             "jmp     1b\n\t" \
4435             "2:\n\t" \
4436             "movl    %ecx, -4(%ebp)\n\t" \
4437             "call    " __ASM_NAME( #call ) "\n\t" \
4438             "movl    -4(%ebp), %ecx\n\t" \
4439             "fstpl   8(%esp)\n\t"   /* save result */ \
4440             "3:\n\t"                /* restore FPU stack */ \
4441             "decl    %ecx\n\t" \
4442             "fldl    (%esp,%ecx,8)\n\t" \
4443             "cmpl    $1, %ecx\n\t" \
4444             "jne     3b\n\t" \
4445             "leave\n\t" \
4446             __ASM_CFI(".cfi_def_cfa %esp,4\n\t") \
4447             __ASM_CFI(".cfi_same_value %ebp\n\t") \
4448             "ret")
4449
/* Instantiate the _CI* entry points.  Each one receives its argument(s)
 * on the x87 register stack and forwards them to the C function named in
 * the second macro argument; CREATE_FPU_FUNC2 is used for the functions
 * taking two arguments (atan2, fmod, pow).
 */
4450 CREATE_FPU_FUNC1(_CIacos, acos)
4451 CREATE_FPU_FUNC1(_CIasin, asin)
4452 CREATE_FPU_FUNC1(_CIatan, atan)
4453 CREATE_FPU_FUNC2(_CIatan2, atan2)
4454 CREATE_FPU_FUNC1(_CIcos, cos)
4455 CREATE_FPU_FUNC1(_CIcosh, cosh)
4456 CREATE_FPU_FUNC1(_CIexp, exp)
4457 CREATE_FPU_FUNC2(_CIfmod, fmod)
4458 CREATE_FPU_FUNC1(_CIlog, log)
4459 CREATE_FPU_FUNC1(_CIlog10, log10)
4460 CREATE_FPU_FUNC2(_CIpow, pow)
4461 CREATE_FPU_FUNC1(_CIsin, sin)
4462 CREATE_FPU_FUNC1(_CIsinh, sinh)
4463 CREATE_FPU_FUNC1(_CIsqrt, sqrt)
4464 CREATE_FPU_FUNC1(_CItan, tan)
4465 CREATE_FPU_FUNC1(_CItanh, tanh)
4466
/* _ftol: converts the value on top of the x87 stack to a 64-bit integer
 * and pops it.
 *
 * The current x87 control word is saved at (%esp), a copy with both
 * rounding-control bits set (0xc00) is loaded for the fistpq store, and
 * the saved control word is restored afterwards.  The 64-bit result is
 * returned in %edx:%eax.
 */
4467 __ASM_GLOBAL_FUNC(_ftol,
4468         "pushl   %ebp\n\t"
4469         __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
4470         __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
4471         "movl    %esp, %ebp\n\t"
4472         __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
4473         "subl    $12, %esp\n\t"     /* sizeof(LONGLONG) + 2*sizeof(WORD) */
4474         "fnstcw  (%esp)\n\t"
4475         "mov     (%esp), %ax\n\t"
4476         "or      $0xc00, %ax\n\t"
4477         "mov     %ax, 2(%esp)\n\t"
4478         "fldcw   2(%esp)\n\t"
4479         "fistpq  4(%esp)\n\t"
4480         "fldcw   (%esp)\n\t"
4481         "movl    4(%esp), %eax\n\t"
4482         "movl    8(%esp), %edx\n\t"
4483         "leave\n\t"
4484         __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
4485         __ASM_CFI(".cfi_same_value %ebp\n\t")
4486         "ret")
4487
4488 #endif /* (defined(__GNUC__) || defined(__clang__)) && defined(__i386__) */
4489
/*********************************************************************
 *              _fpclass (MSVCRT.@)
 *
 * Classifies num from its bit pattern and returns the matching
 * _FPCLASS_* constant (NaN kind, infinity, normal, denormal or zero,
 * with the sign folded into the constant).
 */
int CDECL _fpclass(double num)
{
    union { double f; UINT64 i; } u = { num };
    const int negative = u.i >> 63;
    const int exponent = u.i >> 52 & 0x7ff;

    if (exponent == 0x7ff)
    {
        /* all exponent bits set: infinity when the fraction is empty */
        if (!(u.i << 12)) return negative ? _FPCLASS_NINF : _FPCLASS_PINF;
        /* the top fraction bit tells quiet and signaling NaNs apart */
        return ((u.i >> 51) & 1) ? _FPCLASS_QNAN : _FPCLASS_SNAN;
    }
    if (exponent)
        return negative ? _FPCLASS_NN : _FPCLASS_PN;
    /* zero exponent: denormal unless every bit but the sign is clear */
    if (u.i << 1)
        return negative ? _FPCLASS_ND : _FPCLASS_PD;
    return negative ? _FPCLASS_NZ : _FPCLASS_PZ;
}
4511
/*********************************************************************
 *              _rotl (MSVCRT.@)
 *
 * Rotates the 32-bit value num to the left.  Only the low five bits of
 * shift are used, so a negative count rotates to the right.
 */
unsigned int CDECL MSVCRT__rotl(unsigned int num, int shift)
{
  const unsigned int count = shift & 31;

  /* the complementary count is masked too: with count == 0 it would be
   * 32, and shifting a 32-bit value by its full width is undefined */
  return (num << count) | (num >> ((32 - count) & 31));
}
4520
/*********************************************************************
 *              _lrotl (MSVCRT.@)
 *
 * Rotates num to the left, treating it as a 32-bit quantity.  Only the
 * low five bits of shift are used.
 */
__msvcrt_ulong CDECL MSVCRT__lrotl(__msvcrt_ulong num, int shift)
{
  const unsigned int count = shift & 0x1f;

  /* the complementary count is masked too: with count == 0 it would be
   * 32, and shifting a 32-bit value by its full width is undefined */
  return (num << count) | (num >> ((32 - count) & 0x1f));
}
4529
/*********************************************************************
 *              _lrotr (MSVCRT.@)
 *
 * Rotates num to the right, treating it as a 32-bit quantity.  Only the
 * low five bits of shift are used.
 */
__msvcrt_ulong CDECL MSVCRT__lrotr(__msvcrt_ulong num, int shift)
{
  const unsigned int count = shift & 0x1f;

  /* the complementary count is masked too: with count == 0 it would be
   * 32, and shifting a 32-bit value by its full width is undefined */
  return (num >> count) | (num << ((32 - count) & 0x1f));
}
4538
/*********************************************************************
 *              _rotr (MSVCRT.@)
 *
 * Rotates the 32-bit value num to the right.  Only the low five bits of
 * shift are used, so a negative count rotates to the left.
 */
unsigned int CDECL MSVCRT__rotr(unsigned int num, int shift)
{
    const unsigned int count = shift & 0x1f;

    /* the complementary count is masked too: with count == 0 it would be
     * 32, and shifting a 32-bit value by its full width is undefined */
    return (num >> count) | (num << ((32 - count) & 0x1f));
}
4547
/*********************************************************************
 *              _rotl64 (MSVCRT.@)
 *
 * Rotates the 64-bit value num to the left.  Only the low six bits of
 * shift are used.
 */
unsigned __int64 CDECL MSVCRT__rotl64(unsigned __int64 num, int shift)
{
  const unsigned int count = shift & 63;

  /* the complementary count is masked too: with count == 0 it would be
   * 64, and shifting a 64-bit value by its full width is undefined */
  return (num << count) | (num >> ((64 - count) & 63));
}
4556
/*********************************************************************
 *              _rotr64 (MSVCRT.@)
 *
 * Rotates the 64-bit value num to the right.  Only the low six bits of
 * shift are used.
 */
unsigned __int64 CDECL MSVCRT__rotr64(unsigned __int64 num, int shift)
{
    const unsigned int count = shift & 63;

    /* the complementary count is masked too: with count == 0 it would be
     * 64, and shifting a 64-bit value by its full width is undefined */
    return (num >> count) | (num << ((64 - count) & 63));
}
4565
/*********************************************************************
 *              abs (MSVCRT.@)
 *
 * Returns the absolute value of n.  The most negative value has no
 * positive counterpart and is returned unchanged.
 */
int CDECL abs( int n )
{
    /* negate in unsigned arithmetic, -n overflows for the minimum value */
    return n >= 0 ? n : (int)(0u - (unsigned int)n);
}
4573
/*********************************************************************
 *              labs (MSVCRT.@)
 *
 * Returns the absolute value of n.  The most negative value has no
 * positive counterpart and is returned unchanged.
 */
__msvcrt_long CDECL labs( __msvcrt_long n )
{
    /* negate in unsigned arithmetic, -n overflows for the minimum value */
    return n >= 0 ? n : (__msvcrt_long)(0 - (__msvcrt_ulong)n);
}
4581
#if _MSVCR_VER>=100
/*********************************************************************
 *              llabs (MSVCR100.@)
 *
 * Returns the absolute value of n.  The most negative value has no
 * positive counterpart and is returned unchanged.
 */
__int64 CDECL llabs( __int64 n )
{
    /* negate in unsigned arithmetic, -n overflows for the minimum value */
    return n >= 0 ? n : (__int64)(0 - (unsigned __int64)n);
}
#endif
4591
#if _MSVCR_VER>=120
/*********************************************************************
 *              imaxabs (MSVCR120.@)
 *
 * Returns the absolute value of n.  The most negative value has no
 * positive counterpart and is returned unchanged.
 */
intmax_t CDECL imaxabs( intmax_t n )
{
    /* negate in unsigned arithmetic, -n overflows for the minimum value */
    return n >= 0 ? n : (intmax_t)(0 - (uintmax_t)n);
}
#endif
4601
/*********************************************************************
 *              _abs64 (MSVCRT.@)
 *
 * Returns the absolute value of n.  The most negative value has no
 * positive counterpart and is returned unchanged.
 */
__int64 CDECL _abs64( __int64 n )
{
    /* negate in unsigned arithmetic, -n overflows for the minimum value */
    return n >= 0 ? n : (__int64)(0 - (unsigned __int64)n);
}
4609
/* Copied from musl: src/math/ilogb.c
 *
 * Returns the unbiased binary exponent of x as an int: FP_ILOGB0 for
 * zero, INT_MAX for infinity and FP_ILOGBNAN for NaN.
 */
static int __ilogb(double x)
{
    union { double f; UINT64 i; } u = { x };
    const int biased = u.i >> 52 & 0x7ff;
    UINT64 frac = u.i << 12;
    int e;

    if (biased == 0x7ff) return frac ? FP_ILOGBNAN : INT_MAX;
    if (biased) return biased - 0x3ff;
    if (!frac) return FP_ILOGB0;

    /* subnormal x: count how far the leading fraction bit is from the top */
    e = -0x3ff;
    while (!(frac >> 63))
    {
        frac <<= 1;
        e--;
    }
    return e;
}
4627
4628 /*********************************************************************
4629  *              _logb (MSVCRT.@)
4630  *
4631  * Copied from musl: src/math/logb.c
4632  */
4633 double CDECL _logb(double x)
4634 {
4635     if (!isfinite(x))
4636         return x * x;
4637     if (x == 0)
4638         return math_error(_SING, "_logb", x, 0, -1 / (x * x));
4639     return __ilogb(x);
4640 }
4641
/* Squares x and returns the product as a pair: *hi receives x * x as
 * computed in double precision, *lo the correction obtained by splitting
 * x into a high part and a low remainder.
 */
static void sq(double *hi, double *lo, double x)
{
    const double split = x * (0x1p27 + 1);
    const double head = x - split + split;
    const double rest = x - head;

    *hi = x * x;
    *lo = head * head - *hi + 2 * head * rest + rest * rest;
}
4652
/*********************************************************************
 *              _hypot (MSVCRT.@)
 *
 * Copied from musl: src/math/hypot.c
 *
 * Computes sqrt(x*x + y*y).  The operands are rescaled before squaring
 * so that the intermediate products stay in range.
 */
double CDECL _hypot(double x, double y)
{
    union { double f; UINT64 i; } major = { x }, minor = { y };
    double hx, lx, hy, ly, scale;
    int emajor, eminor;

    /* drop the signs and arrange |major| >= |minor| */
    major.i &= -1ULL >> 1;
    minor.i &= -1ULL >> 1;
    if (major.i < minor.i) {
        UINT64 swap = major.i;
        major.i = minor.i;
        minor.i = swap;
    }

    /* special cases */
    emajor = major.i >> 52;
    eminor = minor.i >> 52;
    x = major.f;
    y = minor.f;
    /* note: hypot(inf,nan) == inf */
    if (eminor == 0x7ff)
        return y;
    if (emajor == 0x7ff || minor.i == 0)
        return x;
    /* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */
    /* 64 difference is enough for ld80 double_t */
    if (emajor - eminor > 64)
        return x + y;

    /* precise sqrt argument in nearest rounding mode without overflow */
    /* xh*xh must not overflow and xl*xl must not underflow in sq */
    if (emajor > 0x3ff + 510) {
        scale = 0x1p700;
        x *= 0x1p-700;
        y *= 0x1p-700;
    } else if (eminor < 0x3ff - 450) {
        scale = 0x1p-700;
        x *= 0x1p700;
        y *= 0x1p700;
    } else {
        scale = 1;
    }
    sq(&hx, &lx, x);
    sq(&hy, &ly, y);
    return scale * sqrt(ly + lx + hy + hx);
}
4704
/*********************************************************************
 *      _hypotf (MSVCRT.@)
 *
 * Copied from musl: src/math/hypotf.c
 *
 * Single precision sqrt(x*x + y*y); the squares are accumulated in
 * double precision after rescaling very large or very small operands.
 */
float CDECL _hypotf(float x, float y)
{
    union { float f; UINT32 i; } major = { x }, minor = { y };
    float scale;

    /* drop the signs and arrange |major| >= |minor| */
    major.i &= -1U >> 1;
    minor.i &= -1U >> 1;
    if (major.i < minor.i) {
        UINT32 swap = major.i;
        major.i = minor.i;
        minor.i = swap;
    }

    x = major.f;
    y = minor.f;
    /* the smaller operand is infinite */
    if (minor.i == 0xff << 23)
        return y;
    /* inf or nan, zero, or an operand too small to affect the result */
    if (major.i >= 0xff << 23 || minor.i == 0 || major.i - minor.i >= 25 << 23)
        return x + y;

    if (major.i >= (0x7f + 60) << 23) {
        scale = 0x1p90f;
        x *= 0x1p-90f;
        y *= 0x1p-90f;
    } else if (minor.i < (0x7f - 60) << 23) {
        scale = 0x1p-90f;
        x *= 0x1p90f;
        y *= 0x1p90f;
    } else {
        scale = 1;
    }
    return scale * sqrtf((double)x * x + (double)y * y);
}
4742
/*********************************************************************
 *              ceil (MSVCRT.@)
 *
 * Based on musl: src/math/ceilf.c
 *
 * Rounds x up to an integral value by editing its bit pattern.
 */
double CDECL ceil( double x )
{
    union {double f; UINT64 i;} u = {x};
    const int e = (u.i >> 52 & 0x7ff) - 0x3ff;
    const int negative = u.i >> 63;
    UINT64 frac_mask;

    /* already integral, infinite or nan */
    if (e >= 52)
        return x;

    if (e < 0) {
        /* |x| < 1 */
        if (negative)
            return -0.0;
        return (u.i << 1) ? 1.0 : u.f;
    }

    frac_mask = 0x000fffffffffffffULL >> e;
    if (!(u.i & frac_mask))
        return x;
    /* positive values move up to the next integer before truncating */
    if (!negative)
        u.i += frac_mask;
    u.i &= ~frac_mask;
    return u.f;
}
4771
/*********************************************************************
 *              floor (MSVCRT.@)
 *
 * Based on musl: src/math/floorf.c
 *
 * Rounds x down to an integral value by editing its bit pattern.
 */
double CDECL floor( double x )
{
    union {double f; UINT64 i;} u = {x};
    const int e = (int)(u.i >> 52 & 0x7ff) - 0x3ff;
    const int negative = u.i >> 63;
    UINT64 frac_mask;

    /* already integral, infinite or nan */
    if (e >= 52)
        return x;

    if (e < 0) {
        /* |x| < 1 */
        if (!negative)
            return 0;
        return (u.i << 1) ? -1 : u.f;
    }

    frac_mask = 0x000fffffffffffffULL >> e;
    if (!(u.i & frac_mask))
        return x;
    /* negative values move down to the next integer before truncating */
    if (negative)
        u.i += frac_mask;
    u.i &= ~frac_mask;
    return u.f;
}
4800
4801 /*********************************************************************
4802  *      fma (MSVCRT.@)
4803  *
4804  * Copied from musl: src/math/fma.c
4805  */
/* A double decomposed by normalize() for the integer arithmetic in fma() */
struct fma_num
{
    UINT64 m;   /* significand with the implicit bit set, shifted left by one */
    int e;      /* exponent such that the magnitude is m * 2^e */
    int sign;   /* 0x800 for negative values, 0 otherwise */
};

/* Splits x into sign, exponent and significand.  Subnormal input is
 * rescaled by 2^63 first; zero gets the marker exponent 0x800 before the
 * bias is removed, infinity and nan keep 0x7ff.
 */
static struct fma_num normalize(double x)
{
    union { double f; UINT64 i; } u = { x };
    struct fma_num ret;
    int e = u.i >> 52;

    ret.sign = e & 0x800;
    e &= 0x7ff;
    if (!e) {
        /* zero or subnormal */
        u.f = x * 0x1p63;
        e = u.i >> 52 & 0x7ff;
        e = e ? e - 63 : 0x800;
    }
    ret.m = ((u.i & ((1ull << 52) - 1)) | 1ull << 52) << 1;
    ret.e = e - (0x3ff + 52 + 1);
    return ret;
}
4837
/* Multiplies x by y from 32-bit halves and stores the 128-bit product in
 * *hi and *lo.  The two cross products are summed in a single 64-bit
 * value; the caller in this file passes significands from normalize(),
 * whose top bits are zero.
 */
static void mul(UINT64 *hi, UINT64 *lo, UINT64 x, UINT64 y)
{
    const UINT64 x0 = (UINT32)x, x1 = x >> 32;
    const UINT64 y0 = (UINT32)y, y1 = y >> 32;
    const UINT64 low = x0 * y0;
    const UINT64 cross = x0 * y1 + x1 * y0;
    const UINT64 high = x1 * y1;
    const UINT64 sum = low + (cross << 32);

    *lo = sum;
    /* low > sum means the addition above wrapped around */
    *hi = high + (cross >> 32) + (low > sum);
}
4850
/* Computes x * y + z using 64-bit integer arithmetic on the operands as
 * decomposed by normalize(); the step comments below state that the
 * result is rounded only once.
 *
 * Operands that normalize() marks as zero, infinite or nan take the
 * early-return paths, which evaluate x * y + z (or return z) directly and
 * set errno to EDOM when the result is nan although no tested operand was.
 */
4851 double CDECL fma( double x, double y, double z )
4852 {
4853     int e, d, sign, samesign, nonzero;
4854     UINT64 rhi, rlo, zhi, zlo;
4855     struct fma_num nx, ny, nz;
4856     double r;
4857     INT64 i;
4858
4859     /* normalize so top 10bits and last bit are 0 */
4860     nx = normalize(x);
4861     ny = normalize(y);
4862     nz = normalize(z);
4863
    /* normalize() yields an exponent of at least 0x7ff - 0x3ff - 52 - 1
       for zero, infinite and nan values */
4864     if (nx.e >= 0x7ff - 0x3ff - 52 - 1 || ny.e >= 0x7ff - 0x3ff - 52 - 1) {
4865         r = x * y + z;
4866         if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(r)) *_errno() = EDOM;
4867         return r;
4868     }
4869     if (nz.e >= 0x7ff - 0x3ff - 52 - 1) {
4870         if (nz.e > 0x7ff - 0x3ff - 52 - 1) {/* z==0 */
4871             r = x * y + z;
4872             if (!isnan(x) && !isnan(y) && isnan(r)) *_errno() = EDOM;
4873             return r;
4874         }
        /* z is infinite or nan while x and y are finite and non-zero */
4875         return z;
4876     }
4877
4878     /* mul: r = x*y */
4879     mul(&rhi, &rlo, nx.m, ny.m);
4880     /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */
4881
4882     /* align exponents */
4883     e = nx.e + ny.e;
4884     d = nz.e - e;
4885     /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
4886     if (d > 0) {
4887         if (d < 64) {
4888             zlo = nz.m << d;
4889             zhi = nz.m >> (64 - d);
4890         } else {
4891             zlo = 0;
4892             zhi = nz.m;
4893             e = nz.e - 64;
4894             d -= 64;
4895             if (d < 64 && d) {
                /* bits shifted out of rlo are kept as a sticky low bit */
4896                 rlo = rhi << (64 - d) | rlo >> d | !!(rlo << (64 - d));
4897                 rhi = rhi >> d;
4898             } else if (d) {
4899                 rlo = 1;
4900                 rhi = 0;
4901             }
4902         }
4903     } else {
4904         zhi = 0;
4905         d = -d;
4906         if (d == 0) {
4907             zlo = nz.m;
4908         } else if (d < 64) {
4909             zlo = nz.m >> d | !!(nz.m << (64 - d));
4910         } else {
4911             zlo = 1;
4912         }
4913     }
4914
4915     /* add */
4916     sign = nx.sign ^ ny.sign;
4917     samesign = !(sign ^ nz.sign);
4918     nonzero = 1;
4919     if (samesign) {
4920         /* r += z */
4921         rlo += zlo;
4922         rhi += zhi + (rlo < zlo);
4923     } else {
4924         /* r -= z */
4925         UINT64 t = rlo;
4926         rlo -= zlo;
4927         rhi = rhi - zhi - (t < rlo);
4928         if (rhi >> 63) {
            /* the difference is negative: negate the 128-bit value and flip the sign */
4929             rlo = -rlo;
4930             rhi = -rhi - !!rlo;
4931             sign = !sign;
4932         }
4933         nonzero = !!rhi;
4934     }
4935
4936     /* set rhi to top 63bit of the result (last bit is sticky) */
    /* NOTE(review): presumably BitScanReverse stores the index of the highest
       set bit, which makes d the number of leading zero bits minus one - confirm */
4937     if (nonzero) {
4938         e += 64;
4939         if (rhi >> 32) {
4940             BitScanReverse((DWORD*)&d, rhi >> 32);
4941             d = 31 - d - 1;
4942         } else {
4943             BitScanReverse((DWORD*)&d, rhi);
4944             d = 63 - d - 1;
4945         }
4946         /* note: d > 0 */
4947         rhi = rhi << d | rlo >> (64 - d) | !!(rlo << d);
4948     } else if (rlo) {
4949         if (rlo >> 32) {
4950             BitScanReverse((DWORD*)&d, rlo >> 32);
4951             d = 31 - d - 1;
4952         } else {
4953             BitScanReverse((DWORD*)&d, rlo);
4954             d = 63 - d - 1;
4955         }
4956         if (d < 0)
4957             rhi = rlo >> 1 | (rlo & 1);
4958         else
4959             rhi = rlo << d;
4960     } else {
4961         /* exact +-0 */
4962         return x * y + z;
4963     }
4964     e -= d;
4965
4966     /* convert to double */
4967     i = rhi; /* i is in [1<<62,(1<<63)-1] */
4968     if (sign)
4969         i = -i;
4970     r = i; /* |r| is in [0x1p62,0x1p63] */
4971
4972     if (e < -1022 - 62) {
4973         /* result is subnormal before rounding */
4974         if (e == -1022 - 63) {
4975             double c = 0x1p63;
4976             if (sign)
4977                 c = -c;
4978             if (r == c) {
4979                 /* min normal after rounding, underflow depends
4980                    on arch behaviour which can be imitated by
4981                    a double to float conversion */
4982                 float fltmin = 0x0.ffffff8p-63 * FLT_MIN * r;
4983                 return DBL_MIN / FLT_MIN * fltmin;
4984             }
4985             /* one bit is lost when scaled, add another top bit to
4986                only round once at conversion if it is inexact */
4987             if (rhi << 53) {
4988                 double tiny;
4989
4990                 i = rhi >> 1 | (rhi & 1) | 1ull << 62;
4991                 if (sign)
4992                     i = -i;
4993                 r = i;
4994                 r = 2 * r - c; /* remove top bit */
4995
4996                 /* raise underflow portably, such that it
4997                    cannot be optimized away */
4998                 tiny = DBL_MIN / FLT_MIN * r;
4999                 r += (double)(tiny * tiny) * (r - r);
5000             }
5001         } else {
5002             /* only round once when scaled */
5003             d = 10;
5004             i = (rhi >> d | !!(rhi << (64 - d))) << d;
5005             if (sign)
5006                 i = -i;
5007             r = i;
5008         }
5009     }
    /* apply the accumulated exponent to the converted significand */
5010     return __scalbn(r, e);
5011 }
5012
5013 /*********************************************************************
5014  *      fmaf (MSVCRT.@)
5015  *
5016  * Copied from musl: src/math/fmaf.c
5017  */
5018 float CDECL fmaf( float x, float y, float z )
5019 {
5020     union { double f; UINT64 i; } u;
5021     double xy, adjust;
5022     int e;
5023
5024     xy = (double)x * y;
5025     u.f = xy + z;
5026     e = u.i>>52 & 0x7ff;
5027     /* Common case: The double precision result is fine. */
5028     if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */
5029             e == 0x7ff || /* NaN */
5030             (u.f - xy == z && u.f - z == xy) || /* exact */
5031             (_controlfp(0, 0) & _MCW_RC) != _RC_NEAR) /* not round-to-nearest */
5032     {
5033         if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(u.f)) *_errno() = EDOM;
5034
5035         /* underflow may not be raised correctly, example:
5036            fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) */
5037         if (e < 0x3ff-126 && e >= 0x3ff-149 && _statusfp() & _SW_INEXACT)
5038             fp_barrierf((float)u.f * (float)u.f);
5039         return u.f;
5040     }
5041
5042     /*
5043      * If result is inexact, and exactly halfway between two float values,
5044      * we need to adjust the low-order bit in the direction of the error.
5045      */
5046     _controlfp(_RC_CHOP, _MCW_RC);
5047     adjust = fp_barrier(xy + z);
5048     _controlfp(_RC_NEAR, _MCW_RC);
5049     if (u.f == adjust)
5050         u.i++;
5051     return u.f;
5052 }
5053
/*********************************************************************
 *              fabs (MSVCRT.@)
 *
 * Copied from musl: src/math/fabsf.c
 *
 * Returns x with the sign bit cleared.
 */
double CDECL fabs( double x )
{
    union { double f; UINT64 i; } u;

    u.f = x;
    u.i = u.i & 0x7fffffffffffffffull;
    return u.f;
}
5065
/*********************************************************************
 *              frexp (MSVCRT.@)
 *
 * Copied from musl: src/math/frexp.c
 *
 * Splits x into a fraction with magnitude in [0.5, 1) and a power of two
 * stored in *e.  Zero returns x with *e set to 0; infinite and nan input
 * is returned as is and *e is left untouched.
 */
double CDECL frexp( double x, int *e )
{
    union { double f; UINT64 i; } u = { x };
    int biased = u.i >> 52 & 0x7ff;
    int rescale = 0;

    if (biased == 0x7ff)
        return x;

    if (!biased) {
        if (!x) {
            *e = 0;
            return x;
        }
        /* subnormal: bring it into the normal range, fix the exponent below */
        u.f = x * 0x1p64;
        biased = u.i >> 52 & 0x7ff;
        rescale = 64;
    }

    *e = biased - 0x3fe - rescale;
    /* keep sign and fraction, force the exponent of [0.5, 1) */
    u.i &= 0x800fffffffffffffull;
    u.i |= 0x3fe0000000000000ull;
    return u.f;
}
5091
/*********************************************************************
 *              modf (MSVCRT.@)
 *
 * Copied from musl: src/math/modf.c
 *
 * Stores the integral part of x in *iptr and returns the fractional part;
 * both carry the sign of x.  For nan, x is both stored and returned.
 */
double CDECL modf( double x, double *iptr )
{
    union {double f; UINT64 i;} u = {x};
    const UINT64 sign_bit = 1ULL << 63;
    const int e = (u.i >> 52 & 0x7ff) - 0x3ff;
    UINT64 frac_mask;

    /* no integral part*/
    if (e < 0) {
        u.i &= sign_bit;
        *iptr = u.f;
        return x;
    }

    if (e < 52) {
        frac_mask = -1ULL >> 12 >> e;
        if (u.i & frac_mask) {
            /* both parts present: clear the fraction bits to get the integer */
            u.i &= ~frac_mask;
            *iptr = u.f;
            return x - u.f;
        }
    } else if (e == 0x400 && u.i << 12 != 0) {
        /* nan */
        *iptr = x;
        return x;
    }

    /* no fractional part */
    *iptr = x;
    u.i &= sign_bit;
    return u.f;
}
5129
5130 /**********************************************************************
5131  *              _statusfp2 (MSVCRT.@)
5132  *
5133  * Not exported by native msvcrt, added in msvcr80.
5134  */
5135 #if defined(__i386__) || defined(__x86_64__)
5136 void CDECL _statusfp2( unsigned int *x86_sw, unsigned int *sse2_sw )
5137 {
5138 #if defined(__GNUC__) || defined(__clang__)
5139     unsigned int flags;
5140     unsigned long fpword;
5141
5142     if (x86_sw)
5143     {
5144         __asm__ __volatile__( "fstsw %0" : "=m" (fpword) );
5145         flags = 0;
5146         if (fpword & 0x1)  flags |= _SW_INVALID;
5147         if (fpword & 0x2)  flags |= _SW_DENORMAL;
5148         if (fpword & 0x4)  flags |= _SW_ZERODIVIDE;
5149         if (fpword & 0x8)  flags |= _SW_OVERFLOW;
5150         if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5151         if (fpword & 0x20) flags |= _SW_INEXACT;
5152         *x86_sw = flags;
5153     }
5154
5155     if (!sse2_sw) return;
5156
5157     if (sse2_supported)
5158     {
5159         __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );
5160         flags = 0;
5161         if (fpword & 0x1)  flags |= _SW_INVALID;
5162         if (fpword & 0x2)  flags |= _SW_DENORMAL;
5163         if (fpword & 0x4)  flags |= _SW_ZERODIVIDE;
5164         if (fpword & 0x8)  flags |= _SW_OVERFLOW;
5165         if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5166         if (fpword & 0x20) flags |= _SW_INEXACT;
5167         *sse2_sw = flags;
5168     }
5169     else *sse2_sw = 0;
5170 #else
5171     FIXME( "not implemented\n" );
5172 #endif
5173 }
5174 #endif
5175
/**********************************************************************
 *              _statusfp (MSVCRT.@)
 *
 * Returns the pending floating-point exception flags as _SW_* bits.
 * On x86 the x87 and SSE2 status words from _statusfp2() are merged.
 */
unsigned int CDECL _statusfp(void)
{
#if defined(__i386__) || defined(__x86_64__)
    unsigned int x87_status, sse_status;

    _statusfp2( &x87_status, &sse_status );
    /* FIXME: there's no definition for ambiguous status, just return all status bits for now */
    return x87_status | sse_status;
#elif defined(__aarch64__)
    /* FPSR exception bits and the _SW_* flags reported for them */
    static const struct { unsigned int mask, flag; } fpsr_map[] =
    {
        { 0x1,  _SW_INVALID },
        { 0x2,  _SW_ZERODIVIDE },
        { 0x4,  _SW_OVERFLOW },
        { 0x8,  _SW_UNDERFLOW },
        { 0x10, _SW_INEXACT },
        { 0x80, _SW_DENORMAL },
    };
    unsigned int status = 0, i;
    ULONG_PTR fpsr;

    __asm__ __volatile__( "mrs %0, fpsr" : "=r" (fpsr) );
    for (i = 0; i < sizeof(fpsr_map) / sizeof(fpsr_map[0]); i++)
        if (fpsr & fpsr_map[i].mask) status |= fpsr_map[i].flag;
    return status;
#else
    FIXME( "not implemented\n" );
    return 0;
#endif
}
5203
5204 /*********************************************************************
5205  *              _clearfp (MSVCRT.@)
5206  */
5207 unsigned int CDECL _clearfp(void)
5208 {
5209     unsigned int flags = 0;
5210 #if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
5211     unsigned long fpword;
5212
5213     __asm__ __volatile__( "fnstsw %0; fnclex" : "=m" (fpword) );
5214     if (fpword & 0x1)  flags |= _SW_INVALID;
5215     if (fpword & 0x2)  flags |= _SW_DENORMAL;
5216     if (fpword & 0x4)  flags |= _SW_ZERODIVIDE;
5217     if (fpword & 0x8)  flags |= _SW_OVERFLOW;
5218     if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5219     if (fpword & 0x20) flags |= _SW_INEXACT;
5220
5221     if (sse2_supported)
5222     {
5223         __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );
5224         if (fpword & 0x1)  flags |= _SW_INVALID;
5225         if (fpword & 0x2)  flags |= _SW_DENORMAL;
5226         if (fpword & 0x4)  flags |= _SW_ZERODIVIDE;
5227         if (fpword & 0x8)  flags |= _SW_OVERFLOW;
5228         if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5229         if (fpword & 0x20) flags |= _SW_INEXACT;
5230         fpword &= ~0x3f;
5231         __asm__ __volatile__( "ldmxcsr %0" : : "m" (fpword) );
5232     }
5233 #elif defined(__aarch64__)
5234     ULONG_PTR fpsr;
5235
5236     __asm__ __volatile__( "mrs %0, fpsr" : "=r" (fpsr) );
5237     if (fpsr & 0x1)  flags |= _SW_INVALID;
5238     if (fpsr & 0x2)  flags |= _SW_ZERODIVIDE;
5239     if (fpsr & 0x4)  flags |= _SW_OVERFLOW;
5240     if (fpsr & 0x8)  flags |= _SW_UNDERFLOW;
5241     if (fpsr & 0x10) flags |= _SW_INEXACT;
5242     if (fpsr & 0x80) flags |= _SW_DENORMAL;
5243     fpsr &= ~0x9f;
5244     __asm__ __volatile__( "msr fpsr, %0" :: "r" (fpsr) );
5245 #else
5246     FIXME( "not implemented\n" );
5247 #endif
5248     return flags;
5249 }
5250
5251 /*********************************************************************
5252  *              __fpecode (MSVCRT.@)
5253  */
5254 int * CDECL __fpecode(void)
5255 {
5256     return &msvcrt_get_thread_data()->fpecode;
5257 }
5258
5259 /*********************************************************************
5260  *              ldexp (MSVCRT.@)
5261  */
5262 double CDECL ldexp(double num, int exp)
5263 {
5264   double z = __scalbn(num, exp);
5265
5266   if (isfinite(num) && !isfinite(z))
5267     return math_error(_OVERFLOW, "ldexp", num, exp, z);
5268   if (num && isfinite(num) && !z)
5269     return math_error(_UNDERFLOW, "ldexp", num, exp, z);
5270   return z;
5271 }
5272
5273 /*********************************************************************
5274  *              _cabs (MSVCRT.@)
5275  */
5276 double CDECL _cabs(struct _complex num)
5277 {
5278   return sqrt(num.x * num.x + num.y * num.y);
5279 }
5280
5281 /*********************************************************************
5282  *              _chgsign (MSVCRT.@)
5283  */
5284 double CDECL _chgsign(double num)
5285 {
5286     union { double f; UINT64 i; } u = { num };
5287     u.i ^= 1ull << 63;
5288     return u.f;
5289 }
5290
/*********************************************************************
 *              __control87_2 (MSVCR80.@)
 *
 * Not exported by native msvcrt, added in msvcr80.
 *
 * Reads, and optionally updates, the x87 and SSE2 control words
 * independently of each other.
 *
 * PARAMS
 *  newval  [I] New settings, expressed with the _EM_, _RC_, _PC_, _IC_ and _DN_ flags
 *  mask    [I] Selects the bits of newval that are applied; 0 only queries
 *  x86_cw  [O] Receives the resulting x87 flags; NULL skips the x87 unit
 *  sse2_cw [O] Receives the resulting SSE2 flags (0 when SSE2 is not
 *              supported); NULL skips the SSE2 unit
 *
 * RETURNS
 *  1 on success, 0 when built without GNU style inline assembly.
 */
#ifdef __i386__
int CDECL __control87_2( unsigned int newval, unsigned int mask,
                         unsigned int *x86_cw, unsigned int *sse2_cw )
{
#if defined(__GNUC__) || defined(__clang__)
    unsigned long fpword;
    unsigned int flags;
    unsigned int old_flags;
    unsigned int unit_mask;

    if (x86_cw)
    {
        __asm__ __volatile__( "fstcw %0" : "=m" (fpword) );

        /* Convert into mask constants */
        flags = 0;
        if (fpword & 0x1)  flags |= _EM_INVALID;
        if (fpword & 0x2)  flags |= _EM_DENORMAL;
        if (fpword & 0x4)  flags |= _EM_ZERODIVIDE;
        if (fpword & 0x8)  flags |= _EM_OVERFLOW;
        if (fpword & 0x10) flags |= _EM_UNDERFLOW;
        if (fpword & 0x20) flags |= _EM_INEXACT;
        switch (fpword & 0xc00)
        {
        case 0xc00: flags |= _RC_UP|_RC_DOWN; break;
        case 0x800: flags |= _RC_UP; break;
        case 0x400: flags |= _RC_DOWN; break;
        }
        switch (fpword & 0x300)
        {
        case 0x0:   flags |= _PC_24; break;
        case 0x200: flags |= _PC_53; break;
        case 0x300: flags |= _PC_64; break;
        }
        if (fpword & 0x1000) flags |= _IC_AFFINE;

        TRACE( "x86 flags=%08x newval=%08x mask=%08x\n", flags, newval, mask );

        /* Only bits that exist in the x87 control word may be changed;
         * anything else (e.g. _MCW_DN) must not leak into *x86_cw. */
        unit_mask = mask & (_MCW_EM | _MCW_IC | _MCW_RC | _MCW_PC);
        if (unit_mask)
        {
            flags = (flags & ~unit_mask) | (newval & unit_mask);

            /* Convert (masked) value back to fp word */
            fpword = 0;
            if (flags & _EM_INVALID)    fpword |= 0x1;
            if (flags & _EM_DENORMAL)   fpword |= 0x2;
            if (flags & _EM_ZERODIVIDE) fpword |= 0x4;
            if (flags & _EM_OVERFLOW)   fpword |= 0x8;
            if (flags & _EM_UNDERFLOW)  fpword |= 0x10;
            if (flags & _EM_INEXACT)    fpword |= 0x20;
            switch (flags & _MCW_RC)
            {
            case _RC_UP|_RC_DOWN:   fpword |= 0xc00; break;
            case _RC_UP:            fpword |= 0x800; break;
            case _RC_DOWN:          fpword |= 0x400; break;
            }
            switch (flags & _MCW_PC)
            {
            case _PC_64: fpword |= 0x300; break;
            case _PC_53: fpword |= 0x200; break;
            case _PC_24: fpword |= 0x0; break;
            }
            if (flags & _IC_AFFINE) fpword |= 0x1000;

            __asm__ __volatile__( "fldcw %0" : : "m" (fpword) );
        }
        *x86_cw = flags;
    }

    if (!sse2_cw) return 1;

    if (sse2_supported)
    {
        __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );

        /* Convert into mask constants */
        flags = 0;
        if (fpword & 0x80)   flags |= _EM_INVALID;
        if (fpword & 0x100)  flags |= _EM_DENORMAL;
        if (fpword & 0x200)  flags |= _EM_ZERODIVIDE;
        if (fpword & 0x400)  flags |= _EM_OVERFLOW;
        if (fpword & 0x800)  flags |= _EM_UNDERFLOW;
        if (fpword & 0x1000) flags |= _EM_INEXACT;
        switch (fpword & 0x6000)
        {
        case 0x6000: flags |= _RC_UP|_RC_DOWN; break;
        case 0x4000: flags |= _RC_UP; break;
        case 0x2000: flags |= _RC_DOWN; break;
        }
        switch (fpword & 0x8040)
        {
        case 0x0040: flags |= _DN_FLUSH_OPERANDS_SAVE_RESULTS; break;
        case 0x8000: flags |= _DN_SAVE_OPERANDS_FLUSH_RESULTS; break;
        case 0x8040: flags |= _DN_FLUSH; break;
        }

        TRACE( "sse2 flags=%08x newval=%08x mask=%08x\n", flags, newval, mask );

        unit_mask = mask & (_MCW_EM | _MCW_RC | _MCW_DN);
        if (unit_mask)
        {
            old_flags = flags;
            flags = (flags & ~unit_mask) | (newval & unit_mask);

            if (flags != old_flags)
            {
                /* Convert (masked) value back to fp word.  MXCSR also holds
                 * the sticky exception status flags in bits 0-5, so only the
                 * control bits are cleared and rebuilt here. */
                fpword &= ~0xffc0ul;
                if (flags & _EM_INVALID)    fpword |= 0x80;
                if (flags & _EM_DENORMAL)   fpword |= 0x100;
                if (flags & _EM_ZERODIVIDE) fpword |= 0x200;
                if (flags & _EM_OVERFLOW)   fpword |= 0x400;
                if (flags & _EM_UNDERFLOW)  fpword |= 0x800;
                if (flags & _EM_INEXACT)    fpword |= 0x1000;
                switch (flags & _MCW_RC)
                {
                case _RC_UP|_RC_DOWN:   fpword |= 0x6000; break;
                case _RC_UP:            fpword |= 0x4000; break;
                case _RC_DOWN:          fpword |= 0x2000; break;
                }
                switch (flags & _MCW_DN)
                {
                case _DN_FLUSH_OPERANDS_SAVE_RESULTS: fpword |= 0x0040; break;
                case _DN_SAVE_OPERANDS_FLUSH_RESULTS: fpword |= 0x8000; break;
                case _DN_FLUSH:                       fpword |= 0x8040; break;
                }
                __asm__ __volatile__( "ldmxcsr %0" : : "m" (fpword) );
            }
        }
        *sse2_cw = flags;
    }
    else *sse2_cw = 0;

    return 1;
#else
    FIXME( "not implemented\n" );
    return 0;
#endif
}
#endif
5433
5434 /*********************************************************************
5435  *              _control87 (MSVCRT.@)
5436  */
5437 unsigned int CDECL _control87(unsigned int newval, unsigned int mask)
5438 {
5439     unsigned int flags = 0;
5440 #ifdef __i386__
5441     unsigned int sse2_cw;
5442
5443     __control87_2( newval, mask, &flags, &sse2_cw );
5444
5445     if ((flags ^ sse2_cw) & (_MCW_EM | _MCW_RC)) flags |= _EM_AMBIGUOUS;
5446     flags |= sse2_cw;
5447 #elif defined(__x86_64__)
5448     unsigned long fpword;
5449     unsigned int old_flags;
5450
5451     __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );
5452     if (fpword & 0x80)   flags |= _EM_INVALID;
5453     if (fpword & 0x100)  flags |= _EM_DENORMAL;
5454     if (fpword & 0x200)  flags |= _EM_ZERODIVIDE;
5455     if (fpword & 0x400)  flags |= _EM_OVERFLOW;
5456     if (fpword & 0x800)  flags |= _EM_UNDERFLOW;
5457     if (fpword & 0x1000) flags |= _EM_INEXACT;
5458     switch (fpword & 0x6000)
5459     {
5460     case 0x6000: flags |= _RC_CHOP; break;
5461     case 0x4000: flags |= _RC_UP; break;
5462     case 0x2000: flags |= _RC_DOWN; break;
5463     }
5464     switch (fpword & 0x8040)
5465     {
5466     case 0x0040: flags |= _DN_FLUSH_OPERANDS_SAVE_RESULTS; break;
5467     case 0x8000: flags |= _DN_SAVE_OPERANDS_FLUSH_RESULTS; break;
5468     case 0x8040: flags |= _DN_FLUSH; break;
5469     }
5470     old_flags = flags;
5471     mask &= _MCW_EM | _MCW_RC | _MCW_DN;
5472     flags = (flags & ~mask) | (newval & mask);
5473     if (flags != old_flags)
5474     {
5475         fpword = 0;
5476         if (flags & _EM_INVALID)    fpword |= 0x80;
5477         if (flags & _EM_DENORMAL)   fpword |= 0x100;
5478         if (flags & _EM_ZERODIVIDE) fpword |= 0x200;
5479         if (flags & _EM_OVERFLOW)   fpword |= 0x400;
5480         if (flags & _EM_UNDERFLOW)  fpword |= 0x800;
5481         if (flags & _EM_INEXACT)    fpword |= 0x1000;
5482         switch (flags & _MCW_RC)
5483         {
5484         case _RC_CHOP: fpword |= 0x6000; break;
5485         case _RC_UP:   fpword |= 0x4000; break;
5486         case _RC_DOWN: fpword |= 0x2000; break;
5487         }
5488         switch (flags & _MCW_DN)
5489         {
5490         case _DN_FLUSH_OPERANDS_SAVE_RESULTS: fpword |= 0x0040; break;
5491         case _DN_SAVE_OPERANDS_FLUSH_RESULTS: fpword |= 0x8000; break;
5492         case _DN_FLUSH:                       fpword |= 0x8040; break;
5493         }
5494         __asm__ __volatile__( "ldmxcsr %0" :: "m" (fpword) );
5495     }
5496 #elif defined(__aarch64__)
5497     ULONG_PTR fpcr;
5498
5499     __asm__ __volatile__( "mrs %0, fpcr" : "=r" (fpcr) );
5500     if (!(fpcr & 0x100))  flags |= _EM_INVALID;
5501     if (!(fpcr & 0x200))  flags |= _EM_ZERODIVIDE;
5502     if (!(fpcr & 0x400))  flags |= _EM_OVERFLOW;
5503     if (!(fpcr & 0x800))  flags |= _EM_UNDERFLOW;
5504     if (!(fpcr & 0x1000)) flags |= _EM_INEXACT;
5505     if (!(fpcr & 0x8000)) flags |= _EM_DENORMAL;
5506     switch (fpcr & 0xc00000)
5507     {
5508     case 0x400000: flags |= _RC_UP; break;
5509     case 0x800000: flags |= _RC_DOWN; break;
5510     case 0xc00000: flags |= _RC_CHOP; break;
5511     }
5512     flags = (flags & ~mask) | (newval & mask);
5513     fpcr &= ~0xc09f00ul;
5514     if (!(flags & _EM_INVALID)) fpcr |= 0x100;
5515     if (!(flags & _EM_ZERODIVIDE)) fpcr |= 0x200;
5516     if (!(flags & _EM_OVERFLOW)) fpcr |= 0x400;
5517     if (!(flags & _EM_UNDERFLOW)) fpcr |= 0x800;
5518     if (!(flags & _EM_INEXACT)) fpcr |= 0x1000;
5519     if (!(flags & _EM_DENORMAL)) fpcr |= 0x8000;
5520     switch (flags & _MCW_RC)
5521     {
5522     case _RC_CHOP: fpcr |= 0xc00000; break;
5523     case _RC_UP:   fpcr |= 0x400000; break;
5524     case _RC_DOWN: fpcr |= 0x800000; break;
5525     }
5526     __asm__ __volatile__( "msr fpcr, %0" :: "r" (fpcr) );
5527 #else
5528     FIXME( "not implemented\n" );
5529 #endif
5530     return flags;
5531 }
5532
5533 /*********************************************************************
5534  *              _controlfp (MSVCRT.@)
5535  */
5536 unsigned int CDECL _controlfp(unsigned int newval, unsigned int mask)
5537 {
5538   return _control87( newval, mask & ~_EM_DENORMAL );
5539 }
5540
5541 /*********************************************************************
5542  *              _set_controlfp (MSVCRT.@)
5543  */
5544 void CDECL _set_controlfp( unsigned int newval, unsigned int mask )
5545 {
5546     _controlfp( newval, mask );
5547 }
5548
5549 /*********************************************************************
5550  *              _controlfp_s (MSVCRT.@)
5551  */
5552 int CDECL _controlfp_s(unsigned int *cur, unsigned int newval, unsigned int mask)
5553 {
5554     static const unsigned int all_flags = (_MCW_EM | _MCW_IC | _MCW_RC |
5555                                            _MCW_PC | _MCW_DN);
5556     unsigned int val;
5557
5558     if (!MSVCRT_CHECK_PMT( !(newval & mask & ~all_flags) ))
5559     {
5560         if (cur) *cur = _controlfp( 0, 0 );  /* retrieve it anyway */
5561         return EINVAL;
5562     }
5563     val = _controlfp( newval, mask );
5564     if (cur) *cur = val;
5565     return 0;
5566 }
5567
5568 #if _MSVCR_VER >= 140
/* Bit patterns produced by fenv_encode() and recognised by fenv_decode().
 *
 * FENV_X_* values encode flags of the first (x87) word, FENV_Y_* values
 * those of the second (SSE) word.  Where both sets have an entry for the
 * same condition, the two values share their low 16 bits and differ only
 * in the high 16 bits. */
enum fenv_masks
{
    /* x87: exception bits (_EM_* and _SW_* flags) */
    FENV_X_INVALID = 0x00100010,
    FENV_X_DENORMAL = 0x00200020,
    FENV_X_ZERODIVIDE = 0x00080008,
    FENV_X_OVERFLOW = 0x00040004,
    FENV_X_UNDERFLOW = 0x00020002,
    FENV_X_INEXACT = 0x00010001,
    /* x87: infinity control (_IC_AFFINE) */
    FENV_X_AFFINE = 0x00004000,
    /* x87: rounding control (_RC_UP, _RC_DOWN) */
    FENV_X_UP = 0x00800200,
    FENV_X_DOWN = 0x00400100,
    /* x87: precision control (_PC_24, _PC_53) */
    FENV_X_24 = 0x00002000,
    FENV_X_53 = 0x00001000,
    /* SSE: exception bits (_EM_* and _SW_* flags) */
    FENV_Y_INVALID = 0x10000010,
    FENV_Y_DENORMAL = 0x20000020,
    FENV_Y_ZERODIVIDE = 0x08000008,
    FENV_Y_OVERFLOW = 0x04000004,
    FENV_Y_UNDERFLOW = 0x02000002,
    FENV_Y_INEXACT = 0x01000001,
    /* SSE: rounding control (_RC_UP, _RC_DOWN) */
    FENV_Y_UP = 0x80000200,
    FENV_Y_DOWN = 0x40000100,
    /* SSE: denormal control (_DN_FLUSH, _DN_FLUSH_OPERANDS_SAVE_RESULTS) */
    FENV_Y_FLUSH = 0x00000400,
    FENV_Y_FLUSH_SAVE = 0x00000800
};
5593
5594 /* encodes x87/sse control/status word in ulong */
5595 static __msvcrt_ulong fenv_encode(unsigned int x, unsigned int y)
5596 {
5597     __msvcrt_ulong ret = 0;
5598
5599 #ifdef __i386__
5600     if (x & _EM_INVALID) ret |= FENV_X_INVALID;
5601     if (x & _EM_DENORMAL) ret |= FENV_X_DENORMAL;
5602     if (x & _EM_ZERODIVIDE) ret |= FENV_X_ZERODIVIDE;
5603     if (x & _EM_OVERFLOW) ret |= FENV_X_OVERFLOW;
5604     if (x & _EM_UNDERFLOW) ret |= FENV_X_UNDERFLOW;
5605     if (x & _EM_INEXACT) ret |= FENV_X_INEXACT;
5606     if (x & _IC_AFFINE) ret |= FENV_X_AFFINE;
5607     if (x & _RC_UP) ret |= FENV_X_UP;
5608     if (x & _RC_DOWN) ret |= FENV_X_DOWN;
5609     if (x & _PC_24) ret |= FENV_X_24;
5610     if (x & _PC_53) ret |= FENV_X_53;
5611 #endif
5612     x &= ~(_MCW_EM | _MCW_IC | _MCW_RC | _MCW_PC);
5613
5614     if (y & _EM_INVALID) ret |= FENV_Y_INVALID;
5615     if (y & _EM_DENORMAL) ret |= FENV_Y_DENORMAL;
5616     if (y & _EM_ZERODIVIDE) ret |= FENV_Y_ZERODIVIDE;
5617     if (y & _EM_OVERFLOW) ret |= FENV_Y_OVERFLOW;
5618     if (y & _EM_UNDERFLOW) ret |= FENV_Y_UNDERFLOW;
5619     if (y & _EM_INEXACT) ret |= FENV_Y_INEXACT;
5620     if (y & _RC_UP) ret |= FENV_Y_UP;
5621     if (y & _RC_DOWN) ret |= FENV_Y_DOWN;
5622     if (y & _DN_FLUSH) ret |= FENV_Y_FLUSH;
5623     if (y & _DN_FLUSH_OPERANDS_SAVE_RESULTS) ret |= FENV_Y_FLUSH_SAVE;
5624     y &= ~(_MCW_EM | _MCW_IC | _MCW_RC | _MCW_DN);
5625
5626     if(x || y) FIXME("unsupported flags: %x, %x\n", x, y);
5627     return ret;
5628 }
5629
5630 /* decodes x87/sse control/status word, returns FALSE on error */
5631 #if (defined(__i386__) || defined(__x86_64__))
5632 static BOOL fenv_decode(__msvcrt_ulong enc, unsigned int *x, unsigned int *y)
5633 {
5634     *x = *y = 0;
5635     if ((enc & FENV_X_INVALID) == FENV_X_INVALID) *x |= _EM_INVALID;
5636     if ((enc & FENV_X_DENORMAL) == FENV_X_DENORMAL) *x |= _EM_DENORMAL;
5637     if ((enc & FENV_X_ZERODIVIDE) == FENV_X_ZERODIVIDE) *x |= _EM_ZERODIVIDE;
5638     if ((enc & FENV_X_OVERFLOW) == FENV_X_OVERFLOW) *x |= _EM_OVERFLOW;
5639     if ((enc & FENV_X_UNDERFLOW) == FENV_X_UNDERFLOW) *x |= _EM_UNDERFLOW;
5640     if ((enc & FENV_X_INEXACT) == FENV_X_INEXACT) *x |= _EM_INEXACT;
5641     if ((enc & FENV_X_AFFINE) == FENV_X_AFFINE) *x |= _IC_AFFINE;
5642     if ((enc & FENV_X_UP) == FENV_X_UP) *x |= _RC_UP;
5643     if ((enc & FENV_X_DOWN) == FENV_X_DOWN) *x |= _RC_DOWN;
5644     if ((enc & FENV_X_24) == FENV_X_24) *x |= _PC_24;
5645     if ((enc & FENV_X_53) == FENV_X_53) *x |= _PC_53;
5646
5647     if ((enc & FENV_Y_INVALID) == FENV_Y_INVALID) *y |= _EM_INVALID;
5648     if ((enc & FENV_Y_DENORMAL) == FENV_Y_DENORMAL) *y |= _EM_DENORMAL;
5649     if ((enc & FENV_Y_ZERODIVIDE) == FENV_Y_ZERODIVIDE) *y |= _EM_ZERODIVIDE;
5650     if ((enc & FENV_Y_OVERFLOW) == FENV_Y_OVERFLOW) *y |= _EM_OVERFLOW;
5651     if ((enc & FENV_Y_UNDERFLOW) == FENV_Y_UNDERFLOW) *y |= _EM_UNDERFLOW;
5652     if ((enc & FENV_Y_INEXACT) == FENV_Y_INEXACT) *y |= _EM_INEXACT;
5653     if ((enc & FENV_Y_UP) == FENV_Y_UP) *y |= _RC_UP;
5654     if ((enc & FENV_Y_DOWN) == FENV_Y_DOWN) *y |= _RC_DOWN;
5655     if ((enc & FENV_Y_FLUSH) == FENV_Y_FLUSH) *y |= _DN_FLUSH;
5656     if ((enc & FENV_Y_FLUSH_SAVE) == FENV_Y_FLUSH_SAVE) *y |= _DN_FLUSH_OPERANDS_SAVE_RESULTS;
5657
5658     if (fenv_encode(*x, *y) != enc)
5659     {
5660         WARN("can't decode: %lx\n", enc);
5661         return FALSE;
5662     }
5663     return TRUE;
5664 }
5665 #endif
5666 #elif _MSVCR_VER >= 120
5667 static __msvcrt_ulong fenv_encode(unsigned int x, unsigned int y)
5668 {
5669     return x | y;
5670 }
5671
5672 #if (defined(__i386__) || defined(__x86_64__))
5673 static BOOL fenv_decode(__msvcrt_ulong enc, unsigned int *x, unsigned int *y)
5674 {
5675     *x = *y = enc;
5676     return TRUE;
5677 }
5678 #endif
5679 #endif
5680
5681 #if _MSVCR_VER>=120
5682 /*********************************************************************
5683  *              fegetenv (MSVCR120.@)
5684  */
5685 int CDECL fegetenv(fenv_t *env)
5686 {
5687 #if _MSVCR_VER>=140 && defined(__i386__)
5688     unsigned int x87, sse;
5689     __control87_2(0, 0, &x87, &sse);
5690     env->_Fe_ctl = fenv_encode(x87, sse);
5691     _statusfp2(&x87, &sse);
5692     env->_Fe_stat = fenv_encode(x87, sse);
5693 #elif _MSVCR_VER>=140
5694     env->_Fe_ctl = fenv_encode(0, _control87(0, 0));
5695     env->_Fe_stat = fenv_encode(0, _statusfp());
5696 #else
5697     env->_Fe_ctl = _controlfp(0, 0) & (_EM_INEXACT | _EM_UNDERFLOW |
5698             _EM_OVERFLOW | _EM_ZERODIVIDE | _EM_INVALID | _RC_CHOP);
5699     env->_Fe_stat = _statusfp();
5700 #endif
5701     return 0;
5702 }
5703
5704 /*********************************************************************
5705  *              feupdateenv (MSVCR120.@)
5706  */
5707 int CDECL feupdateenv(const fenv_t *env)
5708 {
5709     fenv_t set;
5710     fegetenv(&set);
5711     set._Fe_ctl = env->_Fe_ctl;
5712     set._Fe_stat |= env->_Fe_stat;
5713     return fesetenv(&set);
5714 }
5715
5716 /*********************************************************************
5717  *      fetestexcept (MSVCR120.@)
5718  */
5719 int CDECL fetestexcept(int flags)
5720 {
5721     return _statusfp() & flags;
5722 }
5723
5724 /*********************************************************************
5725  *      fesetexceptflag (MSVCR120.@)
5726  */
5727 int CDECL fesetexceptflag(const fexcept_t *status, int excepts)
5728 {
5729     fenv_t env;
5730
5731     excepts &= FE_ALL_EXCEPT;
5732     if(!excepts)
5733         return 0;
5734
5735     fegetenv(&env);
5736     env._Fe_stat &= ~fenv_encode(excepts, excepts);
5737     env._Fe_stat |= *status & fenv_encode(excepts, excepts);
5738     return fesetenv(&env);
5739 }
5740
5741 /*********************************************************************
5742  *      feraiseexcept (MSVCR120.@)
5743  */
5744 int CDECL feraiseexcept(int flags)
5745 {
5746     fenv_t env;
5747
5748     flags &= FE_ALL_EXCEPT;
5749     fegetenv(&env);
5750     env._Fe_stat |= fenv_encode(flags, flags);
5751     return fesetenv(&env);
5752 }
5753
5754 /*********************************************************************
5755  *      feclearexcept (MSVCR120.@)
5756  */
5757 int CDECL feclearexcept(int flags)
5758 {
5759     fenv_t env;
5760
5761     fegetenv(&env);
5762     flags &= FE_ALL_EXCEPT;
5763     env._Fe_stat &= ~fenv_encode(flags, flags);
5764     return fesetenv(&env);
5765 }
5766
5767 /*********************************************************************
5768  *      fegetexceptflag (MSVCR120.@)
5769  */
5770 int CDECL fegetexceptflag(fexcept_t *status, int excepts)
5771 {
5772 #if _MSVCR_VER>=140 && defined(__i386__)
5773     unsigned int x87, sse;
5774     _statusfp2(&x87, &sse);
5775     *status = fenv_encode(x87 & excepts, sse & excepts);
5776 #else
5777     *status = fenv_encode(0, _statusfp() & excepts);
5778 #endif
5779     return 0;
5780 }
5781 #endif
5782
5783 #if _MSVCR_VER>=140
5784 /*********************************************************************
5785  *              __fpe_flt_rounds (UCRTBASE.@)
5786  */
5787 int CDECL __fpe_flt_rounds(void)
5788 {
5789     unsigned int fpc = _controlfp(0, 0) & _RC_CHOP;
5790
5791     TRACE("()\n");
5792
5793     switch(fpc) {
5794         case _RC_CHOP: return 0;
5795         case _RC_NEAR: return 1;
5796         case _RC_UP: return 2;
5797         default: return 3;
5798     }
5799 }
5800 #endif
5801
5802 #if _MSVCR_VER>=120
5803
5804 /*********************************************************************
5805  *              fegetround (MSVCR120.@)
5806  */
5807 int CDECL fegetround(void)
5808 {
5809     return _controlfp(0, 0) & _MCW_RC;
5810 }
5811
5812 /*********************************************************************
5813  *              fesetround (MSVCR120.@)
5814  */
5815 int CDECL fesetround(int round_mode)
5816 {
5817     if (round_mode & (~_MCW_RC))
5818         return 1;
5819     _controlfp(round_mode, _MCW_RC);
5820     return 0;
5821 }
5822
5823 #endif /* _MSVCR_VER>=120 */
5824
5825 /*********************************************************************
5826  *              _copysign (MSVCRT.@)
5827  *
5828  * Copied from musl: src/math/copysign.c
5829  */
5830 double CDECL _copysign( double x, double y )
5831 {
5832     union { double f; UINT64 i; } ux = { x }, uy = { y };
5833     ux.i &= ~0ull >> 1;
5834     ux.i |= uy.i & 1ull << 63;
5835     return ux.f;
5836 }
5837
5838 /*********************************************************************
5839  *              _finite (MSVCRT.@)
5840  */
5841 int CDECL _finite(double num)
5842 {
5843     union { double f; UINT64 i; } u = { num };
5844     return (u.i & ~0ull >> 1) < 0x7ffull << 52;
5845 }
5846
5847 /*********************************************************************
5848  *              _fpreset (MSVCRT.@)
5849  */
5850 void CDECL _fpreset(void)
5851 {
5852 #if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
5853     const unsigned int x86_cw = 0x27f;
5854     __asm__ __volatile__( "fninit; fldcw %0" : : "m" (x86_cw) );
5855     if (sse2_supported)
5856     {
5857         const unsigned long sse2_cw = 0x1f80;
5858         __asm__ __volatile__( "ldmxcsr %0" : : "m" (sse2_cw) );
5859     }
5860 #else
5861     FIXME( "not implemented\n" );
5862 #endif
5863 }
5864
5865 #if _MSVCR_VER>=120
/*********************************************************************
 *              fesetenv (MSVCR120.@)
 *
 * Installs the floating point environment described by env.
 *
 * An environment whose control and status words are both zero resets the
 * FPU through _fpreset().  Otherwise both words are decoded with
 * fenv_decode() and written to the x87 environment (i386 only) and to
 * MXCSR (when SSE2 is supported).
 *
 * RETURNS
 *  0 on success, 1 when a word cannot be decoded or when the function is
 *  not implemented for the target.
 */
int CDECL fesetenv(const fenv_t *env)
{
#if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
    unsigned int x87_cw, sse_cw, x87_stat, sse_stat;
#ifdef __i386__
    /* memory image stored by fnstenv and loaded by fldenv */
    struct {
        WORD control_word;
        WORD unused1;
        WORD status_word;
        WORD unused2;
        WORD tag_word;
        WORD unused3;
        DWORD instruction_pointer;
        WORD code_segment;
        WORD unused4;
        DWORD operand_addr;
        WORD data_segment;
        WORD unused5;
    } fenv;
#endif

    TRACE( "(%p)\n", env );

    /* an all-zero environment requests the default state */
    if (!env->_Fe_ctl && !env->_Fe_stat) {
        _fpreset();
        return 0;
    }

    if (!fenv_decode(env->_Fe_ctl, &x87_cw, &sse_cw))
        return 1;
    if (!fenv_decode(env->_Fe_stat, &x87_stat, &sse_stat))
        return 1;

#ifdef __i386__
    /* start from the current x87 environment and patch the parts we own */
    __asm__ __volatile__( "fnstenv %0" : "=m" (fenv) );

    /* clear rounding control and all exception masks except denormal */
    fenv.control_word &= ~0xc3d;
#if _MSVCR_VER>=140
    /* additionally clear infinity control, precision control and the
     * denormal mask */
    fenv.control_word &= ~0x1302;
#endif
    if (x87_cw & _EM_INVALID) fenv.control_word |= 0x1;
    if (x87_cw & _EM_ZERODIVIDE) fenv.control_word |= 0x4;
    if (x87_cw & _EM_OVERFLOW) fenv.control_word |= 0x8;
    if (x87_cw & _EM_UNDERFLOW) fenv.control_word |= 0x10;
    if (x87_cw & _EM_INEXACT) fenv.control_word |= 0x20;
    switch (x87_cw & _MCW_RC)
    {
        case _RC_UP|_RC_DOWN:   fenv.control_word |= 0xc00; break;
        case _RC_UP:            fenv.control_word |= 0x800; break;
        case _RC_DOWN:          fenv.control_word |= 0x400; break;
    }
#if _MSVCR_VER>=140
    if (x87_cw & _EM_DENORMAL) fenv.control_word |= 0x2;
    switch (x87_cw & _MCW_PC)
    {
        case _PC_64: fenv.control_word |= 0x300; break;
        case _PC_53: fenv.control_word |= 0x200; break;
        case _PC_24: fenv.control_word |= 0x0; break;
    }
    if (x87_cw & _IC_AFFINE) fenv.control_word |= 0x1000;
#endif

    /* replace the six exception flags of the status word */
    fenv.status_word &= ~0x3f;
    if (x87_stat & _SW_INVALID) fenv.status_word |= 0x1;
    if (x87_stat & _SW_DENORMAL) fenv.status_word |= 0x2;
    if (x87_stat & _SW_ZERODIVIDE) fenv.status_word |= 0x4;
    if (x87_stat & _SW_OVERFLOW) fenv.status_word |= 0x8;
    if (x87_stat & _SW_UNDERFLOW) fenv.status_word |= 0x10;
    if (x87_stat & _SW_INEXACT) fenv.status_word |= 0x20;

    /* the whole x87 register stack is listed as clobbered by the load */
    __asm__ __volatile__( "fldenv %0" : : "m" (fenv) : "st", "st(1)",
            "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" );
#endif

    if (sse2_supported)
    {
        DWORD fpword;
        __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );
        /* clear the status flags, rounding control and all exception masks
         * except denormal */
        fpword &= ~0x7ebf;
#if _MSVCR_VER>=140
        /* additionally clear the denormal mask and both flush control bits */
        fpword &= ~0x8140;
#endif
        if (sse_cw & _EM_INVALID) fpword |= 0x80;
        if (sse_cw & _EM_ZERODIVIDE) fpword |= 0x200;
        if (sse_cw & _EM_OVERFLOW) fpword |= 0x400;
        if (sse_cw & _EM_UNDERFLOW) fpword |= 0x800;
        if (sse_cw & _EM_INEXACT) fpword |= 0x1000;
        switch (sse_cw & _MCW_RC)
        {
            case _RC_CHOP: fpword |= 0x6000; break;
            case _RC_UP:   fpword |= 0x4000; break;
            case _RC_DOWN: fpword |= 0x2000; break;
        }
        /* MXCSR carries the status flags in its low six bits */
        if (sse_stat & _SW_INVALID) fpword |= 0x1;
        if (sse_stat & _SW_DENORMAL) fpword |= 0x2;
        if (sse_stat & _SW_ZERODIVIDE) fpword |= 0x4;
        if (sse_stat & _SW_OVERFLOW) fpword |= 0x8;
        if (sse_stat & _SW_UNDERFLOW) fpword |= 0x10;
        if (sse_stat & _SW_INEXACT) fpword |= 0x20;
#if _MSVCR_VER>=140
        if (sse_cw & _EM_DENORMAL) fpword |= 0x100;
        switch (sse_cw & _MCW_DN)
        {
            case _DN_FLUSH_OPERANDS_SAVE_RESULTS: fpword |= 0x0040; break;
            case _DN_SAVE_OPERANDS_FLUSH_RESULTS: fpword |= 0x8000; break;
            case _DN_FLUSH:                       fpword |= 0x8040; break;
        }
#endif
        __asm__ __volatile__( "ldmxcsr %0" : : "m" (fpword) );
    }

    return 0;
#else
    FIXME( "not implemented\n" );
#endif
    /* only reached on targets without an implementation */
    return 1;
}
5986 #endif
5987
5988 /*********************************************************************
5989  *              _isnan (MSVCRT.@)
5990  */
5991 int CDECL _isnan(double num)
5992 {
5993     union { double f; UINT64 i; } u = { num };
5994     return (u.i & ~0ull >> 1) > 0x7ffull << 52;
5995 }
5996
5997 static double pzero(double x)
5998 {
5999     static const double pR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6000         0.00000000000000000000e+00,
6001         -7.03124999999900357484e-02,
6002         -8.08167041275349795626e+00,
6003         -2.57063105679704847262e+02,
6004         -2.48521641009428822144e+03,
6005         -5.25304380490729545272e+03,
6006     }, pS8[5] = {
6007         1.16534364619668181717e+02,
6008         3.83374475364121826715e+03,
6009         4.05978572648472545552e+04,
6010         1.16752972564375915681e+05,
6011         4.76277284146730962675e+04,
6012     }, pR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6013         -1.14125464691894502584e-11,
6014         -7.03124940873599280078e-02,
6015         -4.15961064470587782438e+00,
6016         -6.76747652265167261021e+01,
6017         -3.31231299649172967747e+02,
6018         -3.46433388365604912451e+02,
6019     }, pS5[5] = {
6020         6.07539382692300335975e+01,
6021         1.05125230595704579173e+03,
6022         5.97897094333855784498e+03,
6023         9.62544514357774460223e+03,
6024         2.40605815922939109441e+03,
6025     }, pR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
6026         -2.54704601771951915620e-09,
6027         -7.03119616381481654654e-02,
6028         -2.40903221549529611423e+00,
6029         -2.19659774734883086467e+01,
6030         -5.80791704701737572236e+01,
6031         -3.14479470594888503854e+01,
6032     }, pS3[5] = {
6033         3.58560338055209726349e+01,
6034         3.61513983050303863820e+02,
6035         1.19360783792111533330e+03,
6036         1.12799679856907414432e+03,
6037         1.73580930813335754692e+02,
6038     }, pR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
6039         -8.87534333032526411254e-08,
6040         -7.03030995483624743247e-02,
6041         -1.45073846780952986357e+00,
6042         -7.63569613823527770791e+00,
6043         -1.11931668860356747786e+01,
6044         -3.23364579351335335033e+00,
6045     }, pS2[5] = {
6046         2.22202997532088808441e+01,
6047         1.36206794218215208048e+02,
6048         2.70470278658083486789e+02,
6049         1.53875394208320329881e+02,
6050         1.46576176948256193810e+01,
6051     };
6052
6053     const double *p, *q;
6054     double z, r, s;
6055     UINT32 ix;
6056
6057     ix = *(ULONGLONG*)&x >> 32;
6058     ix &= 0x7fffffff;
6059     if (ix >= 0x40200000) {
6060         p = pR8;
6061         q = pS8;
6062     } else if (ix >= 0x40122E8B) {
6063         p = pR5;
6064         q = pS5;
6065     } else if (ix >= 0x4006DB6D) {
6066         p = pR3;
6067         q = pS3;
6068     } else /*ix >= 0x40000000*/ {
6069         p = pR2;
6070         q = pS2;
6071     }
6072
6073     z = 1.0 / (x * x);
6074     r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
6075     s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4]))));
6076     return 1.0 + r / s;
6077 }
6078
6079 static double qzero(double x)
6080 {
6081     static const double qR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6082         0.00000000000000000000e+00,
6083         7.32421874999935051953e-02,
6084         1.17682064682252693899e+01,
6085         5.57673380256401856059e+02,
6086         8.85919720756468632317e+03,
6087         3.70146267776887834771e+04,
6088     }, qS8[6] = {
6089         1.63776026895689824414e+02,
6090         8.09834494656449805916e+03,
6091         1.42538291419120476348e+05,
6092         8.03309257119514397345e+05,
6093         8.40501579819060512818e+05,
6094         -3.43899293537866615225e+05,
6095     }, qR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6096         1.84085963594515531381e-11,
6097         7.32421766612684765896e-02,
6098         5.83563508962056953777e+00,
6099         1.35111577286449829671e+02,
6100         1.02724376596164097464e+03,
6101         1.98997785864605384631e+03,
6102     }, qS5[6] = {
6103         8.27766102236537761883e+01,
6104         2.07781416421392987104e+03,
6105         1.88472887785718085070e+04,
6106         5.67511122894947329769e+04,
6107         3.59767538425114471465e+04,
6108         -5.35434275601944773371e+03,
6109     }, qR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
6110         4.37741014089738620906e-09,
6111         7.32411180042911447163e-02,
6112         3.34423137516170720929e+00,
6113         4.26218440745412650017e+01,
6114         1.70808091340565596283e+02,
6115         1.66733948696651168575e+02,
6116     }, qS3[6] = {
6117         4.87588729724587182091e+01,
6118         7.09689221056606015736e+02,
6119         3.70414822620111362994e+03,
6120         6.46042516752568917582e+03,
6121         2.51633368920368957333e+03,
6122         -1.49247451836156386662e+02,
6123     }, qR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
6124         1.50444444886983272379e-07,
6125         7.32234265963079278272e-02,
6126         1.99819174093815998816e+00,
6127         1.44956029347885735348e+01,
6128         3.16662317504781540833e+01,
6129         1.62527075710929267416e+01,
6130     }, qS2[6] = {
6131         3.03655848355219184498e+01,
6132         2.69348118608049844624e+02,
6133         8.44783757595320139444e+02,
6134         8.82935845112488550512e+02,
6135         2.12666388511798828631e+02,
6136         -5.31095493882666946917e+00,
6137     };
6138
6139     const double *p, *q;
6140     double s, r, z;
6141     unsigned int ix;
6142
6143     ix = *(ULONGLONG*)&x >> 32;
6144     ix &= 0x7fffffff;
6145     if (ix >= 0x40200000) {
6146         p = qR8;
6147         q = qS8;
6148     } else if (ix >= 0x40122E8B) {
6149         p = qR5;
6150         q = qS5;
6151     } else if (ix >= 0x4006DB6D) {
6152         p = qR3;
6153         q = qS3;
6154     } else /*ix >= 0x40000000*/ {
6155         p = qR2;
6156         q = qS2;
6157     }
6158
6159     z = 1.0 / (x * x);
6160     r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
6161     s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5])))));
6162     return (-0.125 + r / s) / x;
6163 }
6164
6165 /* j0 and y0 approximation for |x|>=2 */
6166 static double j0_y0_approx(unsigned int ix, double x, BOOL y0)
6167 {
6168     static const double invsqrtpi = 5.64189583547756279280e-01;
6169
6170     double s, c, ss, cc, z;
6171
6172     s = sin(x);
6173     c = cos(x);
6174     if (y0) c = -c;
6175     cc = s + c;
6176     /* avoid overflow in 2*x, big ulp error when x>=0x1p1023 */
6177     if (ix < 0x7fe00000) {
6178         ss = s - c;
6179         z = -cos(2 * x);
6180         if (s * c < 0) cc = z / ss;
6181         else ss = z / cc;
6182         if (ix < 0x48000000) {
6183             if (y0) ss = -ss;
6184             cc = pzero(x) * cc - qzero(x) * ss;
6185         }
6186     }
6187     return invsqrtpi * cc / sqrt(x);
6188 }
6189
6190 /*********************************************************************
6191  *              _j0 (MSVCRT.@)
6192  *
6193  * Copied from musl: src/math/j0.c
6194  */
6195 double CDECL _j0(double x)
6196 {
6197     static const double R02 =  1.56249999999999947958e-02,
6198             R03 = -1.89979294238854721751e-04,
6199             R04 =  1.82954049532700665670e-06,
6200             R05 = -4.61832688532103189199e-09,
6201             S01 =  1.56191029464890010492e-02,
6202             S02 =  1.16926784663337450260e-04,
6203             S03 =  5.13546550207318111446e-07,
6204             S04 =  1.16614003333790000205e-09;
6205
6206     double z, r, s;
6207     unsigned int ix;
6208
6209     ix = *(ULONGLONG*)&x >> 32;
6210     ix &= 0x7fffffff;
6211
6212     /* j0(+-inf)=0, j0(nan)=nan */
6213     if (ix >= 0x7ff00000)
6214         return math_error(_DOMAIN, "_j0", x, 0, 1 / (x * x));
6215     x = fabs(x);
6216
6217     if (ix >= 0x40000000) {  /* |x| >= 2 */
6218         /* large ulp error near zeros: 2.4, 5.52, 8.6537,.. */
6219         return j0_y0_approx(ix, x, FALSE);
6220     }
6221
6222     if (ix >= 0x3f200000) {  /* |x| >= 2**-13 */
6223         /* up to 4ulp error close to 2 */
6224         z = x * x;
6225         r = z * (R02 + z * (R03 + z * (R04 + z * R05)));
6226         s = 1 + z * (S01 + z * (S02 + z * (S03 + z * S04)));
6227         return (1 + x / 2) * (1 - x / 2) + z * (r / s);
6228     }
6229
6230     /* 1 - x*x/4 */
6231     /* prevent underflow */
6232     /* inexact should be raised when x!=0, this is not done correctly */
6233     if (ix >= 0x38000000)  /* |x| >= 2**-127 */
6234         x = 0.25 * x * x;
6235     return 1 - x;
6236 }
6237
6238 static double pone(double x)
6239 {
6240     static const double pr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6241         0.00000000000000000000e+00,
6242         1.17187499999988647970e-01,
6243         1.32394806593073575129e+01,
6244         4.12051854307378562225e+02,
6245         3.87474538913960532227e+03,
6246         7.91447954031891731574e+03,
6247     }, ps8[5] = {
6248         1.14207370375678408436e+02,
6249         3.65093083420853463394e+03,
6250         3.69562060269033463555e+04,
6251         9.76027935934950801311e+04,
6252         3.08042720627888811578e+04,
6253     }, pr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6254         1.31990519556243522749e-11,
6255         1.17187493190614097638e-01,
6256         6.80275127868432871736e+00,
6257         1.08308182990189109773e+02,
6258         5.17636139533199752805e+02,
6259         5.28715201363337541807e+02,
6260     }, ps5[5] = {
6261         5.92805987221131331921e+01,
6262         9.91401418733614377743e+02,
6263         5.35326695291487976647e+03,
6264         7.84469031749551231769e+03,
6265         1.50404688810361062679e+03,
6266     }, pr3[6] = {
6267         3.02503916137373618024e-09,
6268         1.17186865567253592491e-01,
6269         3.93297750033315640650e+00,
6270         3.51194035591636932736e+01,
6271         9.10550110750781271918e+01,
6272         4.85590685197364919645e+01,
6273     }, ps3[5] = {
6274         3.47913095001251519989e+01,
6275         3.36762458747825746741e+02,
6276         1.04687139975775130551e+03,
6277         8.90811346398256432622e+02,
6278         1.03787932439639277504e+02,
6279     }, pr2[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
6280         1.07710830106873743082e-07,
6281         1.17176219462683348094e-01,
6282         2.36851496667608785174e+00,
6283         1.22426109148261232917e+01,
6284         1.76939711271687727390e+01,
6285         5.07352312588818499250e+00,
6286     }, ps2[5] = {
6287         2.14364859363821409488e+01,
6288         1.25290227168402751090e+02,
6289         2.32276469057162813669e+02,
6290         1.17679373287147100768e+02,
6291         8.36463893371618283368e+00,
6292     };
6293
6294     const double *p, *q;
6295     double z, r, s;
6296     unsigned int ix;
6297
6298     ix = *(ULONGLONG*)&x >> 32;
6299     ix &= 0x7fffffff;
6300     if (ix >= 0x40200000) {
6301         p = pr8;
6302         q = ps8;
6303     } else if (ix >= 0x40122E8B) {
6304         p = pr5;
6305         q = ps5;
6306     } else if (ix >= 0x4006DB6D) {
6307         p = pr3;
6308         q = ps3;
6309     } else /*ix >= 0x40000000*/ {
6310         p = pr2;
6311         q = ps2;
6312     }
6313     z = 1.0 / (x * x);
6314     r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
6315     s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4]))));
6316     return 1.0 + r / s;
6317 }
6318
6319 static double qone(double x)
6320 {
6321     static const double qr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6322         0.00000000000000000000e+00,
6323         -1.02539062499992714161e-01,
6324         -1.62717534544589987888e+01,
6325         -7.59601722513950107896e+02,
6326         -1.18498066702429587167e+04,
6327         -4.84385124285750353010e+04,
6328     }, qs8[6] = {
6329         1.61395369700722909556e+02,
6330         7.82538599923348465381e+03,
6331         1.33875336287249578163e+05,
6332         7.19657723683240939863e+05,
6333         6.66601232617776375264e+05,
6334         -2.94490264303834643215e+05,
6335     }, qr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6336         -2.08979931141764104297e-11,
6337         -1.02539050241375426231e-01,
6338         -8.05644828123936029840e+00,
6339         -1.83669607474888380239e+02,
6340         -1.37319376065508163265e+03,
6341         -2.61244440453215656817e+03,
6342     }, qs5[6] = {
6343         8.12765501384335777857e+01,
6344         1.99179873460485964642e+03,
6345         1.74684851924908907677e+04,
6346         4.98514270910352279316e+04,
6347         2.79480751638918118260e+04,
6348         -4.71918354795128470869e+03,
6349     }, qr3[6] = {
6350         -5.07831226461766561369e-09,
6351         -1.02537829820837089745e-01,
6352         -4.61011581139473403113e+00,
6353         -5.78472216562783643212e+01,
6354         -2.28244540737631695038e+02,
6355         -2.19210128478909325622e+02,
6356     }, qs3[6] = {
6357         4.76651550323729509273e+01,
6358         6.73865112676699709482e+02,
6359         3.38015286679526343505e+03,
6360         5.54772909720722782367e+03,
6361         1.90311919338810798763e+03,
6362         -1.35201191444307340817e+02,
6363     }, qr2[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
6364         -1.78381727510958865572e-07,
6365         -1.02517042607985553460e-01,
6366         -2.75220568278187460720e+00,
6367         -1.96636162643703720221e+01,
6368         -4.23253133372830490089e+01,
6369         -2.13719211703704061733e+01,
6370     }, qs2[6] = {
6371         2.95333629060523854548e+01,
6372         2.52981549982190529136e+02,
6373         7.57502834868645436472e+02,
6374         7.39393205320467245656e+02,
6375         1.55949003336666123687e+02,
6376         -4.95949898822628210127e+00,
6377     };
6378
6379     const double *p, *q;
6380     double s, r, z;
6381     unsigned int ix;
6382
6383     ix = *(ULONGLONG*)&x >> 32;
6384     ix &= 0x7fffffff;
6385     if (ix >= 0x40200000) {
6386         p = qr8;
6387         q = qs8;
6388     } else if (ix >= 0x40122E8B) {
6389         p = qr5;
6390         q = qs5;
6391     } else if (ix >= 0x4006DB6D) {
6392         p = qr3;
6393         q = qs3;
6394     } else /*ix >= 0x40000000*/ {
6395         p = qr2;
6396         q = qs2;
6397     }
6398     z = 1.0 / (x * x);
6399     r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
6400     s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5])))));
6401     return (0.375 + r / s) / x;
6402 }
6403
6404 static double j1_y1_approx(unsigned int ix, double x, BOOL y1, int sign)
6405 {
6406     static const double invsqrtpi = 5.64189583547756279280e-01;
6407
6408     double z, s, c, ss, cc;
6409
6410     s = sin(x);
6411     if (y1) s = -s;
6412     c = cos(x);
6413     cc = s - c;
6414     if (ix < 0x7fe00000) {
6415         ss = -s - c;
6416         z = cos(2 * x);
6417         if (s * c > 0) cc = z / ss;
6418         else ss = z / cc;
6419         if (ix < 0x48000000) {
6420             if (y1)
6421                 ss = -ss;
6422             cc = pone(x) * cc - qone(x) * ss;
6423         }
6424     }
6425     if (sign)
6426         cc = -cc;
6427     return invsqrtpi * cc / sqrt(x);
6428 }
6429
6430 /*********************************************************************
6431  *              _j1 (MSVCRT.@)
6432  *
6433  * Copied from musl: src/math/j1.c
6434  */
6435 double CDECL _j1(double x)
6436 {
6437     static const double r00 = -6.25000000000000000000e-02,
6438         r01 =  1.40705666955189706048e-03,
6439         r02 = -1.59955631084035597520e-05,
6440         r03 =  4.96727999609584448412e-08,
6441         s01 =  1.91537599538363460805e-02,
6442         s02 =  1.85946785588630915560e-04,
6443         s03 =  1.17718464042623683263e-06,
6444         s04 =  5.04636257076217042715e-09,
6445         s05 =  1.23542274426137913908e-11;
6446
6447     double z, r, s;
6448     unsigned int ix;
6449     int sign;
6450
6451     ix = *(ULONGLONG*)&x >> 32;
6452     sign = ix >> 31;
6453     ix &= 0x7fffffff;
6454     if (ix >= 0x7ff00000)
6455         return math_error(isnan(x) ? 0 : _DOMAIN, "_j1", x, 0, 1 / (x * x));
6456     if (ix >= 0x40000000)  /* |x| >= 2 */
6457         return j1_y1_approx(ix, fabs(x), FALSE, sign);
6458     if (ix >= 0x38000000) {  /* |x| >= 2**-127 */
6459         z = x * x;
6460         r = z * (r00 + z * (r01 + z * (r02 + z * r03)));
6461         s = 1 + z * (s01 + z * (s02 + z * (s03 + z * (s04 + z * s05))));
6462         z = r / s;
6463     } else {
6464         /* avoid underflow, raise inexact if x!=0 */
6465         z = x;
6466     }
6467     return (0.5 + z) * x;
6468 }
6469
6470 /*********************************************************************
6471  *              _jn (MSVCRT.@)
6472  *
6473  * Copied from musl: src/math/jn.c
6474  */
6475 double CDECL _jn(int n, double x)
6476 {
6477     static const double invsqrtpi = 5.64189583547756279280e-01;
6478
6479     unsigned int ix, lx;
6480     int nm1, i, sign;
6481     double a, b, temp;
6482
6483     ix = *(ULONGLONG*)&x >> 32;
6484     lx = *(ULONGLONG*)&x;
6485     sign = ix >> 31;
6486     ix &= 0x7fffffff;
6487
6488     if ((ix | (lx | -lx) >> 31) > 0x7ff00000) /* nan */
6489         return x;
6490
6491     if (n == 0)
6492         return _j0(x);
6493     if (n < 0) {
6494         nm1 = -(n + 1);
6495         x = -x;
6496         sign ^= 1;
6497     } else {
6498         nm1 = n-1;
6499     }
6500     if (nm1 == 0)
6501         return j1(x);
6502
6503     sign &= n;  /* even n: 0, odd n: signbit(x) */
6504     x = fabs(x);
6505     if ((ix | lx) == 0 || ix == 0x7ff00000)  /* if x is 0 or inf */
6506         b = 0.0;
6507     else if (nm1 < x) {
6508         if (ix >= 0x52d00000) { /* x > 2**302 */
6509             switch(nm1 & 3) {
6510             case 0:
6511                 temp = -cos(x) + sin(x);
6512                 break;
6513             case 1:
6514                 temp = -cos(x) - sin(x);
6515                 break;
6516             case 2:
6517                 temp =  cos(x) - sin(x);
6518                 break;
6519             default:
6520                 temp =  cos(x) + sin(x);
6521                 break;
6522             }
6523             b = invsqrtpi * temp / sqrt(x);
6524         } else {
6525             a = _j0(x);
6526             b = _j1(x);
6527             for (i = 0; i < nm1; ) {
6528                 i++;
6529                 temp = b;
6530                 b = b * (2.0 * i / x) - a; /* avoid underflow */
6531                 a = temp;
6532             }
6533         }
6534     } else {
6535         if (ix < 0x3e100000) { /* x < 2**-29 */
6536             if (nm1 > 32)  /* underflow */
6537                 b = 0.0;
6538             else {
6539                 temp = x * 0.5;
6540                 b = temp;
6541                 a = 1.0;
6542                 for (i = 2; i <= nm1 + 1; i++) {
6543                     a *= (double)i; /* a = n! */
6544                     b *= temp;      /* b = (x/2)^n */
6545                 }
6546                 b = b / a;
6547             }
6548         } else {
6549             double t, q0, q1, w, h, z, tmp, nf;
6550             int k;
6551
6552             nf = nm1 + 1.0;
6553             w = 2 * nf / x;
6554             h = 2 / x;
6555             z = w + h;
6556             q0 = w;
6557             q1 = w * z - 1.0;
6558             k = 1;
6559             while (q1 < 1.0e9) {
6560                 k += 1;
6561                 z += h;
6562                 tmp = z * q1 - q0;
6563                 q0 = q1;
6564                 q1 = tmp;
6565             }
6566             for (t = 0.0, i = k; i >= 0; i--)
6567                 t = 1 / (2 * (i + nf) / x - t);
6568             a = t;
6569             b = 1.0;
6570             tmp = nf * log(fabs(w));
6571             if (tmp < 7.09782712893383973096e+02) {
6572                 for (i = nm1; i > 0; i--) {
6573                     temp = b;
6574                     b = b * (2.0 * i) / x - a;
6575                     a = temp;
6576                 }
6577             } else {
6578                 for (i = nm1; i > 0; i--) {
6579                     temp = b;
6580                     b = b * (2.0 * i) / x - a;
6581                     a = temp;
6582                     /* scale b to avoid spurious overflow */
6583                     if (b > 0x1p500) {
6584                         a /= b;
6585                         t /= b;
6586                         b  = 1.0;
6587                     }
6588                 }
6589             }
6590             z = j0(x);
6591             w = j1(x);
6592             if (fabs(z) >= fabs(w))
6593                 b = t * z / b;
6594             else
6595                 b = t * w / a;
6596         }
6597     }
6598     return sign ? -b : b;
6599 }
6600
6601 /*********************************************************************
6602  *              _y0 (MSVCRT.@)
6603  */
6604 double CDECL _y0(double x)
6605 {
6606     static const double tpi = 6.36619772367581382433e-01,
6607         u00  = -7.38042951086872317523e-02,
6608         u01  =  1.76666452509181115538e-01,
6609         u02  = -1.38185671945596898896e-02,
6610         u03  =  3.47453432093683650238e-04,
6611         u04  = -3.81407053724364161125e-06,
6612         u05  =  1.95590137035022920206e-08,
6613         u06  = -3.98205194132103398453e-11,
6614         v01  =  1.27304834834123699328e-02,
6615         v02  =  7.60068627350353253702e-05,
6616         v03  =  2.59150851840457805467e-07,
6617         v04  =  4.41110311332675467403e-10;
6618
6619     double z, u, v;
6620     unsigned int ix, lx;
6621
6622     ix = *(ULONGLONG*)&x >> 32;
6623     lx = *(ULONGLONG*)&x;
6624
6625     /* y0(nan)=nan, y0(<0)=nan, y0(0)=-inf, y0(inf)=0 */
6626     if ((ix << 1 | lx) == 0)
6627         return math_error(_OVERFLOW, "_y0", x, 0, -INFINITY);
6628     if (isnan(x))
6629         return x;
6630     if (ix >> 31)
6631         return math_error(_DOMAIN, "_y0", x, 0, 0 / (x - x));
6632     if (ix >= 0x7ff00000)
6633         return 1 / x;
6634
6635     if (ix >= 0x40000000) {  /* x >= 2 */
6636         /* large ulp errors near zeros: 3.958, 7.086,.. */
6637         return j0_y0_approx(ix, x, TRUE);
6638     }
6639
6640     if (ix >= 0x3e400000) {  /* x >= 2**-27 */
6641         /* large ulp error near the first zero, x ~= 0.89 */
6642         z = x * x;
6643         u = u00 + z * (u01 + z * (u02 + z * (u03 + z * (u04 + z * (u05 + z * u06)))));
6644         v = 1.0 + z * (v01 + z * (v02 + z * (v03 + z * v04)));
6645         return u / v + tpi * (j0(x) * log(x));
6646     }
6647     return u00 + tpi * log(x);
6648 }
6649
6650 /*********************************************************************
6651  *              _y1 (MSVCRT.@)
6652  */
6653 double CDECL _y1(double x)
6654 {
6655     static const double tpi = 6.36619772367581382433e-01,
6656         u00 =  -1.96057090646238940668e-01,
6657         u01 = 5.04438716639811282616e-02,
6658         u02 = -1.91256895875763547298e-03,
6659         u03 = 2.35252600561610495928e-05,
6660         u04 = -9.19099158039878874504e-08,
6661         v00 = 1.99167318236649903973e-02,
6662         v01 = 2.02552581025135171496e-04,
6663         v02 = 1.35608801097516229404e-06,
6664         v03 = 6.22741452364621501295e-09,
6665         v04 = 1.66559246207992079114e-11;
6666
6667     double z, u, v;
6668     unsigned int ix, lx;
6669
6670     ix = *(ULONGLONG*)&x >> 32;
6671     lx = *(ULONGLONG*)&x;
6672
6673     /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */
6674     if ((ix << 1 | lx) == 0)
6675         return math_error(_OVERFLOW, "_y1", x, 0, -INFINITY);
6676     if (isnan(x))
6677         return x;
6678     if (ix >> 31)
6679         return math_error(_DOMAIN, "_y1", x, 0, 0 / (x - x));
6680     if (ix >= 0x7ff00000)
6681         return 1 / x;
6682
6683     if (ix >= 0x40000000)  /* x >= 2 */
6684         return j1_y1_approx(ix, x, TRUE, 0);
6685     if (ix < 0x3c900000)  /* x < 2**-54 */
6686         return -tpi / x;
6687     z = x * x;
6688     u = u00 + z * (u01 + z * (u02 + z * (u03 + z * u04)));
6689     v = 1 + z * (v00 + z * (v01 + z * (v02 + z * (v03 + z * v04))));
6690     return x * (u / v) + tpi * (j1(x) * log(x) - 1 / x);
6691 }
6692
6693 /*********************************************************************
6694  *              _yn (MSVCRT.@)
6695  *
6696  * Copied from musl: src/math/jn.c
6697  */
6698 double CDECL _yn(int n, double x)
6699 {
6700     static const double invsqrtpi = 5.64189583547756279280e-01;
6701
6702     unsigned int ix, lx, ib;
6703     int nm1, sign, i;
6704     double a, b, temp;
6705
6706     ix = *(ULONGLONG*)&x >> 32;
6707     lx = *(ULONGLONG*)&x;
6708     sign = ix >> 31;
6709     ix &= 0x7fffffff;
6710
6711     if ((ix | (lx | -lx) >> 31) > 0x7ff00000) /* nan */
6712         return x;
6713     if (sign && (ix | lx) != 0) /* x < 0 */
6714         return math_error(_DOMAIN, "_y1", x, 0, 0 / (x - x));
6715     if (ix == 0x7ff00000)
6716         return 0.0;
6717
6718     if (n == 0)
6719         return y0(x);
6720     if (n < 0) {
6721         nm1 = -(n + 1);
6722         sign = n & 1;
6723     } else {
6724         nm1 = n - 1;
6725         sign = 0;
6726     }
6727     if (nm1 == 0)
6728         return sign ? -y1(x) : y1(x);
6729
6730     if (ix >= 0x52d00000) { /* x > 2**302 */
6731         switch(nm1 & 3) {
6732         case 0:
6733             temp = -sin(x) - cos(x);
6734             break;
6735         case 1:
6736             temp = -sin(x) + cos(x);
6737             break;
6738         case 2:
6739             temp = sin(x) + cos(x);
6740             break;
6741         default:
6742             temp = sin(x) - cos(x);
6743             break;
6744         }
6745         b = invsqrtpi * temp / sqrt(x);
6746     } else {
6747         a = y0(x);
6748         b = y1(x);
6749         /* quit if b is -inf */
6750         ib = *(ULONGLONG*)&b >> 32;
6751         for (i = 0; i < nm1 && ib != 0xfff00000;) {
6752             i++;
6753             temp = b;
6754             b = (2.0 * i / x) * b - a;
6755             ib = *(ULONGLONG*)&b >> 32;
6756             a = temp;
6757         }
6758     }
6759     return sign ? -b : b;
6760 }
6761
6762 #if _MSVCR_VER>=120
6763
6764 /*********************************************************************
6765  *              _nearbyint (MSVCR120.@)
6766  *
6767  * Based on musl: src/math/nearbyteint.c
6768  */
6769 double CDECL nearbyint(double x)
6770 {
6771     fenv_t env;
6772
6773     fegetenv(&env);
6774     _control87(_MCW_EM, _MCW_EM);
6775     x = rint(x);
6776     feclearexcept(FE_INEXACT);
6777     feupdateenv(&env);
6778     return x;
6779 }
6780
6781 /*********************************************************************
6782  *              _nearbyintf (MSVCR120.@)
6783  *
6784  * Based on musl: src/math/nearbyteintf.c
6785  */
6786 float CDECL nearbyintf(float x)
6787 {
6788     fenv_t env;
6789
6790     fegetenv(&env);
6791     _control87(_MCW_EM, _MCW_EM);
6792     x = rintf(x);
6793     feclearexcept(FE_INEXACT);
6794     feupdateenv(&env);
6795     return x;
6796 }
6797
6798 /*********************************************************************
6799  *              nexttoward (MSVCR120.@)
6800  */
6801 double CDECL MSVCRT_nexttoward(double num, double next)
6802 {
6803     return _nextafter(num, next);
6804 }
6805
6806 /*********************************************************************
6807  *              nexttowardf (MSVCR120.@)
6808  *
6809  * Copied from musl: src/math/nexttowardf.c
6810  */
6811 float CDECL MSVCRT_nexttowardf(float x, double y)
6812 {
6813     unsigned int ix = *(unsigned int*)&x;
6814     unsigned int e;
6815     float ret;
6816
6817     if (isnan(x) || isnan(y))
6818         return x + y;
6819     if (x == y)
6820         return y;
6821     if (x == 0) {
6822         ix = 1;
6823         if (signbit(y))
6824             ix |= 0x80000000;
6825     } else if (x < y) {
6826         if (signbit(x))
6827             ix--;
6828         else
6829             ix++;
6830     } else {
6831         if (signbit(x))
6832             ix++;
6833         else
6834             ix--;
6835     }
6836     e = ix & 0x7f800000;
6837     /* raise overflow if ix is infinite and x is finite */
6838     if (e == 0x7f800000) {
6839         fp_barrierf(x + x);
6840         *_errno() = ERANGE;
6841     }
6842     ret = *(float*)&ix;
6843     /* raise underflow if ret is subnormal or zero */
6844     if (e == 0) {
6845         fp_barrierf(x * x + ret * ret);
6846         *_errno() = ERANGE;
6847     }
6848     return ret;
6849 }
6850
6851 #endif /* _MSVCR_VER>=120 */
6852
6853 /*********************************************************************
6854  *              _nextafter (MSVCRT.@)
6855  *
6856  * Copied from musl: src/math/nextafter.c
6857  */
6858 double CDECL _nextafter(double x, double y)
6859 {
6860     ULONGLONG llx = *(ULONGLONG*)&x;
6861     ULONGLONG lly = *(ULONGLONG*)&y;
6862     ULONGLONG ax, ay;
6863     int e;
6864
6865     if (isnan(x) || isnan(y))
6866         return x + y;
6867     if (llx == lly) {
6868         if (_fpclass(y) & (_FPCLASS_ND | _FPCLASS_PD | _FPCLASS_NZ | _FPCLASS_PZ ))
6869             *_errno() = ERANGE;
6870         return y;
6871     }
6872     ax = llx & -1ULL / 2;
6873     ay = lly & -1ULL / 2;
6874     if (ax == 0) {
6875         if (ay == 0)
6876             return y;
6877         llx = (lly & 1ULL << 63) | 1;
6878     } else if (ax > ay || ((llx ^ lly) & 1ULL << 63))
6879         llx--;
6880     else
6881         llx++;
6882     e = llx >> 52 & 0x7ff;
6883     /* raise overflow if llx is infinite and x is finite */
6884     if (e == 0x7ff) {
6885         fp_barrier(x + x);
6886         *_errno() = ERANGE;
6887     }
6888     /* raise underflow if llx is subnormal or zero */
6889     y = *(double*)&llx;
6890     if (e == 0) {
6891         fp_barrier(x * x + y * y);
6892         *_errno() = ERANGE;
6893     }
6894     return y;
6895 }
6896
6897 /*********************************************************************
6898  *              _ecvt (MSVCRT.@)
6899  */
6900 char * CDECL _ecvt( double number, int ndigits, int *decpt, int *sign )
6901 {
6902     int prec, len;
6903     thread_data_t *data = msvcrt_get_thread_data();
6904     /* FIXME: check better for overflow (native supports over 300 chars) */
6905     ndigits = min( ndigits, 80 - 8); /* 8 : space for sign, dec point, "e",
6906                                       * 4 for exponent and one for
6907                                       * terminating '\0' */
6908     if (!data->efcvt_buffer)
6909         data->efcvt_buffer = malloc( 80 ); /* ought to be enough */
6910
6911     /* handle cases with zero ndigits or less */
6912     prec = ndigits;
6913     if( prec < 1) prec = 2;
6914     len = _snprintf(data->efcvt_buffer, 80, "%.*le", prec - 1, number);
6915
6916     if (data->efcvt_buffer[0] == '-') {
6917         memmove( data->efcvt_buffer, data->efcvt_buffer + 1, len-- );
6918         *sign = 1;
6919     } else *sign = 0;
6920
6921     /* take the decimal "point away */
6922     if( prec != 1)
6923         memmove( data->efcvt_buffer + 1, data->efcvt_buffer + 2, len - 1 );
6924     /* take the exponential "e" out */
6925     data->efcvt_buffer[ prec] = '\0';
6926     /* read the exponent */
6927     sscanf( data->efcvt_buffer + prec + 1, "%d", decpt);
6928     (*decpt)++;
6929     /* adjust for some border cases */
6930     if( data->efcvt_buffer[0] == '0')/* value is zero */
6931         *decpt = 0;
6932     /* handle cases with zero ndigits or less */
6933     if( ndigits < 1){
6934         if( data->efcvt_buffer[ 0] >= '5')
6935             (*decpt)++;
6936         data->efcvt_buffer[ 0] = '\0';
6937     }
6938     TRACE("out=\"%s\"\n",data->efcvt_buffer);
6939     return data->efcvt_buffer;
6940 }
6941
6942 /*********************************************************************
6943  *              _ecvt_s (MSVCRT.@)
6944  */
6945 int CDECL _ecvt_s( char *buffer, size_t length, double number, int ndigits, int *decpt, int *sign )
6946 {
6947     int prec, len;
6948     char *result;
6949
6950     if (!MSVCRT_CHECK_PMT(buffer != NULL)) return EINVAL;
6951     if (!MSVCRT_CHECK_PMT(decpt != NULL)) return EINVAL;
6952     if (!MSVCRT_CHECK_PMT(sign != NULL)) return EINVAL;
6953     if (!MSVCRT_CHECK_PMT_ERR( length > 2, ERANGE )) return ERANGE;
6954     if (!MSVCRT_CHECK_PMT_ERR(ndigits < (int)length - 1, ERANGE )) return ERANGE;
6955
6956     /* handle cases with zero ndigits or less */
6957     prec = ndigits;
6958     if( prec < 1) prec = 2;
6959     result = malloc(prec + 8);
6960
6961     len = _snprintf(result, prec + 8, "%.*le", prec - 1, number);
6962     if (result[0] == '-') {
6963         memmove( result, result + 1, len-- );
6964         *sign = 1;
6965     } else *sign = 0;
6966
6967     /* take the decimal "point away */
6968     if( prec != 1)
6969         memmove( result + 1, result + 2, len - 1 );
6970     /* take the exponential "e" out */
6971     result[ prec] = '\0';
6972     /* read the exponent */
6973     sscanf( result + prec + 1, "%d", decpt);
6974     (*decpt)++;
6975     /* adjust for some border cases */
6976     if( result[0] == '0')/* value is zero */
6977         *decpt = 0;
6978     /* handle cases with zero ndigits or less */
6979     if( ndigits < 1){
6980         if( result[ 0] >= '5')
6981             (*decpt)++;
6982         result[ 0] = '\0';
6983     }
6984     memcpy( buffer, result, max(ndigits + 1, 1) );
6985     free( result );
6986     return 0;
6987 }
6988
6989 /***********************************************************************
6990  *              _fcvt  (MSVCRT.@)
6991  */
6992 char * CDECL _fcvt( double number, int ndigits, int *decpt, int *sign )
6993 {
6994     thread_data_t *data = msvcrt_get_thread_data();
6995     int stop, dec1, dec2;
6996     char *ptr1, *ptr2, *first;
6997     char buf[80]; /* ought to be enough */
6998     char decimal_separator = get_locinfo()->lconv->decimal_point[0];
6999
7000     if (!data->efcvt_buffer)
7001         data->efcvt_buffer = malloc( 80 ); /* ought to be enough */
7002
7003     stop = _snprintf(buf, 80, "%.*f", ndigits < 0 ? 0 : ndigits, number);
7004     ptr1 = buf;
7005     ptr2 = data->efcvt_buffer;
7006     first = NULL;
7007     dec1 = 0;
7008     dec2 = 0;
7009
7010     if (*ptr1 == '-') {
7011         *sign = 1;
7012         ptr1++;
7013     } else *sign = 0;
7014
7015     /* For numbers below the requested resolution, work out where
7016        the decimal point will be rather than finding it in the string */
7017     if (number < 1.0 && number > 0.0) {
7018         dec2 = log10(number + 1e-10);
7019         if (-dec2 <= ndigits) dec2 = 0;
7020     }
7021
7022     /* If requested digits is zero or less, we will need to truncate
7023      * the returned string */
7024     if (ndigits < 1) {
7025         stop += ndigits;
7026     }
7027
7028     while (*ptr1 == '0') ptr1++; /* Skip leading zeroes */
7029     while (*ptr1 != '\0' && *ptr1 != decimal_separator) {
7030         if (!first) first = ptr2;
7031         if ((ptr1 - buf) < stop) {
7032             *ptr2++ = *ptr1++;
7033         } else {
7034             ptr1++;
7035         }
7036         dec1++;
7037     }
7038
7039     if (ndigits > 0) {
7040         ptr1++;
7041         if (!first) {
7042             while (*ptr1 == '0') { /* Process leading zeroes */
7043                 *ptr2++ = *ptr1++;
7044                 dec1--;
7045             }
7046         }
7047         while (*ptr1 != '\0') {
7048             if (!first) first = ptr2;
7049             *ptr2++ = *ptr1++;
7050         }
7051     }
7052
7053     *ptr2 = '\0';
7054
7055     /* We never found a non-zero digit, then our number is either
7056      * smaller than the requested precision, or 0.0 */
7057     if (!first) {
7058         if (number > 0.0) {
7059             first = ptr2;
7060         } else {
7061             first = data->efcvt_buffer;
7062             dec1 = 0;
7063         }
7064     }
7065
7066     *decpt = dec2 ? dec2 : dec1;
7067     return first;
7068 }
7069
7070 /***********************************************************************
7071  *              _fcvt_s  (MSVCRT.@)
7072  */
7073 int CDECL _fcvt_s(char* outbuffer, size_t size, double number, int ndigits, int *decpt, int *sign)
7074 {
7075     int stop, dec1, dec2;
7076     char *ptr1, *ptr2, *first;
7077     char buf[80]; /* ought to be enough */
7078     char decimal_separator = get_locinfo()->lconv->decimal_point[0];
7079
7080     if (!outbuffer || !decpt || !sign || size == 0)
7081     {
7082         *_errno() = EINVAL;
7083         return EINVAL;
7084     }
7085
7086     stop = _snprintf(buf, 80, "%.*f", ndigits < 0 ? 0 : ndigits, number);
7087     ptr1 = buf;
7088     ptr2 = outbuffer;
7089     first = NULL;
7090     dec1 = 0;
7091     dec2 = 0;
7092
7093     if (*ptr1 == '-') {
7094         *sign = 1;
7095         ptr1++;
7096     } else *sign = 0;
7097
7098     /* For numbers below the requested resolution, work out where
7099        the decimal point will be rather than finding it in the string */
7100     if (number < 1.0 && number > 0.0) {
7101         dec2 = log10(number + 1e-10);
7102         if (-dec2 <= ndigits) dec2 = 0;
7103     }
7104
7105     /* If requested digits is zero or less, we will need to truncate
7106      * the returned string */
7107     if (ndigits < 1) {
7108         stop += ndigits;
7109     }
7110
7111     while (*ptr1 == '0') ptr1++; /* Skip leading zeroes */
7112     while (*ptr1 != '\0' && *ptr1 != decimal_separator) {
7113         if (!first) first = ptr2;
7114         if ((ptr1 - buf) < stop) {
7115             if (size > 1) {
7116                 *ptr2++ = *ptr1++;
7117                 size--;
7118             }
7119         } else {
7120             ptr1++;
7121         }
7122         dec1++;
7123     }
7124
7125     if (ndigits > 0) {
7126         ptr1++;
7127         if (!first) {
7128             while (*ptr1 == '0') { /* Process leading zeroes */
7129                 if (number == 0.0 && size > 1) {
7130                     *ptr2++ = '0';
7131                     size--;
7132                 }
7133                 ptr1++;
7134                 dec1--;
7135             }
7136         }
7137         while (*ptr1 != '\0') {
7138             if (!first) first = ptr2;
7139             if (size > 1) {
7140                 *ptr2++ = *ptr1++;
7141                 size--;
7142             }
7143         }
7144     }
7145
7146     *ptr2 = '\0';
7147
7148     /* We never found a non-zero digit, then our number is either
7149      * smaller than the requested precision, or 0.0 */
7150     if (!first && (number <= 0.0))
7151         dec1 = 0;
7152
7153     *decpt = dec2 ? dec2 : dec1;
7154     return 0;
7155 }
7156
7157 /***********************************************************************
7158  *              _gcvt  (MSVCRT.@)
7159  */
7160 char * CDECL _gcvt( double number, int ndigit, char *buff )
7161 {
7162     if(!buff) {
7163         *_errno() = EINVAL;
7164         return NULL;
7165     }
7166
7167     if(ndigit < 0) {
7168         *_errno() = ERANGE;
7169         return NULL;
7170     }
7171
7172     sprintf(buff, "%.*g", ndigit, number);
7173     return buff;
7174 }
7175
7176 /***********************************************************************
7177  *              _gcvt_s  (MSVCRT.@)
7178  */
7179 int CDECL _gcvt_s(char *buff, size_t size, double number, int digits)
7180 {
7181     int len;
7182
7183     if(!buff) {
7184         *_errno() = EINVAL;
7185         return EINVAL;
7186     }
7187
7188     if( digits<0 || digits>=size) {
7189         if(size)
7190             buff[0] = '\0';
7191
7192         *_errno() = ERANGE;
7193         return ERANGE;
7194     }
7195
7196     len = _scprintf("%.*g", digits, number);
7197     if(len > size) {
7198         buff[0] = '\0';
7199         *_errno() = ERANGE;
7200         return ERANGE;
7201     }
7202
7203     sprintf(buff, "%.*g", digits, number);
7204     return 0;
7205 }
7206
7207 #include <stdlib.h> /* div_t, ldiv_t */
7208
7209 /*********************************************************************
7210  *              div (MSVCRT.@)
7211  * VERSION
7212  *      [i386] Windows binary compatible - returns the struct in eax/edx.
7213  */
7214 #ifdef __i386__
7215 unsigned __int64 CDECL div(int num, int denom)
7216 {
7217     union {
7218         div_t div;
7219         unsigned __int64 uint64;
7220     } ret;
7221
7222     ret.div.quot = num / denom;
7223     ret.div.rem = num % denom;
7224     return ret.uint64;
7225 }
7226 #else
7227 /*********************************************************************
7228  *              div (MSVCRT.@)
7229  * VERSION
7230  *      [!i386] Non-x86 can't run win32 apps so we don't need binary compatibility
7231  */
7232 div_t CDECL div(int num, int denom)
7233 {
7234     div_t ret;
7235
7236     ret.quot = num / denom;
7237     ret.rem = num % denom;
7238     return ret;
7239 }
7240 #endif /* ifdef __i386__ */
7241
7242
7243 /*********************************************************************
7244  *              ldiv (MSVCRT.@)
7245  * VERSION
7246  *      [i386] Windows binary compatible - returns the struct in eax/edx.
7247  */
7248 #ifdef __i386__
7249 unsigned __int64 CDECL ldiv(__msvcrt_long num, __msvcrt_long denom)
7250 {
7251     union {
7252         ldiv_t ldiv;
7253         unsigned __int64 uint64;
7254     } ret;
7255
7256     ret.ldiv.quot = num / denom;
7257     ret.ldiv.rem = num % denom;
7258     return ret.uint64;
7259 }
7260 #else
7261 /*********************************************************************
7262  *              ldiv (MSVCRT.@)
7263  * VERSION
7264  *      [!i386] Non-x86 can't run win32 apps so we don't need binary compatibility
7265  */
7266 ldiv_t CDECL ldiv(__msvcrt_long num, __msvcrt_long denom)
7267 {
7268     ldiv_t ret;
7269
7270     ret.quot = num / denom;
7271     ret.rem = num % denom;
7272     return ret;
7273 }
7274 #endif /* ifdef __i386__ */
7275
7276 #if _MSVCR_VER>=100
7277 /*********************************************************************
7278  *              lldiv (MSVCR100.@)
7279  */
7280 lldiv_t CDECL lldiv(__int64 num, __int64 denom)
7281 {
7282   lldiv_t ret;
7283
7284   ret.quot = num / denom;
7285   ret.rem = num % denom;
7286
7287   return ret;
7288 }
7289 #endif
7290
7291 #ifdef __i386__
7292
7293 /*********************************************************************
7294  *              _adjust_fdiv (MSVCRT.@)
7295  * Used by the MSVC compiler to work around the Pentium FDIV bug.
7296  */
/* Defined with an initial value of 0. */
int MSVCRT__adjust_fdiv = 0;
7298
7299 /***********************************************************************
7300  *              _adj_fdiv_m16i (MSVCRT.@)
7301  *
7302  * NOTE
7303  *    I _think_ this function is intended to work around the Pentium
7304  *    fdiv bug.
7305  */
void __stdcall _adj_fdiv_m16i( short arg )
{
  /* Stub: arg is ignored; only a trace message is emitted. */
  TRACE("(): stub\n");
}
7310
7311 /***********************************************************************
7312  *              _adj_fdiv_m32 (MSVCRT.@)
7313  *
7314  * NOTE
7315  *    I _think_ this function is intended to work around the Pentium
7316  *    fdiv bug.
7317  */
void __stdcall _adj_fdiv_m32( unsigned int arg )
{
  /* Stub: arg is ignored; only a trace message is emitted. */
  TRACE("(): stub\n");
}
7322
7323 /***********************************************************************
7324  *              _adj_fdiv_m32i (MSVCRT.@)
7325  *
7326  * NOTE
7327  *    I _think_ this function is intended to work around the Pentium
7328  *    fdiv bug.
7329  */
void __stdcall _adj_fdiv_m32i( int arg )
{
  /* Stub: arg is ignored; only a trace message is emitted. */
  TRACE("(): stub\n");
}
7334
7335 /***********************************************************************
7336  *              _adj_fdiv_m64 (MSVCRT.@)
7337  *
7338  * NOTE
7339  *    I _think_ this function is intended to work around the Pentium
7340  *    fdiv bug.
7341  */
void __stdcall _adj_fdiv_m64( unsigned __int64 arg )
{
  /* Stub: arg is ignored; only a trace message is emitted. */
  TRACE("(): stub\n");
}
7346
7347 /***********************************************************************
7348  *              _adj_fdiv_r (MSVCRT.@)
7349  * FIXME
7350  *    This function is likely to have the wrong number of arguments.
7351  *
7352  * NOTE
7353  *    I _think_ this function is intended to work around the Pentium
7354  *    fdiv bug.
7355  */
void _adj_fdiv_r(void)
{
  /* Stub: performs no adjustment; only a trace message is emitted. */
  TRACE("(): stub\n");
}
7360
7361 /***********************************************************************
7362  *              _adj_fdivr_m16i (MSVCRT.@)
7363  *
7364  * NOTE
7365  *    I _think_ this function is intended to work around the Pentium
7366  *    fdiv bug.
7367  */
void __stdcall _adj_fdivr_m16i( short arg )
{
  /* Stub: arg is ignored; only a trace message is emitted. */
  TRACE("(): stub\n");
}
7372
7373 /***********************************************************************
7374  *              _adj_fdivr_m32 (MSVCRT.@)
7375  *
7376  * NOTE
7377  *    I _think_ this function is intended to work around the Pentium
7378  *    fdiv bug.
7379  */
void __stdcall _adj_fdivr_m32( unsigned int arg )
{
  /* Stub: arg is ignored; only a trace message is emitted. */
  TRACE("(): stub\n");
}
7384
7385 /***********************************************************************
7386  *              _adj_fdivr_m32i (MSVCRT.@)
7387  *
7388  * NOTE
7389  *    I _think_ this function is intended to work around the Pentium
7390  *    fdiv bug.
7391  */
void __stdcall _adj_fdivr_m32i( int arg )
{
  /* Stub: arg is ignored; only a trace message is emitted. */
  TRACE("(): stub\n");
}
7396
7397 /***********************************************************************
7398  *              _adj_fdivr_m64 (MSVCRT.@)
7399  *
7400  * NOTE
7401  *    I _think_ this function is intended to work around the Pentium
7402  *    fdiv bug.
7403  */
void __stdcall _adj_fdivr_m64( unsigned __int64 arg )
{
  /* Stub: arg is ignored; only a trace message is emitted. */
  TRACE("(): stub\n");
}
7408
7409 /***********************************************************************
7410  *              _adj_fpatan (MSVCRT.@)
7411  * FIXME
7412  *    This function is likely to have the wrong number of arguments.
7413  *
7414  * NOTE
7415  *    I _think_ this function is intended to work around the Pentium
7416  *    fdiv bug.
7417  */
void _adj_fpatan(void)
{
  /* Stub: performs no adjustment; only a trace message is emitted. */
  TRACE("(): stub\n");
}
7422
7423 /***********************************************************************
7424  *              _adj_fprem (MSVCRT.@)
7425  * FIXME
7426  *    This function is likely to have the wrong number of arguments.
7427  *
7428  * NOTE
7429  *    I _think_ this function is intended to work around the Pentium
7430  *    fdiv bug.
7431  */
void _adj_fprem(void)
{
  /* Stub: performs no adjustment; only a trace message is emitted. */
  TRACE("(): stub\n");
}
7436
7437 /***********************************************************************
7438  *              _adj_fprem1 (MSVCRT.@)
7439  * FIXME
7440  *    This function is likely to have the wrong number of arguments.
7441  *
7442  * NOTE
7443  *    I _think_ this function is intended to work around the Pentium
7444  *    fdiv bug.
7445  */
void _adj_fprem1(void)
{
  /* Stub: performs no adjustment; only a trace message is emitted. */
  TRACE("(): stub\n");
}
7450
7451 /***********************************************************************
7452  *              _adj_fptan (MSVCRT.@)
7453  * FIXME
7454  *    This function is likely to have the wrong number of arguments.
7455  *
7456  * NOTE
7457  *    I _think_ this function is intended to work around the Pentium
7458  *    fdiv bug.
7459  */
void _adj_fptan(void)
{
  /* Stub: performs no adjustment; only a trace message is emitted. */
  TRACE("(): stub\n");
}
7464
7465 /***********************************************************************
7466  *              _safe_fdiv (MSVCRT.@)
7467  * FIXME
7468  *    This function is likely to have the wrong number of arguments.
7469  *
7470  * NOTE
7471  *    I _think_ this function is intended to work around the Pentium
7472  *    fdiv bug.
7473  */
void _safe_fdiv(void)
{
  /* Stub: performs no division; only a trace message is emitted. */
  TRACE("(): stub\n");
}
7478
7479 /***********************************************************************
7480  *              _safe_fdivr (MSVCRT.@)
7481  * FIXME
7482  *    This function is likely to have the wrong number of arguments.
7483  *
7484  * NOTE
7485  *    I _think_ this function is intended to work around the Pentium
7486  *    fdiv bug.
7487  */
void _safe_fdivr(void)
{
  /* Stub: performs no division; only a trace message is emitted. */
  TRACE("(): stub\n");
}
7492
7493 /***********************************************************************
7494  *              _safe_fprem (MSVCRT.@)
7495  * FIXME
7496  *    This function is likely to have the wrong number of arguments.
7497  *
7498  * NOTE
7499  *    I _think_ this function is intended to work around the Pentium
7500  *    fdiv bug.
7501  */
void _safe_fprem(void)
{
  /* Stub: computes no remainder; only a trace message is emitted. */
  TRACE("(): stub\n");
}
7506
7507 /***********************************************************************
7508  *              _safe_fprem1 (MSVCRT.@)
7509  *
7510  * FIXME
7511  *    This function is likely to have the wrong number of arguments.
7512  *
7513  * NOTE
7514  *    I _think_ this function is intended to work around the Pentium
7515  *    fdiv bug.
7516  */
void _safe_fprem1(void)
{
  /* Stub: computes no remainder; only a trace message is emitted. */
  TRACE("(): stub\n");
}
7521
7522 /***********************************************************************
7523  *              __libm_sse2_acos   (MSVCRT.@)
7524  */
void __cdecl __libm_sse2_acos(void)
{
    double d;
    /* The double argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = acos( d );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7532
7533 /***********************************************************************
7534  *              __libm_sse2_acosf   (MSVCRT.@)
7535  */
void __cdecl __libm_sse2_acosf(void)
{
    float f;
    /* The float argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = acosf( f );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7543
7544 /***********************************************************************
7545  *              __libm_sse2_asin   (MSVCRT.@)
7546  */
void __cdecl __libm_sse2_asin(void)
{
    double d;
    /* The double argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = asin( d );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7554
7555 /***********************************************************************
7556  *              __libm_sse2_asinf   (MSVCRT.@)
7557  */
void __cdecl __libm_sse2_asinf(void)
{
    float f;
    /* The float argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = asinf( f );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7565
7566 /***********************************************************************
7567  *              __libm_sse2_atan   (MSVCRT.@)
7568  */
void __cdecl __libm_sse2_atan(void)
{
    double d;
    /* The double argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = atan( d );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7576
7577 /***********************************************************************
7578  *              __libm_sse2_atan2   (MSVCRT.@)
7579  */
void __cdecl __libm_sse2_atan2(void)
{
    double d1, d2;
    /* First argument is read from xmm0, second from xmm1. */
    __asm__ __volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1), "=m" (d2) );
    d1 = atan2( d1, d2 );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d1) );
}
7587
7588 /***********************************************************************
7589  *              __libm_sse2_atanf   (MSVCRT.@)
7590  */
void __cdecl __libm_sse2_atanf(void)
{
    float f;
    /* The float argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = atanf( f );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7598
7599 /***********************************************************************
7600  *              __libm_sse2_cos   (MSVCRT.@)
7601  */
void __cdecl __libm_sse2_cos(void)
{
    double d;
    /* The double argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = cos( d );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7609
7610 /***********************************************************************
7611  *              __libm_sse2_cosf   (MSVCRT.@)
7612  */
void __cdecl __libm_sse2_cosf(void)
{
    float f;
    /* The float argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = cosf( f );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7620
7621 /***********************************************************************
7622  *              __libm_sse2_exp   (MSVCRT.@)
7623  */
void __cdecl __libm_sse2_exp(void)
{
    double d;
    /* The double argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = exp( d );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7631
7632 /***********************************************************************
7633  *              __libm_sse2_expf   (MSVCRT.@)
7634  */
void __cdecl __libm_sse2_expf(void)
{
    float f;
    /* The float argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = expf( f );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7642
7643 /***********************************************************************
7644  *              __libm_sse2_log   (MSVCRT.@)
7645  */
void __cdecl __libm_sse2_log(void)
{
    double d;
    /* The double argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = log( d );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7653
7654 /***********************************************************************
7655  *              __libm_sse2_log10   (MSVCRT.@)
7656  */
void __cdecl __libm_sse2_log10(void)
{
    double d;
    /* The double argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = log10( d );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7664
7665 /***********************************************************************
7666  *              __libm_sse2_log10f   (MSVCRT.@)
7667  */
void __cdecl __libm_sse2_log10f(void)
{
    float f;
    /* The float argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = log10f( f );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7675
7676 /***********************************************************************
7677  *              __libm_sse2_logf   (MSVCRT.@)
7678  */
void __cdecl __libm_sse2_logf(void)
{
    float f;
    /* The float argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = logf( f );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7686
7687 /***********************************************************************
7688  *              __libm_sse2_pow   (MSVCRT.@)
7689  */
void __cdecl __libm_sse2_pow(void)
{
    double d1, d2;
    /* Base is read from xmm0, exponent from xmm1. */
    __asm__ __volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1), "=m" (d2) );
    d1 = pow( d1, d2 );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d1) );
}
7697
7698 /***********************************************************************
7699  *              __libm_sse2_powf   (MSVCRT.@)
7700  */
void __cdecl __libm_sse2_powf(void)
{
    float f1, f2;
    /* Base is read from xmm0, exponent from xmm1. */
    __asm__ __volatile__( "movd %%xmm0,%0; movd %%xmm1,%1" : "=g" (f1), "=g" (f2) );
    f1 = powf( f1, f2 );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f1) );
}
7708
7709 /***********************************************************************
7710  *              __libm_sse2_sin   (MSVCRT.@)
7711  */
void __cdecl __libm_sse2_sin(void)
{
    double d;
    /* The double argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = sin( d );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7719
7720 /***********************************************************************
7721  *              __libm_sse2_sinf   (MSVCRT.@)
7722  */
void __cdecl __libm_sse2_sinf(void)
{
    float f;
    /* The float argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = sinf( f );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7730
7731 /***********************************************************************
7732  *              __libm_sse2_tan   (MSVCRT.@)
7733  */
void __cdecl __libm_sse2_tan(void)
{
    double d;
    /* The double argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = tan( d );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7741
7742 /***********************************************************************
7743  *              __libm_sse2_tanf   (MSVCRT.@)
7744  */
void __cdecl __libm_sse2_tanf(void)
{
    float f;
    /* The float argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = tanf( f );
    /* The result is written back to xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7752
7753 /***********************************************************************
7754  *              __libm_sse2_sqrt_precise   (MSVCR110.@)
7755  */
void __cdecl __libm_sse2_sqrt_precise(void)
{
    unsigned int cw;
    double d;

    /* The double argument is read from xmm0, not from a C parameter. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    /* New value and mask are both 0, so this only queries the control word
     * returned through the last argument (presumably the SSE2 one - confirm
     * against the __control87_2 prototype). */
    __control87_2(0, 0, NULL, &cw);
    if (cw & _MCW_RC)
    {
        /* Some rounding-control bit is set: use sqrt() and return its
         * result in xmm0. */
        d = sqrt(d);
        __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
        return;
    }

    if (!sqrt_validate(&d, FALSE))
    {
        /* sqrt_validate returned zero: hand d back in xmm0 as the result.
         * NOTE(review): d is passed by address, so sqrt_validate may have
         * replaced it - confirm against its definition. */
        __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
        return;
    }
    /* NOTE(review): this call passes no operands, so it presumably relies on
     * xmm0 still holding the argument and on sse2_sqrt leaving the result
     * there - confirm against sse2_sqrt. */
    __asm__ __volatile__( "call " __ASM_NAME( "sse2_sqrt" ) );
}
7777 #endif  /* __i386__ */
7778
7779 /*********************************************************************
7780  *      _fdclass (MSVCR120.@)
7781  *
7782  * Copied from musl: src/math/__fpclassifyf.c
7783  */
7784 short CDECL _fdclass(float x)
7785 {
7786     union { float f; UINT32 i; } u = { x };
7787     int e = u.i >> 23 & 0xff;
7788
7789     if (!e) return u.i << 1 ? FP_SUBNORMAL : FP_ZERO;
7790     if (e == 0xff) return u.i << 9 ? FP_NAN : FP_INFINITE;
7791     return FP_NORMAL;
7792 }
7793
7794 /*********************************************************************
7795  *      _dclass (MSVCR120.@)
7796  *
7797  * Copied from musl: src/math/__fpclassify.c
7798  */
7799 short CDECL _dclass(double x)
7800 {
7801     union { double f; UINT64 i; } u = { x };
7802     int e = u.i >> 52 & 0x7ff;
7803
7804     if (!e) return u.i << 1 ? FP_SUBNORMAL : FP_ZERO;
7805     if (e == 0x7ff) return (u.i << 12) ? FP_NAN : FP_INFINITE;
7806     return FP_NORMAL;
7807 }
7808
7809 #if _MSVCR_VER>=120
7810
7811 /*********************************************************************
7812  *      cbrt (MSVCR120.@)
7813  *
7814  * Copied from musl: src/math/cbrt.c
7815  */
double CDECL cbrt(double x)
{
    /* Offsets added to (high word / 3) to form the initial estimate;
     * B2 is the one used after the input has been scaled by 2^54. */
    static const UINT32 B1 = 715094163, B2 = 696219795;
    /* Coefficients of the polynomial in r used to refine the estimate. */
    static const double P0 =  1.87595182427177009643,
                 P1 = -1.88497979543377169875,
                 P2 =  1.621429720105354466140,
                 P3 = -0.758397934778766047437,
                 P4 =  0.145996192886612446982;

    union {double f; UINT64 i;} u = {x};
    double r,s,t,w;
    /* High 32 bits of x with the sign bit cleared. */
    UINT32 hx = u.i >> 32 & 0x7fffffff;

    if (hx >= 0x7ff00000)  /* cbrt(NaN,INF) is itself */
        return x + x;

    if (hx < 0x00100000) { /* zero or subnormal? */
        /* Scale by 2^54 so the high word carries usable exponent bits. */
        u.f = x * 0x1p54;
        hx = u.i>>32 & 0x7fffffff;
        if (hx == 0)
            return x;
        hx = hx / 3 + B2;
    } else
        hx = hx / 3 + B1;
    /* Keep only the sign bit, then install the estimated high word;
     * the low 32 bits of the estimate are zero. */
    u.i &= 1ULL << 63;
    u.i |= (UINT64)hx << 32;
    t = u.f;

    /* r = t^3 / x, then improve t with the polynomial in r. */
    r = (t * t) * (t / x);
    t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));

    /* Add 2^31 and clear the low 30 bits, i.e. round t to a short mantissa. */
    u.f = t;
    u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL;
    t = u.f;

    /* Final correction of t using x / t^2. */
    s = t * t;
    r = x / s;
    w = t + t;
    r = (r - t) / (w + r);
    t = t + t * r;
    return t;
}
7858
7859 /*********************************************************************
7860  *      cbrtf (MSVCR120.@)
7861  *
7862  * Copied from musl: src/math/cbrtf.c
7863  */
float CDECL cbrtf(float x)
{
    /* Offsets added to (bits / 3) to form the initial estimate;
     * B2 is the one used after the input has been scaled by 2^24. */
    static const unsigned B1 = 709958130, B2 = 642849266;

    double r,T;
    union {float f; UINT32 i;} u = {x};
    /* Bit pattern of x with the sign bit cleared. */
    UINT32 hx = u.i & 0x7fffffff;

    /* Exponent field all ones (infinity or NaN): return x + x. */
    if (hx >= 0x7f800000)
        return x + x;

    if (hx < 0x00800000) {  /* zero or subnormal? */
        if (hx == 0)
            return x;
        /* Scale by 2^24 before estimating. */
        u.f = x * 0x1p24f;
        hx = u.i & 0x7fffffff;
        hx = hx / 3 + B2;
    } else
        hx = hx / 3 + B1;
    /* Keep only the sign bit, then install the estimate. */
    u.i &= 0x80000000;
    u.i |= hx;

    /* Two identical refinement steps, carried out in double precision. */
    T = u.f;
    r = T * T * T;
    T = T * (x + x + r) / (x + r + r);

    r = T * T * T;
    T = T * (x + x + r) / (x + r + r);
    /* The double result is converted to float by the return. */
    return T;
}
7894
7895 /*********************************************************************
7896  *      exp2 (MSVCR120.@)
7897  *
7898  * Copied from musl: src/math/exp2.c
7899  */
7900 double CDECL exp2(double x)
7901 {
7902     static const double C[] = {
7903         0x1.62e42fefa39efp-1,
7904         0x1.ebfbdff82c424p-3,
7905         0x1.c6b08d70cf4b5p-5,
7906         0x1.3b2abd24650ccp-7,
7907         0x1.5d7e09b4e3a84p-10
7908     };
7909
7910     UINT32 abstop;
7911     UINT64 ki, idx, top, sbits;
7912     double kd, r, r2, scale, tail, tmp;
7913
7914     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
7915     if (abstop - 0x3c9 >= 0x408 - 0x3c9) {
7916         if (abstop - 0x3c9 >= 0x80000000) {
7917             /* Avoid spurious underflow for tiny x. */
7918             /* Note: 0 is common input. */
7919             return 1.0 + x;
7920         }
7921         if (abstop >= 409) {
7922             if (*(UINT64*)&x == 0xfff0000000000000ull)
7923                 return 0.0;
7924             if (abstop >= 0x7ff)
7925                 return 1.0 + x;
7926             if (!(*(UINT64*)&x >> 63)) {
7927                 *_errno() = ERANGE;
7928                 return fp_barrier(DBL_MAX) * DBL_MAX;
7929             }
7930             else if (x <= -2147483648.0) {
7931                 fp_barrier(x + 0x1p120f);
7932                 return 0;
7933             }
7934             else if (*(UINT64*)&x >= 0xc090cc0000000000ull) {
7935                 *_errno() = ERANGE;
7936                 fp_barrier(x + 0x1p120f);
7937                 return 0;
7938             }
7939         }
7940         if (2 * *(UINT64*)&x > 2 * 0x408d000000000000ull)
7941             /* Large x is special cased below. */
7942             abstop = 0;
7943     }
7944
7945     /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
7946     /* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
7947     kd = fp_barrier(x + 0x1.8p52 / (1 << 7));
7948     ki = *(UINT64*)&kd; /* k. */
7949     kd -= 0x1.8p52 / (1 << 7); /* k/N for int k. */
7950     r = x - kd;
7951     /* 2^(k/N) ~= scale * (1 + tail). */
7952     idx = 2 * (ki % (1 << 7));
7953     top = ki << (52 - 7);
7954     tail = *(double*)&exp_T[idx];
7955     /* This is only a valid scale when -1023*N < k < 1024*N. */
7956     sbits = exp_T[idx + 1] + top;
7957     /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
7958     /* Evaluation is optimized assuming superscalar pipelined execution. */
7959     r2 = r * r;
7960     /* Without fma the worst case error is 0.5/N ulp larger. */
7961     /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
7962     tmp = tail + r * C[0] + r2 * (C[1] + r * C[2]) + r2 * r2 * (C[3] + r * C[4]);
7963     if (abstop == 0)
7964     {
7965         /* Handle cases that may overflow or underflow when computing the result that
7966            is scale*(1+TMP) without intermediate rounding. The bit representation of
7967            scale is in SBITS, however it has a computed exponent that may have
7968            overflown into the sign bit so that needs to be adjusted before using it as
7969            a double. (int32_t)KI is the k used in the argument reduction and exponent
7970            adjustment of scale, positive k here means the result may overflow and
7971            negative k means the result may underflow. */
7972         double scale, y;
7973
7974         if ((ki & 0x80000000) == 0) {
7975             /* k > 0, the exponent of scale might have overflowed by 1. */
7976             sbits -= 1ull << 52;
7977             scale = *(double*)&sbits;
7978             y = 2 * (scale + scale * tmp);
7979             return y;
7980         }
7981         /* k < 0, need special care in the subnormal range. */
7982         sbits += 1022ull << 52;
7983         scale = *(double*)&sbits;
7984         y = scale + scale * tmp;
7985         if (y < 1.0) {
7986             /* Round y to the right precision before scaling it into the subnormal
7987                range to avoid double rounding that can cause 0.5+E/2 ulp error where
7988                E is the worst-case ulp error outside the subnormal range. So this
7989                is only useful if the goal is better than 1 ulp worst-case error. */
7990             double hi, lo;
7991             lo = scale - y + scale * tmp;
7992             hi = 1.0 + y;
7993             lo = 1.0 - hi + y + lo;
7994             y = hi + lo - 1.0;
7995             /* Avoid -0.0 with downward rounding. */
7996             if (y == 0.0)
7997                 y = 0.0;
7998             /* The underflow exception needs to be signaled explicitly. */
7999             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
8000         }
8001         y = 0x1p-1022 * y;
8002         return y;
8003     }
8004     scale = *(double*)&sbits;
8005     /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
8006        is no spurious underflow here even without fma. */
8007     return scale + scale * tmp;
8008 }
8009
8010 /*********************************************************************
8011  *      exp2f (MSVCR120.@)
8012  *
8013  * Copied from musl: src/math/exp2f.c
8014  */
float CDECL exp2f(float x)
{
    /* Coefficients of the cubic used for 2^r: C0*r^3 + C1*r^2 + C2*r + 1. */
    static const double C[] = {
        0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1
    };
    /* Adding and subtracting this constant rounds x to a multiple of 1/32
     * and leaves the integer k in the low bits of the sum. */
    static const double shift = 0x1.8p+52 / (1 << 5);

    double kd, xd, z, r, r2, y, s;
    UINT32 abstop;
    UINT64 ki, t;

    /* All further arithmetic is done in double precision. */
    xd = x;
    /* Top 12 bits of the float with the sign masked off. */
    abstop = (*(UINT32*)&x >> 20) & 0x7ff;
    if (abstop >= 0x430) {
        /* |x| >= 128 or x is nan.  */
        /* 0xff800000 is the bit pattern of -infinity. */
        if (*(UINT32*)&x == 0xff800000)
            return 0.0f;
        /* +infinity or NaN. */
        if (abstop >= 0x7f8)
            return x + x;
        /* Overflow: report ERANGE and return x * FLT_MAX. */
        if (x > 0.0f) {
            *_errno() = ERANGE;
            return fp_barrierf(x * FLT_MAX);
        }
        /* Result is too small to represent: return 0 without setting errno. */
        if (x <= -150.0f) {
            fp_barrierf(x - 0x1p120);
            return 0;
        }
    }

    /* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k, N = 1 << 5. */
    kd = xd + shift;
    ki = *(UINT64*)&kd;
    kd -= shift; /* k/(1<<5) for int k.  */
    r = xd - kd;

    /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
    /* Table entry selected by k mod 32, with k shifted into the exponent. */
    t = exp2f_T[ki % (1 << 5)];
    t += ki << (52 - 5);
    s = *(double*)&t;
    z = C[0] * r + C[1];
    r2 = r * r;
    y = C[2] * r + 1;
    y = z * r2 + y;
    y = y * s;
    /* The double result is converted to float by the return. */
    return y;
}
8061
8062 /*********************************************************************
8063  *      expm1 (MSVCR120.@)
8064  */
double CDECL expm1(double x)
{
    /* Exported entry point; simply forwards to __expm1. */
    return __expm1(x);
}
8069
8070 /*********************************************************************
8071  *      expm1f (MSVCR120.@)
8072  */
float CDECL expm1f(float x)
{
    /* Exported entry point; simply forwards to __expm1f. */
    return __expm1f(x);
}
8077
8078 /*********************************************************************
8079  *      log1p (MSVCR120.@)
8080  *
8081  * Copied from musl: src/math/log1p.c
8082  */
double CDECL log1p(double x)
{
    /* ln2_hi + ln2_lo is ln(2) split into two parts; Lg1..Lg7 are the
     * coefficients of the polynomial evaluated at the end. */
    static const double ln2_hi = 6.93147180369123816490e-01,
        ln2_lo = 1.90821492927058770002e-10,
        Lg1 = 6.666666666666735130e-01,
        Lg2 = 3.999999999940941908e-01,
        Lg3 = 2.857142874366239149e-01,
        Lg4 = 2.222219843214978396e-01,
        Lg5 = 1.818357216161805012e-01,
        Lg6 = 1.531383769920937332e-01,
        Lg7 = 1.479819860511658591e-01;

    union {double f; UINT64 i;} u = {x};
    double hfsq, f, c, s, z, R, w, t1, t2, dk;
    UINT32 hx, hu;
    int k;

    /* High 32 bits of x, sign bit included. */
    hx = u.i >> 32;
    /* k stays non-zero unless x falls in the range handled directly below. */
    k = 1;
    if (hx < 0x3fda827a || hx >> 31) { /* 1+x < sqrt(2)+ */
        if (hx >= 0xbff00000) { /* x <= -1.0 */
            if (x == -1) {
                *_errno() = ERANGE;
                return x / 0.0; /* log1p(-1) = -inf */
            }
            *_errno() = EDOM;
            return (x-x) / 0.0; /* log1p(x<-1) = NaN */
        }
        if (hx << 1 < 0x3ca00000 << 1) { /* |x| < 2**-53 */
            fp_barrier(x + 0x1p120f);
            /* underflow if subnormal */
            if ((hx & 0x7ff00000) == 0)
                fp_barrierf(x);
            /* For such small x the result is x itself. */
            return x;
        }
        if (hx <= 0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
            /* No argument reduction needed: use x directly. */
            k = 0;
            c = 0;
            f = x;
        }
    } else if (hx >= 0x7ff00000)
        /* +infinity or NaN is returned unchanged. */
        return x;
    if (k) {
        u.f = 1 + x;
        hu = u.i >> 32;
        hu += 0x3ff00000 - 0x3fe6a09e;
        k = (int)(hu >> 20) - 0x3ff;
        /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
        if (k < 54) {
            c = k >= 2 ? 1 - (u.f - x) : x - (u.f - 1);
            c /= u.f;
        } else
            c = 0;
        /* reduce u into [sqrt(2)/2, sqrt(2)] */
        hu = (hu & 0x000fffff) + 0x3fe6a09e;
        u.i = (UINT64)hu << 32 | (u.i & 0xffffffff);
        f = u.f - 1;
    }
    /* Polynomial evaluation on the reduced argument f, split into the
     * even-indexed (t1) and odd-indexed (t2) coefficient groups. */
    hfsq = 0.5 * f * f;
    s = f / (2.0 + f);
    z = s * s;
    w = z * z;
    t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
    t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
    R = t2 + t1;
    dk = k;
    /* Recombine with k * ln(2) (in two parts) and the correction term c. */
    return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi;
}
8151
/*********************************************************************
 *      log1pf (MSVCR120.@)
 *
 * Copied from musl: src/math/log1pf.c
 *
 * Computes log(1 + x) in single precision.
 *
 * Special cases handled explicitly below:
 *   x == -1        sets errno to ERANGE and returns -inf
 *   x <  -1        sets errno to EDOM and returns NaN
 *   |x| < 2**-24   returns x unchanged
 *   +inf, or NaN with the sign bit clear, is returned unchanged
 */
float CDECL log1pf(float x)
{
    static const float ln2_hi = 6.9313812256e-01,
        ln2_lo = 9.0580006145e-06,
        Lg1 = 0xaaaaaa.0p-24,
        Lg2 = 0xccce13.0p-25,
        Lg3 = 0x91e9ee.0p-25,
        Lg4 = 0xf89e26.0p-26;

    union {float f; UINT32 i;} u = {x};
    float hfsq, f, c, s, z, R, w, t1, t2, dk;
    UINT32 ix, iu;
    int k;

    /* ix: raw bit pattern of x */
    ix = u.i;
    /* k != 0 means the argument reduction in the "if (k)" block is needed */
    k = 1;
    if (ix < 0x3ed413d0 || ix >> 31) { /* 1+x < sqrt(2)+ */
        if (ix >= 0xbf800000) { /* x <= -1.0 */
            if (x == -1) {
                *_errno() = ERANGE;
                return x / 0.0f; /* log1p(-1)=-inf */
            }
            *_errno() = EDOM;
            return (x - x) / 0.0f; /* log1p(x<-1)=NaN */
        }
        if (ix<<1 < 0x33800000<<1) { /* |x| < 2**-24 */
            /* underflow if subnormal */
            if ((ix & 0x7f800000) == 0)
                fp_barrierf(x * x);
            return x;
        }
        if (ix <= 0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
            /* no reduction needed: use x directly as f, with k = 0 */
            k = 0;
            c = 0;
            f = x;
        }
    } else if (ix >= 0x7f800000)
        return x;
    if (k) {
        u.f = 1 + x;
        iu = u.i;
        iu += 0x3f800000 - 0x3f3504f3;
        k = (int)(iu >> 23) - 0x7f;
        /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
        if (k < 25) {
            c = k >= 2 ? 1 - (u.f - x) : x - (u.f - 1);
            c /= u.f;
        } else
            c = 0;
        /* reduce u into [sqrt(2)/2, sqrt(2)] */
        iu = (iu & 0x007fffff) + 0x3f3504f3;
        u.i = iu;
        f = u.f - 1;
    }
    /* polynomial in s = f / (2 + f); even and odd powers of z are summed
     * separately into t1 and t2 */
    s = f / (2.0f + f);
    z = s * s;
    w = z * z;
    t1= w * (Lg2 + w * Lg4);
    t2= z * (Lg1 + w * Lg3);
    R = t2 + t1;
    hfsq = 0.5f * f * f;
    dk = k;
    /* k * ln2 is added as separate high (ln2_hi) and low (ln2_lo) parts */
    return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi;
}
8221
/*********************************************************************
 *      log2 (MSVCR120.@)
 *
 * Copied from musl: src/math/log2.c
 *
 * Computes the base-2 logarithm of x in double precision.
 *
 * Special cases handled explicitly below:
 *   x == 1         returns 0
 *   x == +-0       sets errno to ERANGE and returns -1.0 / x
 *   x == +inf      returns x
 *   x is NaN       returns x
 *   x <  0         sets errno to EDOM and returns (x - x) / (x - x)
 *   subnormal x    is scaled by 2^52 and the exponent is corrected
 *
 * Inputs whose bit pattern lies in the window tested first (values
 * close to 1.0) use the B[] polynomial; all other inputs use the T[]
 * and T2[] lookup tables together with the A[] polynomial.
 */
double CDECL log2(double x)
{
    /* 1/ln2 split into a high and a low part */
    static const double invln2hi = 0x1.7154765200000p+0,
        invln2lo = 0x1.705fc2eefa200p-33;
    /* polynomial coefficients for the table-driven path */
    static const double A[] = {
        -0x1.71547652b8339p-1,
        0x1.ec709dc3a04bep-2,
        -0x1.7154764702ffbp-2,
        0x1.2776c50034c48p-2,
        -0x1.ec7b328ea92bcp-3,
        0x1.a6225e117f92ep-3
    };
    /* polynomial coefficients for the close-to-1.0 path */
    static const double B[] = {
        -0x1.71547652b82fep-1,
        0x1.ec709dc3a03f7p-2,
        -0x1.71547652b7c3fp-2,
        0x1.2776c50f05be4p-2,
        -0x1.ec709dd768fe5p-3,
        0x1.a61761ec4e736p-3,
        -0x1.7153fbc64a79bp-3,
        0x1.484d154f01b4ap-3,
        -0x1.289e4a72c383cp-3,
        0x1.0b32f285aee66p-3
    };
    /* per-subinterval invc and logc, indexed by i below */
    static const struct {
        double invc, logc;
    } T[] = {
        {0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
        {0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
        {0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
        {0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
        {0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2},
        {0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
        {0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2},
        {0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
        {0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2},
        {0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
        {0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2},
        {0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
        {0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2},
        {0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
        {0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2},
        {0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
        {0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2},
        {0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
        {0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2},
        {0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
        {0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3},
        {0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
        {0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3},
        {0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
        {0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3},
        {0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
        {0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3},
        {0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
        {0x1.19453847f2200p+0, -0x1.162595afdc000p-3},
        {0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
        {0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4},
        {0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
        {0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4},
        {0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
        {0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4},
        {0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
        {0x1.07325cac53b83p+0, -0x1.47a954f770000p-5},
        {0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
        {0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6},
        {0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
        {0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},
        {0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
        {0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},
        {0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
        {0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},
        {0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
        {0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},
        {0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
        {0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},
        {0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
        {0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},
        {0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
        {0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},
        {0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
        {0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},
        {0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
        {0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},
        {0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
        {0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},
        {0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
        {0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},
        {0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
        {0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
        {0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2}
    };
    /* per-subinterval chi and clo, subtracted from z when forming r */
    static const struct {
        double chi, clo;
    } T2[] = {
        {0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
        {0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
        {0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
        {0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
        {0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},
        {0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
        {0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},
        {0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
        {0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55},
        {0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
        {0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},
        {0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
        {0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56},
        {0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
        {0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},
        {0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
        {0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57},
        {0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
        {0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55},
        {0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
        {0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55},
        {0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
        {0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},
        {0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
        {0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},
        {0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
        {0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56},
        {0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
        {0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58},
        {0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
        {0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},
        {0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
        {0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},
        {0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
        {0x1.ea00027edc00cp-1, -0x1.c848309459811p-55},
        {0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
        {0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55},
        {0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
        {0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},
        {0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
        {0x1.0200004292367p+0, 0x1.b7ff365324681p-54},
        {0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
        {0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58},
        {0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
        {0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55},
        {0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
        {0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},
        {0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
        {0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55},
        {0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
        {0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56},
        {0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
        {0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},
        {0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
        {0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},
        {0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
        {0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},
        {0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
        {0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55},
        {0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
        {0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},
        {0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
        {0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
        {0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54}
    };

    double z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p, rhi, rlo;
    UINT64 ix, iz, tmp;
    UINT32 top;
    int k, i;

    /* ix: raw bit pattern of x; top: its upper 16 bits (sign + exponent
     * + top 4 mantissa bits) */
    ix = *(UINT64*)&x;
    top = ix >> 48;
    if (ix - 0x3feea4af00000000ULL < 0x210aa00000000ULL) {
        /* Handle close to 1.0 inputs separately.  */
        /* Fix sign of zero with downward rounding when x==1.  */
        if (ix == 0x3ff0000000000000ULL)
            return 0;
        r = x - 1.0;
        /* rhi keeps only the upper 32 bits of r; rlo is the remainder */
        *(UINT64*)&rhi = *(UINT64*)&r & -1ULL << 32;
        rlo = r - rhi;
        hi = rhi * invln2hi;
        lo = rlo * invln2hi + r * invln2lo;
        r2 = r * r; /* rounding error: 0x1p-62.  */
        r4 = r2 * r2;
        /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma).  */
        p = r2 * (B[0] + r * B[1]);
        y = hi + p;
        lo += hi - y + p;
        lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) +
                r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
        y += lo;
        return y;
    }
    if (top - 0x0010 >= 0x7ff0 - 0x0010) {
        /* x < 0x1p-1022 or inf or nan.  */
        if (ix * 2 == 0) {
            /* +-0: the shift by one discards the sign bit */
            *_errno() = ERANGE;
            return -1.0 / x;
        }
        if (ix == 0x7ff0000000000000ULL) /* log(inf) == inf.  */
            return x;
        /* NaN (all exponent bits set, non-zero mantissa) is passed through */
        if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL))
            return x;
        if (top & 0x8000) {
            /* sign bit set: negative argument */
            *_errno() = EDOM;
            return (x - x) / (x - x);
        }
        /* x is subnormal, normalize it.  */
        x *= 0x1p52;
        ix = *(UINT64*)&x;
        ix -= 52ULL << 52;
    }

    /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center.  */
    tmp = ix - 0x3fe6000000000000ULL;
    /* i: the 6 bits just below the exponent field of tmp select the entry */
    i = (tmp >> (52 - 6)) % (1 << 6);
    k = (INT64)tmp >> 52; /* arithmetic shift */
    iz = ix - (tmp & 0xfffULL << 52);
    invc = T[i].invc;
    logc = T[i].logc;
    z = *(double*)&iz;
    kd = k;

    /* log2(x) = log2(z/c) + log2(c) + k.  */
    /* r ~= z/c - 1, |r| < 1/(2*N).  */
    /* rounding error: 0x1p-55/N + 0x1p-65.  */
    r = (z - T2[i].chi - T2[i].clo) * invc;
    /* rhi keeps only the upper 32 bits of r; rlo is the remainder */
    *(UINT64*)&rhi = *(UINT64*)&r & -1ULL << 32;
    rlo = r - rhi;
    t1 = rhi * invln2hi;
    t2 = rlo * invln2hi + r * invln2lo;

    /* hi + lo = r/ln2 + log2(c) + k.  */
    t3 = kd + logc;
    hi = t3 + t1;
    lo = t3 - hi + t1 + t2;

    /* log2(r+1) = r/ln2 + r^2*poly(r).  */
    /* Evaluation is optimized assuming superscalar pipelined execution.  */
    r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
    r4 = r2 * r2;
    /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
       ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma).  */
    p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
    y = lo + r2 * p + hi;
    return y;
}
8472
/*********************************************************************
 *      log2f (MSVCR120.@)
 *
 * Copied from musl: src/math/log2f.c
 *
 * Computes the base-2 logarithm of x in single precision; the
 * intermediate arithmetic is carried out in double.
 *
 * Special cases handled explicitly below:
 *   x == 1         returns 0
 *   x == +-0       sets errno to ERANGE and returns -1.0f / x
 *   x == +inf      returns x
 *   x is NaN       returns x
 *   x <  0         sets errno to EDOM and returns (x - x) / (x - x)
 *   subnormal x    is scaled by 2^23 and the exponent is corrected
 */
float CDECL log2f(float x)
{
    /* polynomial coefficients, highest-order term first */
    static const double A[] = {
        -0x1.712b6f70a7e4dp-2,
        0x1.ecabf496832ep-2,
        -0x1.715479ffae3dep-1,
        0x1.715475f35c8b8p0
    };
    /* per-subinterval invc and logc, indexed by i below */
    static const struct {
        double invc, logc;
    } T[] = {
        { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
        { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
        { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
        { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
        { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
        { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
        { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
        { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
        { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
        { 0x1p+0, 0x0p+0 },
        { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
        { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
        { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
        { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
        { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
        { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 }
    };

    double z, r, r2, p, y, y0, invc, logc;
    UINT32 ix, iz, top, tmp;
    int k, i;

    /* ix: raw bit pattern of x */
    ix = *(UINT32*)&x;
    /* Fix sign of zero with downward rounding when x==1. */
    if (ix == 0x3f800000)
        return 0;
    if (ix - 0x00800000 >= 0x7f800000 - 0x00800000) {
        /* x < 0x1p-126 or inf or nan. */
        if (ix * 2 == 0) {
            /* +-0: the shift by one discards the sign bit */
            *_errno() = ERANGE;
            return -1.0f / x;
        }
        if (ix == 0x7f800000) /* log2(inf) == inf. */
            return x;
        /* NaN of either sign is passed through */
        if (ix * 2 > 0xff000000)
            return x;
        if (ix & 0x80000000) {
            /* sign bit set: negative argument */
            *_errno() = EDOM;
            return (x - x) / (x - x);
        }
        /* x is subnormal, normalize it. */
        x *= 0x1p23f;
        ix = *(UINT32*)&x;
        ix -= 23 << 23;
    }

    /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center. */
    tmp = ix - 0x3f330000;
    /* i: the 4 bits just below the exponent field of tmp select the entry */
    i = (tmp >> (23 - 4)) % (1 << 4);
    top = tmp & 0xff800000;
    iz = ix - top;
    k = (INT32)tmp >> 23; /* arithmetic shift */
    invc = T[i].invc;
    logc = T[i].logc;
    z = *(float*)&iz;

    /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
    r = z * invc - 1;
    y0 = logc + (double)k;

    /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
    r2 = r * r;
    y = A[1] * r + A[2];
    y = A[0] * r2 + y;
    p = A[3] * r + y0;
    y = y * r2 + p;
    /* the double result is narrowed to float by the return */
    return y;
}
8559
8560 /*********************************************************************
8561  *      rint (MSVCR120.@)
8562  */
8563 double CDECL rint(double x)
8564 {
8565     return __rint(x);
8566 }
8567
8568 /*********************************************************************
8569  *      rintf (MSVCR120.@)
8570  *
8571  * Copied from musl: src/math/rintf.c
8572  */
8573 float CDECL rintf(float x)
8574 {
8575     static const float toint = 1 / FLT_EPSILON;
8576
8577     unsigned int ix = *(unsigned int*)&x;
8578     int e = ix >> 23 & 0xff;
8579     int s = ix >> 31;
8580     float y;
8581
8582     if (e >= 0x7f + 23)
8583         return x;
8584     if (s)
8585         y = fp_barrierf(x - toint) + toint;
8586     else
8587         y = fp_barrierf(x + toint) - toint;
8588     if (y == 0)
8589         return s ? -0.0f : 0.0f;
8590     return y;
8591 }
8592
8593 /*********************************************************************
8594  *      lrint (MSVCR120.@)
8595  */
8596 __msvcrt_long CDECL lrint(double x)
8597 {
8598     double d;
8599
8600     d = rint(x);
8601     if ((d < 0 && d != (double)(__msvcrt_long)d)
8602             || (d >= 0 && d != (double)(__msvcrt_ulong)d)) {
8603         *_errno() = EDOM;
8604         return 0;
8605     }
8606     return d;
8607 }
8608
8609 /*********************************************************************
8610  *      lrintf (MSVCR120.@)
8611  */
8612 __msvcrt_long CDECL lrintf(float x)
8613 {
8614     float f;
8615
8616     f = rintf(x);
8617     if ((f < 0 && f != (float)(__msvcrt_long)f)
8618             || (f >= 0 && f != (float)(__msvcrt_ulong)f)) {
8619         *_errno() = EDOM;
8620         return 0;
8621     }
8622     return f;
8623 }
8624
8625 /*********************************************************************
8626  *      llrint (MSVCR120.@)
8627  */
8628 __int64 CDECL llrint(double x)
8629 {
8630     double d;
8631
8632     d = rint(x);
8633     if ((d < 0 && d != (double)(__int64)d)
8634             || (d >= 0 && d != (double)(unsigned __int64)d)) {
8635         *_errno() = EDOM;
8636         return 0;
8637     }
8638     return d;
8639 }
8640
8641 /*********************************************************************
8642  *      llrintf (MSVCR120.@)
8643  */
8644 __int64 CDECL llrintf(float x)
8645 {
8646     float f;
8647
8648     f = rintf(x);
8649     if ((f < 0 && f != (float)(__int64)f)
8650             || (f >= 0 && f != (float)(unsigned __int64)f)) {
8651         *_errno() = EDOM;
8652         return 0;
8653     }
8654     return f;
8655 }
8656
8657 /*********************************************************************
8658  *      round (MSVCR120.@)
8659  */
8660 double CDECL round(double x)
8661 {
8662     return __round(x);
8663 }
8664
8665 /*********************************************************************
8666  *      roundf (MSVCR120.@)
8667  *
8668  * Copied from musl: src/math/roundf.c
8669  */
8670 float CDECL roundf(float x)
8671 {
8672     static const float toint = 1 / FLT_EPSILON;
8673
8674     unsigned int ix = *(unsigned int*)&x;
8675     int e = ix >> 23 & 0xff;
8676     float y;
8677
8678     if (e >= 0x7f + 23)
8679         return x;
8680     if (ix >> 31)
8681         x = -x;
8682     if (e < 0x7f - 1)
8683         return 0 * *(float*)&ix;
8684     y = fp_barrierf(x + toint) - toint - x;
8685     if (y > 0.5f)
8686         y = y + x - 1;
8687     else if (y <= -0.5f)
8688         y = y + x + 1;
8689     else
8690         y = y + x;
8691     if (ix >> 31)
8692         y = -y;
8693     return y;
8694 }
8695
8696 /*********************************************************************
8697  *      lround (MSVCR120.@)
8698  *
8699  * Copied from musl: src/math/lround.c
8700  */
8701 __msvcrt_long CDECL lround(double x)
8702 {
8703     double d = round(x);
8704     if (d != (double)(__msvcrt_long)d) {
8705         *_errno() = EDOM;
8706         return 0;
8707     }
8708     return d;
8709 }
8710
8711 /*********************************************************************
8712  *      lroundf (MSVCR120.@)
8713  *
8714  * Copied from musl: src/math/lroundf.c
8715  */
8716 __msvcrt_long CDECL lroundf(float x)
8717 {
8718     float f = roundf(x);
8719     if (f != (float)(__msvcrt_long)f) {
8720         *_errno() = EDOM;
8721         return 0;
8722     }
8723     return f;
8724 }
8725
8726 /*********************************************************************
8727  *      llround (MSVCR120.@)
8728  *
8729  * Copied from musl: src/math/llround.c
8730  */
8731 __int64 CDECL llround(double x)
8732 {
8733     double d = round(x);
8734     if (d != (double)(__int64)d) {
8735         *_errno() = EDOM;
8736         return 0;
8737     }
8738     return d;
8739 }
8740
8741 /*********************************************************************
8742  *      llroundf (MSVCR120.@)
8743  *
8744  * Copied from musl: src/math/llroundf.c
8745  */
8746 __int64 CDECL llroundf(float x)
8747 {
8748     float f = roundf(x);
8749     if (f != (float)(__int64)f) {
8750         *_errno() = EDOM;
8751         return 0;
8752     }
8753     return f;
8754 }
8755
8756 /*********************************************************************
8757  *      trunc (MSVCR120.@)
8758  *
8759  * Copied from musl: src/math/trunc.c
8760  */
8761 double CDECL trunc(double x)
8762 {
8763     union {double f; UINT64 i;} u = {x};
8764     int e = (u.i >> 52 & 0x7ff) - 0x3ff + 12;
8765     UINT64 m;
8766
8767     if (e >= 52 + 12)
8768         return x;
8769     if (e < 12)
8770         e = 1;
8771     m = -1ULL >> e;
8772     if ((u.i & m) == 0)
8773         return x;
8774     u.i &= ~m;
8775     return u.f;
8776 }
8777
8778 /*********************************************************************
8779  *      truncf (MSVCR120.@)
8780  *
8781  * Copied from musl: src/math/truncf.c
8782  */
8783 float CDECL truncf(float x)
8784 {
8785     union {float f; UINT32 i;} u = {x};
8786     int e = (u.i >> 23 & 0xff) - 0x7f + 9;
8787     UINT32 m;
8788
8789     if (e >= 23 + 9)
8790         return x;
8791     if (e < 9)
8792         e = 1;
8793     m = -1U >> e;
8794     if ((u.i & m) == 0)
8795         return x;
8796     u.i &= ~m;
8797     return u.f;
8798 }
8799
8800 /*********************************************************************
8801  *      _dtest (MSVCR120.@)
8802  */
8803 short CDECL _dtest(double *x)
8804 {
8805     return _dclass(*x);
8806 }
8807
8808 /*********************************************************************
8809  *      _fdtest (MSVCR120.@)
8810  */
8811 short CDECL _fdtest(float *x)
8812 {
8813     return _fdclass(*x);
8814 }
8815
/* Rational approximation called by erfc2() when |x| < 1.25: with
 * t = |x| - 1 the result is 1 - erx - P(t) / Q(t). */
static double erfc1(double x)
{
    static const double
        erx = 8.45062911510467529297e-01,
        pa0 = -2.36211856075265944077e-03,
        pa1 = 4.14856118683748331666e-01,
        pa2 = -3.72207876035701323847e-01,
        pa3 = 3.18346619901161753674e-01,
        pa4 = -1.10894694282396677476e-01,
        pa5 = 3.54783043256182359371e-02,
        pa6 = -2.16637559486879084300e-03,
        qa1 = 1.06420880400844228286e-01,
        qa2 = 5.40397917702171048937e-01,
        qa3 = 7.18286544141962662868e-02,
        qa4 = 1.26171219808761642112e-01,
        qa5 = 1.36370839120290507362e-02,
        qa6 = 1.19844998467991074170e-02;

    const double t = fabs(x) - 1;
    /* numerator and denominator polynomials, both in Horner form */
    const double numer = pa0 + t * (pa1 + t * (pa2 + t * (pa3 + t * (pa4 + t * (pa5 + t * pa6)))));
    const double denom = 1 + t * (qa1 + t * (qa2 + t * (qa3 + t * (qa4 + t * (qa5 + t * qa6)))));

    return 1 - erx - numer / denom;
}
8840
/* Helper for erf(), which uses 1 - erfc2(ix, x) as the value of erf(|x|).
 *
 * ix is compared against high-word bit patterns of |x|; erf() passes the
 * upper 32 bits of x with the sign bit cleared.
 *
 * For |x| < 1.25 the work is delegated to erfc1().  Otherwise, with
 * s = 1 / x^2, a rational function R(s) / S(s) is evaluated (one
 * coefficient set below 1/0.35, another above) and combined with two
 * exp() calls. */
static double erfc2(UINT32 ix, double x)
{
    static const double ra0  = -9.86494403484714822705e-03,
                 ra1  = -6.93858572707181764372e-01,
                 ra2  = -1.05586262253232909814e+01,
                 ra3  = -6.23753324503260060396e+01,
                 ra4  = -1.62396669462573470355e+02,
                 ra5  = -1.84605092906711035994e+02,
                 ra6  = -8.12874355063065934246e+01,
                 ra7  = -9.81432934416914548592e+00,
                 sa1  =  1.96512716674392571292e+01,
                 sa2  =  1.37657754143519042600e+02,
                 sa3  =  4.34565877475229228821e+02,
                 sa4  =  6.45387271733267880336e+02,
                 sa5  =  4.29008140027567833386e+02,
                 sa6  =  1.08635005541779435134e+02,
                 sa7  =  6.57024977031928170135e+00,
                 sa8  = -6.04244152148580987438e-02,
                 rb0  = -9.86494292470009928597e-03,
                 rb1  = -7.99283237680523006574e-01,
                 rb2  = -1.77579549177547519889e+01,
                 rb3  = -1.60636384855821916062e+02,
                 rb4  = -6.37566443368389627722e+02,
                 rb5  = -1.02509513161107724954e+03,
                 rb6  = -4.83519191608651397019e+02,
                 sb1  =  3.03380607434824582924e+01,
                 sb2  =  3.25792512996573918826e+02,
                 sb3  =  1.53672958608443695994e+03,
                 sb4  =  3.19985821950859553908e+03,
                 sb5  =  2.55305040643316442583e+03,
                 sb6  =  4.74528541206955367215e+02,
                 sb7  = -2.24409524465858183362e+01;

    double s, R, S, z;
    UINT64 iz;

    if (ix < 0x3ff40000) /* |x| < 1.25 */
        return erfc1(x);

    x = fabs(x);
    s = 1 / (x * x);
    if (ix < 0x4006db6d) { /* |x| < 1/.35 ~ 2.85714 */
        R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s *
                            (ra5 + s * (ra6 + s * ra7))))));
        S = 1.0 + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s *
                            (sa5 + s * (sa6 + s * (sa7 + s * sa8)))))));
    } else { /* |x| > 1/.35 */
        R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s *
                            (rb5 + s * rb6)))));
        S = 1.0 + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s *
                            (sb5 + s * (sb6 + s * sb7))))));
    }
    /* z is x with the low 32 bits of its representation cleared */
    z = x;
    iz = *(ULONGLONG*)&z;
    iz &= 0xffffffff00000000ULL;
    z = *(double*)&iz;
    /* -z*z - 0.5625 + (z-x)*(z+x) equals -x*x - 0.5625, split over two
     * exp() calls: one on the truncated z, one on the remainder + R/S */
    return exp(-z * z - 0.5625) * exp((z - x) * (z + x) + R / S) / x;
}
8899
8900 /*********************************************************************
8901  *      erf (MSVCR120.@)
8902  */
8903 double CDECL erf(double x)
8904 {
8905     static const double efx8 =  1.02703333676410069053e+00,
8906                  pp0  =  1.28379167095512558561e-01,
8907                  pp1  = -3.25042107247001499370e-01,
8908                  pp2  = -2.84817495755985104766e-02,
8909                  pp3  = -5.77027029648944159157e-03,
8910                  pp4  = -2.37630166566501626084e-05,
8911                  qq1  =  3.97917223959155352819e-01,
8912                  qq2  =  6.50222499887672944485e-02,
8913                  qq3  =  5.08130628187576562776e-03,
8914                  qq4  =  1.32494738004321644526e-04,
8915                  qq5  = -3.96022827877536812320e-06;
8916
8917     double r, s, z, y;
8918     UINT32 ix;
8919     int sign;
8920
8921     ix = *(UINT64*)&x >> 32;
8922     sign = ix >> 31;
8923     ix &= 0x7fffffff;
8924     if (ix >= 0x7ff00000) {
8925         /* erf(nan)=nan, erf(+-inf)=+-1 */
8926         return 1 - 2 * sign + 1 / x;
8927     }
8928     if (ix < 0x3feb0000) { /* |x| < 0.84375 */
8929         if (ix < 0x3e300000) { /* |x| < 2**-28 */
8930             /* avoid underflow */
8931             return 0.125 * (8 * x + efx8 * x);
8932         }
8933         z = x * x;
8934         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
8935         s = 1.0 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
8936         y = r / s;
8937         return x + x * y;
8938     }
8939     if (ix < 0x40180000) /* 0.84375 <= |x| < 6 */
8940         y = 1 - erfc2(ix, x);
8941     else
8942         y = 1 - DBL_MIN;
8943     return sign ? -y : y;
8944 }
8945
/* Single-precision rational approximation: with t = |x| - 1 the result
 * is 1 - erx - P(t) / Q(t). */
static float erfc1f(float x)
{
    static const float
        erx = 8.4506291151e-01,
        pa0 = -2.3621185683e-03,
        pa1 = 4.1485610604e-01,
        pa2 = -3.7220788002e-01,
        pa3 = 3.1834661961e-01,
        pa4 = -1.1089469492e-01,
        pa5 = 3.5478305072e-02,
        pa6 = -2.1663755178e-03,
        qa1 = 1.0642088205e-01,
        qa2 = 5.4039794207e-01,
        qa3 = 7.1828655899e-02,
        qa4 = 1.2617121637e-01,
        qa5 = 1.3637083583e-02,
        qa6 = 1.1984500103e-02;

    const float t = fabsf(x) - 1;
    /* numerator and denominator polynomials, both in Horner form */
    const float numer = pa0 + t * (pa1 + t * (pa2 + t * (pa3 + t * (pa4 + t * (pa5 + t * pa6)))));
    const float denom = 1 + t * (qa1 + t * (qa2 + t * (qa3 + t * (qa4 + t * (qa5 + t * qa6)))));

    return 1 - erx - numer / denom;
}
8970
8971 static float erfc2f(UINT32 ix, float x)
8972 {
8973     static const float ra0  = -9.8649440333e-03,
8974                  ra1  = -6.9385856390e-01,
8975                  ra2  = -1.0558626175e+01,
8976                  ra3  = -6.2375331879e+01,
8977                  ra4  = -1.6239666748e+02,
8978                  ra5  = -1.8460508728e+02,
8979                  ra6  = -8.1287437439e+01,
8980                  ra7  = -9.8143291473e+00,
8981                  sa1  =  1.9651271820e+01,
8982                  sa2  =  1.3765776062e+02,
8983                  sa3  =  4.3456588745e+02,
8984                  sa4  =  6.4538726807e+02,
8985                  sa5  =  4.2900814819e+02,
8986                  sa6  =  1.0863500214e+02,
8987                  sa7  =  6.5702495575e+00,
8988                  sa8  = -6.0424413532e-02,
8989                  rb0  = -9.8649431020e-03,
8990                  rb1  = -7.9928326607e-01,
8991                  rb2  = -1.7757955551e+01,
8992                  rb3  = -1.6063638306e+02,
8993                  rb4  = -6.3756646729e+02,
8994                  rb5  = -1.0250950928e+03,
8995                  rb6  = -4.8351919556e+02,
8996                  sb1  =  3.0338060379e+01,
8997                  sb2  =  3.2579251099e+02,
8998                  sb3  =  1.5367296143e+03,
8999                  sb4  =  3.1998581543e+03,
9000                  sb5  =  2.5530502930e+03,
9001                  sb6  =  4.7452853394e+02,
9002                  sb7  = -2.2440952301e+01;
9003
9004     float s, R, S, z;
9005
9006     if (ix < 0x3fa00000) /* |x| < 1.25 */
9007         return erfc1f(x);
9008
9009     x = fabsf(x);
9010     s = 1 / (x * x);
9011     if (ix < 0x4036db6d) { /* |x| < 1/0.35 */
9012         R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s *
9013                             (ra5 + s * (ra6 + s * ra7))))));
9014         S = 1.0f + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s *
9015                             (sa5 + s * (sa6 + s * (sa7 + s * sa8)))))));
9016     } else { /* |x| >= 1/0.35 */
9017         R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s * (rb5 + s * rb6)))));
9018         S = 1.0f + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s *
9019                             (sb5 + s * (sb6 + s * sb7))))));
9020     }
9021
9022     ix = *(UINT32*)&x & 0xffffe000;
9023     z = *(float*)&ix;
9024     return expf(-z * z - 0.5625f) * expf((z - x) * (z + x) + R / S) / x;
9025 }
9026
9027 /*********************************************************************
9028  *      erff (MSVCR120.@)
9029  *
9030  * Copied from musl: src/math/erff.c
9031  */
9032 float CDECL erff(float x)
9033 {
9034     static const float efx8 =  1.0270333290e+00,
9035                  pp0  =  1.2837916613e-01,
9036                  pp1  = -3.2504209876e-01,
9037                  pp2  = -2.8481749818e-02,
9038                  pp3  = -5.7702702470e-03,
9039                  pp4  = -2.3763017452e-05,
9040                  qq1  =  3.9791721106e-01,
9041                  qq2  =  6.5022252500e-02,
9042                  qq3  =  5.0813062117e-03,
9043                  qq4  =  1.3249473704e-04,
9044                  qq5  = -3.9602282413e-06;
9045
9046     float r, s, z, y;
9047     UINT32 ix;
9048     int sign;
9049
9050     ix = *(UINT32*)&x;
9051     sign = ix >> 31;
9052     ix &= 0x7fffffff;
9053     if (ix >= 0x7f800000) {
9054         /* erf(nan)=nan, erf(+-inf)=+-1 */
9055         return 1 - 2 * sign + 1 / x;
9056     }
9057     if (ix < 0x3f580000) { /* |x| < 0.84375 */
9058         if (ix < 0x31800000) { /* |x| < 2**-28 */
9059             /*avoid underflow */
9060             return 0.125f * (8 * x + efx8 * x);
9061         }
9062         z = x * x;
9063         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9064         s = 1 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9065         y = r / s;
9066         return x + x * y;
9067     }
9068     if (ix < 0x40c00000) /* |x| < 6 */
9069         y = 1 - erfc2f(ix, x);
9070     else
9071         y = 1 - FLT_MIN;
9072     return sign ? -y : y;
9073 }
9074
9075 /*********************************************************************
9076  *      erfc (MSVCR120.@)
9077  *
9078  * Copied from musl: src/math/erf.c
9079  */
9080 double CDECL erfc(double x)
9081 {
9082     static const double pp0  =  1.28379167095512558561e-01,
9083                  pp1  = -3.25042107247001499370e-01,
9084                  pp2  = -2.84817495755985104766e-02,
9085                  pp3  = -5.77027029648944159157e-03,
9086                  pp4  = -2.37630166566501626084e-05,
9087                  qq1  =  3.97917223959155352819e-01,
9088                  qq2  =  6.50222499887672944485e-02,
9089                  qq3  =  5.08130628187576562776e-03,
9090                  qq4  =  1.32494738004321644526e-04,
9091                  qq5  = -3.96022827877536812320e-06;
9092
9093     double r, s, z, y;
9094     UINT32 ix;
9095     int sign;
9096
9097     ix = *(ULONGLONG*)&x >> 32;
9098     sign = ix >> 31;
9099     ix &= 0x7fffffff;
9100     if (ix >= 0x7ff00000) {
9101         /* erfc(nan)=nan, erfc(+-inf)=0,2 */
9102         return 2 * sign + 1 / x;
9103     }
9104     if (ix < 0x3feb0000) { /* |x| < 0.84375 */
9105         if (ix < 0x3c700000) /* |x| < 2**-56 */
9106             return 1.0 - x;
9107         z = x * x;
9108         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9109         s = 1.0 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9110         y = r / s;
9111         if (sign || ix < 0x3fd00000) { /* x < 1/4 */
9112             return 1.0 - (x + x * y);
9113         }
9114         return 0.5 - (x - 0.5 + x * y);
9115     }
9116     if (ix < 0x403c0000) { /* 0.84375 <= |x| < 28 */
9117         return sign ? 2 - erfc2(ix, x) : erfc2(ix, x);
9118     }
9119     if (sign)
9120         return 2 - DBL_MIN;
9121     *_errno() = ERANGE;
9122     return fp_barrier(DBL_MIN) * DBL_MIN;
9123 }
9124
9125 /*********************************************************************
9126  *      erfcf (MSVCR120.@)
9127  *
9128  * Copied from musl: src/math/erff.c
9129  */
9130 float CDECL erfcf(float x)
9131 {
9132     static const float pp0  =  1.2837916613e-01,
9133                  pp1  = -3.2504209876e-01,
9134                  pp2  = -2.8481749818e-02,
9135                  pp3  = -5.7702702470e-03,
9136                  pp4  = -2.3763017452e-05,
9137                  qq1  =  3.9791721106e-01,
9138                  qq2  =  6.5022252500e-02,
9139                  qq3  =  5.0813062117e-03,
9140                  qq4  =  1.3249473704e-04,
9141                  qq5  = -3.9602282413e-06;
9142
9143     float r, s, z, y;
9144     UINT32 ix;
9145     int sign;
9146
9147     ix = *(UINT32*)&x;
9148     sign = ix >> 31;
9149     ix &= 0x7fffffff;
9150     if (ix >= 0x7f800000) {
9151         /* erfc(nan)=nan, erfc(+-inf)=0,2 */
9152         return 2 * sign + 1 / x;
9153     }
9154
9155     if (ix < 0x3f580000) { /* |x| < 0.84375 */
9156         if (ix < 0x23800000) /* |x| < 2**-56 */
9157             return 1.0f - x;
9158         z = x * x;
9159         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9160         s = 1.0f + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9161         y = r / s;
9162         if (sign || ix < 0x3e800000) /* x < 1/4 */
9163             return 1.0f - (x + x * y);
9164         return 0.5f - (x - 0.5f + x * y);
9165     }
9166     if (ix < 0x41e00000) { /* |x| < 28 */
9167         return sign ? 2 - erfc2f(ix, x) : erfc2f(ix, x);
9168     }
9169     if (sign)
9170         return 2 - FLT_MIN;
9171     *_errno() = ERANGE;
9172     return FLT_MIN * FLT_MIN;
9173 }
9174
9175 /*********************************************************************
9176  *      fmaxf (MSVCR120.@)
9177  */
9178 float CDECL fmaxf(float x, float y)
9179 {
9180     if(isnan(x))
9181         return y;
9182     if(isnan(y))
9183         return x;
9184     if(x==0 && y==0)
9185         return signbit(x) ? y : x;
9186     return x<y ? y : x;
9187 }
9188
9189 /*********************************************************************
9190  *      fmax (MSVCR120.@)
9191  */
9192 double CDECL fmax(double x, double y)
9193 {
9194     if(isnan(x))
9195         return y;
9196     if(isnan(y))
9197         return x;
9198     if(x==0 && y==0)
9199         return signbit(x) ? y : x;
9200     return x<y ? y : x;
9201 }
9202
9203 /*********************************************************************
9204  *      fdimf (MSVCR120.@)
9205  */
9206 float CDECL fdimf(float x, float y)
9207 {
9208     if(isnan(x))
9209         return x;
9210     if(isnan(y))
9211         return y;
9212     return x>y ? x-y : 0;
9213 }
9214
9215 /*********************************************************************
9216  *      fdim (MSVCR120.@)
9217  */
9218 double CDECL fdim(double x, double y)
9219 {
9220     if(isnan(x))
9221         return x;
9222     if(isnan(y))
9223         return y;
9224     return x>y ? x-y : 0;
9225 }
9226
9227 /*********************************************************************
9228  *      _fdsign (MSVCR120.@)
9229  */
9230 int CDECL _fdsign(float x)
9231 {
9232     union { float f; UINT32 i; } u = { x };
9233     return (u.i >> 16) & 0x8000;
9234 }
9235
9236 /*********************************************************************
9237  *      _dsign (MSVCR120.@)
9238  */
9239 int CDECL _dsign(double x)
9240 {
9241     union { double f; UINT64 i; } u = { x };
9242     return (u.i >> 48) & 0x8000;
9243 }
9244
9245
9246 /*********************************************************************
9247  *      _dpcomp (MSVCR120.@)
9248  */
9249 int CDECL _dpcomp(double x, double y)
9250 {
9251     if(isnan(x) || isnan(y))
9252         return 0;
9253
9254     if(x == y) return 2;
9255     return x < y ? 1 : 4;
9256 }
9257
9258 /*********************************************************************
9259  *      _fdpcomp (MSVCR120.@)
9260  */
9261 int CDECL _fdpcomp(float x, float y)
9262 {
9263     return _dpcomp(x, y);
9264 }
9265
9266 /*********************************************************************
9267  *      fminf (MSVCR120.@)
9268  */
9269 float CDECL fminf(float x, float y)
9270 {
9271     if(isnan(x))
9272         return y;
9273     if(isnan(y))
9274         return x;
9275     if(x==0 && y==0)
9276         return signbit(x) ? x : y;
9277     return x<y ? x : y;
9278 }
9279
9280 /*********************************************************************
9281  *      fmin (MSVCR120.@)
9282  */
9283 double CDECL fmin(double x, double y)
9284 {
9285     if(isnan(x))
9286         return y;
9287     if(isnan(y))
9288         return x;
9289     if(x==0 && y==0)
9290         return signbit(x) ? x : y;
9291     return x<y ? x : y;
9292 }
9293
9294 /*********************************************************************
9295  *      asinh (MSVCR120.@)
9296  *
9297  * Copied from musl: src/math/asinh.c
9298  */
9299 double CDECL asinh(double x)
9300 {
9301     UINT64 ux = *(UINT64*)&x;
9302     int e = ux >> 52 & 0x7ff;
9303     int s = ux >> 63;
9304
9305     /* |x| */
9306     ux &= (UINT64)-1 / 2;
9307     x = *(double*)&ux;
9308
9309     if (e >= 0x3ff + 26) /* |x| >= 0x1p26 or inf or nan */
9310         x = log(x) + 0.693147180559945309417232121458176568;
9311     else if (e >= 0x3ff + 1) /* |x| >= 2 */
9312         x = log(2 * x + 1 / (sqrt(x * x + 1) + x));
9313     else if (e >= 0x3ff - 26) /* |x| >= 0x1p-26 */
9314         x = log1p(x + x * x / (sqrt(x * x + 1) + 1));
9315     else /* |x| < 0x1p-26, raise inexact if x != 0 */
9316         fp_barrier(x + 0x1p120f);
9317     return s ? -x : x;
9318 }
9319
9320 /*********************************************************************
9321  *      asinhf (MSVCR120.@)
9322  *
9323  * Copied from musl: src/math/asinhf.c
9324  */
9325 float CDECL asinhf(float x)
9326 {
9327     UINT32 ux = *(UINT32*)&x;
9328     UINT32 i = ux & 0x7fffffff;
9329     int s = ux >> 31;
9330
9331     /* |x| */
9332     x = *(float*)&i;
9333
9334     if (i >= 0x3f800000 + (12 << 23))/* |x| >= 0x1p12 or inf or nan */
9335         x = logf(x) + 0.693147180559945309417232121458176568f;
9336     else if (i >= 0x3f800000 + (1 << 23)) /* |x| >= 2 */
9337         x = logf(2 * x + 1 / (sqrtf(x * x + 1) + x));
9338     else if (i >= 0x3f800000 - (12 << 23)) /* |x| >= 0x1p-12 */
9339         x = log1pf(x + x * x / (sqrtf(x * x + 1) + 1));
9340     else /* |x| < 0x1p-12, raise inexact if x!=0 */
9341         fp_barrierf(x + 0x1p120f);
9342     return s ? -x : x;
9343 }
9344
9345 /*********************************************************************
9346  *      acosh (MSVCR120.@)
9347  *
9348  * Copied from musl: src/math/acosh.c
9349  */
9350 double CDECL acosh(double x)
9351 {
9352     int e = *(UINT64*)&x >> 52 & 0x7ff;
9353
9354     if (x < 1)
9355     {
9356         *_errno() = EDOM;
9357         feraiseexcept(FE_INVALID);
9358         return NAN;
9359     }
9360
9361     if (e < 0x3ff + 1) /* |x| < 2, up to 2ulp error in [1,1.125] */
9362         return log1p(x - 1 + sqrt((x - 1) * (x - 1) + 2 * (x - 1)));
9363     if (e < 0x3ff + 26) /* |x| < 0x1p26 */
9364         return log(2 * x - 1 / (x + sqrt(x * x - 1)));
9365     /* |x| >= 0x1p26 or nan */
9366     return log(x) + 0.693147180559945309417232121458176568;
9367 }
9368
9369 /*********************************************************************
9370  *      acoshf (MSVCR120.@)
9371  *
9372  * Copied from musl: src/math/acoshf.c
9373  */
9374 float CDECL acoshf(float x)
9375 {
9376     UINT32 a = *(UINT32*)&x & 0x7fffffff;
9377
9378     if (x < 1)
9379     {
9380         *_errno() = EDOM;
9381         feraiseexcept(FE_INVALID);
9382         return NAN;
9383     }
9384
9385     if (a < 0x3f800000 + (1 << 23)) /* |x| < 2, up to 2ulp error in [1,1.125] */
9386         return log1pf(x - 1 + sqrtf((x - 1) * (x - 1) + 2 * (x - 1)));
9387     if (*(UINT32*)&x < 0x3f800000 + (12 << 23)) /* 2 <= x < 0x1p12 */
9388         return logf(2 * x - 1 / (x + sqrtf(x * x - 1)));
9389     /* x >= 0x1p12 or x <= -2 or nan */
9390     return logf(x) + 0.693147180559945309417232121458176568f;
9391 }
9392
9393 /*********************************************************************
9394  *      atanh (MSVCR120.@)
9395  *
9396  * Copied from musl: src/math/atanh.c
9397  */
9398 double CDECL atanh(double x)
9399 {
9400     UINT64 ux = *(UINT64*)&x;
9401     int e = ux >> 52 & 0x7ff;
9402     int s = ux >> 63;
9403
9404     /* |x| */
9405     ux &= (UINT64)-1 / 2;
9406     x = *(double*)&ux;
9407
9408     if (x > 1) {
9409         *_errno() = EDOM;
9410         feraiseexcept(FE_INVALID);
9411         return NAN;
9412     }
9413
9414     if (e < 0x3ff - 1) {
9415         if (e < 0x3ff - 32) {
9416             fp_barrier(x + 0x1p120f);
9417             if (e == 0) /* handle underflow */
9418                 fp_barrier(x * x);
9419         } else { /* |x| < 0.5, up to 1.7ulp error */
9420             x = 0.5 * log1p(2 * x + 2 * x * x / (1 - x));
9421         }
9422     } else { /* avoid overflow */
9423         x = 0.5 * log1p(2 * (x / (1 - x)));
9424         if (isinf(x)) *_errno() = ERANGE;
9425     }
9426     return s ? -x : x;
9427 }
9428
9429 /*********************************************************************
9430  *      atanhf (MSVCR120.@)
9431  *
9432  * Copied from musl: src/math/atanhf.c
9433  */
9434 float CDECL atanhf(float x)
9435 {
9436     UINT32 ux = *(UINT32*)&x;
9437     int s = ux >> 31;
9438
9439     /* |x| */
9440     ux &= 0x7fffffff;
9441     x = *(float*)&ux;
9442
9443     if (x > 1) {
9444         *_errno() = EDOM;
9445         feraiseexcept(FE_INVALID);
9446         return NAN;
9447     }
9448
9449     if (ux < 0x3f800000 - (1 << 23)) {
9450         if (ux < 0x3f800000 - (32 << 23)) {
9451             fp_barrierf(x + 0x1p120f);
9452             if (ux < (1 << 23)) /* handle underflow */
9453                 fp_barrierf(x * x);
9454         } else { /* |x| < 0.5, up to 1.7ulp error */
9455             x = 0.5f * log1pf(2 * x + 2 * x * x / (1 - x));
9456         }
9457     } else { /* avoid overflow */
9458         x = 0.5f * log1pf(2 * (x / (1 - x)));
9459         if (isinf(x)) *_errno() = ERANGE;
9460     }
9461     return s ? -x : x;
9462 }
9463
9464 #endif /* _MSVCR_VER>=120 */
9465
9466 /*********************************************************************
9467  *      _scalb  (MSVCRT.@)
9468  *      scalbn  (MSVCR120.@)
9469  *      scalbln (MSVCR120.@)
9470  */
9471 double CDECL _scalb(double num, __msvcrt_long power)
9472 {
9473   return ldexp(num, power);
9474 }
9475
9476 /*********************************************************************
9477  *      _scalbf  (MSVCRT.@)
9478  *      scalbnf  (MSVCR120.@)
9479  *      scalblnf (MSVCR120.@)
9480  */
9481 float CDECL _scalbf(float num, __msvcrt_long power)
9482 {
9483   return ldexp(num, power);
9484 }
9485
9486 #if _MSVCR_VER>=120
9487
9488 /*********************************************************************
9489  *      remainder (MSVCR120.@)
9490  *
9491  * Copied from musl: src/math/remainder.c
9492  */
9493 double CDECL remainder(double x, double y)
9494 {
9495     int q;
9496 #if _MSVCR_VER == 120 && defined(__x86_64__)
9497     if (isnan(x) || isnan(y)) *_errno() = EDOM;
9498 #endif
9499     return remquo(x, y, &q);
9500 }
9501
9502 /*********************************************************************
9503  *      remainderf (MSVCR120.@)
9504  *
9505  * Copied from musl: src/math/remainderf.c
9506  */
9507 float CDECL remainderf(float x, float y)
9508 {
9509     int q;
9510 #if _MSVCR_VER == 120 && defined(__x86_64__)
9511     if (isnan(x) || isnan(y)) *_errno() = EDOM;
9512 #endif
9513     return remquof(x, y, &q);
9514 }
9515
9516 /*********************************************************************
9517  *      remquo (MSVCR120.@)
9518  *
9519  * Copied from musl: src/math/remquo.c
9520  */
9521 double CDECL remquo(double x, double y, int *quo)
9522 {
9523     UINT64 uxi = *(UINT64*)&x;
9524     UINT64 uyi = *(UINT64*)&y;
9525     int ex = uxi >> 52 & 0x7ff;
9526     int ey = uyi >> 52 & 0x7ff;
9527     int sx = uxi >> 63;
9528     int sy = uyi >> 63;
9529     UINT32 q;
9530     UINT64 i;
9531
9532     *quo = 0;
9533     if (y == 0 || isinf(x)) *_errno() = EDOM;
9534     if (uyi << 1 == 0 || isnan(y) || ex == 0x7ff)
9535         return (x * y) / (x * y);
9536     if (uxi << 1 == 0)
9537         return x;
9538
9539     /* normalize x and y */
9540     if (!ex) {
9541         for (i = uxi << 12; i >> 63 == 0; ex--, i <<= 1);
9542         uxi <<= -ex + 1;
9543     } else {
9544         uxi &= -1ULL >> 12;
9545         uxi |= 1ULL << 52;
9546     }
9547     if (!ey) {
9548         for (i = uyi << 12; i >> 63 == 0; ey--, i <<= 1);
9549         uyi <<= -ey + 1;
9550     } else {
9551         uyi &= -1ULL >> 12;
9552         uyi |= 1ULL << 52;
9553     }
9554
9555     q = 0;
9556     if (ex < ey) {
9557         if (ex+1 == ey)
9558             goto end;
9559         return x;
9560     }
9561
9562     /* x mod y */
9563     for (; ex > ey; ex--) {
9564         i = uxi - uyi;
9565         if (i >> 63 == 0) {
9566             uxi = i;
9567             q++;
9568         }
9569         uxi <<= 1;
9570         q <<= 1;
9571     }
9572     i = uxi - uyi;
9573     if (i >> 63 == 0) {
9574         uxi = i;
9575         q++;
9576     }
9577     if (uxi == 0)
9578         ex = -60;
9579     else
9580         for (; uxi >> 52 == 0; uxi <<= 1, ex--);
9581 end:
9582     /* scale result and decide between |x| and |x|-|y| */
9583     if (ex > 0) {
9584         uxi -= 1ULL << 52;
9585         uxi |= (UINT64)ex << 52;
9586     } else {
9587         uxi >>= -ex + 1;
9588     }
9589     x = *(double*)&uxi;
9590     if (sy)
9591         y = -y;
9592     if (ex == ey || (ex + 1 == ey && (2 * x > y || (2 * x == y && q % 2)))) {
9593         x -= y;
9594         q++;
9595     }
9596     q &= 0x7fffffff;
9597     *quo = sx ^ sy ? -(int)q : (int)q;
9598     return sx ? -x : x;
9599 }
9600
9601 /*********************************************************************
9602  *      remquof (MSVCR120.@)
9603  *
9604  * Copied from musl: src/math/remquof.c
9605  */
9606 float CDECL remquof(float x, float y, int *quo)
9607 {
9608     UINT32 uxi = *(UINT32*)&x;
9609     UINT32 uyi = *(UINT32*)&y;
9610     int ex = uxi >> 23 & 0xff;
9611     int ey = uyi >> 23 & 0xff;
9612     int sx = uxi >> 31;
9613     int sy = uyi>> 31;
9614     UINT32 q, i;
9615
9616     *quo = 0;
9617     if (y == 0 || isinf(x)) *_errno() = EDOM;
9618     if (uyi << 1 == 0 || isnan(y) || ex == 0xff)
9619         return (x * y) / (x * y);
9620     if (uxi << 1 == 0)
9621         return x;
9622
9623     /* normalize x and y */
9624     if (!ex) {
9625         for (i = uxi << 9; i >> 31 == 0; ex--, i <<= 1);
9626         uxi <<= -ex + 1;
9627     } else {
9628         uxi &= -1U >> 9;
9629         uxi |= 1U << 23;
9630     }
9631     if (!ey) {
9632         for (i = uyi << 9; i >> 31 == 0; ey--, i <<= 1);
9633         uyi <<= -ey + 1;
9634     } else {
9635         uyi &= -1U >> 9;
9636         uyi |= 1U << 23;
9637     }
9638
9639     q = 0;
9640     if (ex < ey) {
9641         if (ex + 1 == ey)
9642             goto end;
9643         return x;
9644     }
9645
9646     /* x mod y */
9647     for (; ex > ey; ex--) {
9648         i = uxi - uyi;
9649         if (i >> 31 == 0) {
9650             uxi = i;
9651             q++;
9652         }
9653         uxi <<= 1;
9654         q <<= 1;
9655     }
9656     i = uxi - uyi;
9657     if (i >> 31 == 0) {
9658         uxi = i;
9659         q++;
9660     }
9661     if (uxi == 0)
9662         ex = -30;
9663     else
9664         for (; uxi >> 23 == 0; uxi <<= 1, ex--);
9665 end:
9666     /* scale result and decide between |x| and |x|-|y| */
9667     if (ex > 0) {
9668         uxi -= 1U << 23;
9669         uxi |= (UINT32)ex << 23;
9670     } else {
9671         uxi >>= -ex + 1;
9672     }
9673     x = *(float*)&uxi;
9674     if (sy)
9675         y = -y;
9676     if (ex == ey || (ex + 1 == ey && (2 * x > y || (2 * x == y && q % 2)))) {
9677         x -= y;
9678         q++;
9679     }
9680     q &= 0x7fffffff;
9681     *quo = sx ^ sy ? -(int)q : (int)q;
9682     return sx ? -x : x;
9683 }
9684
9685 /* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
9686 static double sin_pi(double x)
9687 {
9688     int n;
9689
9690     /* spurious inexact if odd int */
9691     x = 2.0 * (x * 0.5 - floor(x * 0.5)); /* x mod 2.0 */
9692
9693     n = x * 4.0;
9694     n = (n + 1) / 2;
9695     x -= n * 0.5f;
9696     x *= M_PI;
9697
9698     switch (n) {
9699     default: /* case 4: */
9700     case 0: return __sin(x, 0.0, 0);
9701     case 1: return __cos(x, 0.0);
9702     case 2: return __sin(-x, 0.0, 0);
9703     case 3: return -__cos(x, 0.0);
9704     }
9705 }
9706
9707 /*********************************************************************
9708  *      lgamma (MSVCR120.@)
9709  *
9710  * Copied from musl: src/math/lgamma_r.c
9711  */
9712 double CDECL lgamma(double x)
9713 {
9714     static const double pi = 3.14159265358979311600e+00,
9715         a0 = 7.72156649015328655494e-02,
9716         a1 = 3.22467033424113591611e-01,
9717         a2 = 6.73523010531292681824e-02,
9718         a3 = 2.05808084325167332806e-02,
9719         a4 = 7.38555086081402883957e-03,
9720         a5 = 2.89051383673415629091e-03,
9721         a6 = 1.19270763183362067845e-03,
9722         a7 = 5.10069792153511336608e-04,
9723         a8 = 2.20862790713908385557e-04,
9724         a9 = 1.08011567247583939954e-04,
9725         a10 = 2.52144565451257326939e-05,
9726         a11 = 4.48640949618915160150e-05,
9727         tc = 1.46163214496836224576e+00,
9728         tf = -1.21486290535849611461e-01,
9729         tt = -3.63867699703950536541e-18,
9730         t0 = 4.83836122723810047042e-01,
9731         t1 = -1.47587722994593911752e-01,
9732         t2 = 6.46249402391333854778e-02,
9733         t3 = -3.27885410759859649565e-02,
9734         t4 = 1.79706750811820387126e-02,
9735         t5 = -1.03142241298341437450e-02,
9736         t6 = 6.10053870246291332635e-03,
9737         t7 = -3.68452016781138256760e-03,
9738         t8 = 2.25964780900612472250e-03,
9739         t9 = -1.40346469989232843813e-03,
9740         t10 = 8.81081882437654011382e-04,
9741         t11 = -5.38595305356740546715e-04,
9742         t12 = 3.15632070903625950361e-04,
9743         t13 = -3.12754168375120860518e-04,
9744         t14 = 3.35529192635519073543e-04,
9745         u0 = -7.72156649015328655494e-02,
9746         u1 = 6.32827064025093366517e-01,
9747         u2 = 1.45492250137234768737e+00,
9748         u3 = 9.77717527963372745603e-01,
9749         u4 = 2.28963728064692451092e-01,
9750         u5 = 1.33810918536787660377e-02,
9751         v1 = 2.45597793713041134822e+00,
9752         v2 = 2.12848976379893395361e+00,
9753         v3 = 7.69285150456672783825e-01,
9754         v4 = 1.04222645593369134254e-01,
9755         v5 = 3.21709242282423911810e-03,
9756         s0 = -7.72156649015328655494e-02,
9757         s1 = 2.14982415960608852501e-01,
9758         s2 = 3.25778796408930981787e-01,
9759         s3 = 1.46350472652464452805e-01,
9760         s4 = 2.66422703033638609560e-02,
9761         s5 = 1.84028451407337715652e-03,
9762         s6 = 3.19475326584100867617e-05,
9763         r1 = 1.39200533467621045958e+00,
9764         r2 = 7.21935547567138069525e-01,
9765         r3 = 1.71933865632803078993e-01,
9766         r4 = 1.86459191715652901344e-02,
9767         r5 = 7.77942496381893596434e-04,
9768         r6 = 7.32668430744625636189e-06,
9769         w0 = 4.18938533204672725052e-01,
9770         w1 = 8.33333333333329678849e-02,
9771         w2 = -2.77777777728775536470e-03,
9772         w3 = 7.93650558643019558500e-04,
9773         w4 = -5.95187557450339963135e-04,
9774         w5 = 8.36339918996282139126e-04,
9775         w6 = -1.63092934096575273989e-03;
9776
9777     union {double f; UINT64 i;} u = {x};
9778     double t, y, z, nadj, p, p1, p2, p3, q, r, w;
9779     UINT32 ix;
9780     int sign,i;
9781
9782     /* purge off +-inf, NaN, +-0, tiny and negative arguments */
9783     sign = u.i >> 63;
9784     ix = u.i >> 32 & 0x7fffffff;
9785     if (ix >= 0x7ff00000)
9786         return x * x;
9787     if (ix < (0x3ff - 70) << 20) { /* |x|<2**-70, return -log(|x|) */
9788         if(sign)
9789             x = -x;
9790         return -log(x);
9791     }
9792     if (sign) {
9793         x = -x;
9794         t = sin_pi(x);
9795         if (t == 0.0) { /* -integer */
9796             *_errno() = ERANGE;
9797             return 1.0 / (x - x);
9798         }
9799         if (t <= 0.0)
9800             t = -t;
9801         nadj = log(pi / (t * x));
9802     }
9803
9804     /* purge off 1 and 2 */
9805     if ((ix == 0x3ff00000 || ix == 0x40000000) && (UINT32)u.i == 0)
9806         r = 0;
9807     /* for x < 2.0 */
9808     else if (ix < 0x40000000) {
9809         if (ix <= 0x3feccccc) { /* lgamma(x) = lgamma(x+1)-log(x) */
9810             r = -log(x);
9811             if (ix >= 0x3FE76944) {
9812                 y = 1.0 - x;
9813                 i = 0;
9814             } else if (ix >= 0x3FCDA661) {
9815                 y = x - (tc - 1.0);
9816                 i = 1;
9817             } else {
9818                 y = x;
9819                 i = 2;
9820             }
9821         } else {
9822             r = 0.0;
9823             if (ix >= 0x3FFBB4C3) { /* [1.7316,2] */
9824                 y = 2.0 - x;
9825                 i = 0;
9826             } else if(ix >= 0x3FF3B4C4) { /* [1.23,1.73] */
9827                 y = x - tc;
9828                 i = 1;
9829             } else {
9830                 y = x - 1.0;
9831                 i = 2;
9832             }
9833         }
9834         switch (i) {
9835         case 0:
9836             z = y * y;
9837             p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10))));
9838             p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11)))));
9839             p = y * p1 + p2;
9840             r += (p - 0.5 * y);
9841             break;
9842         case 1:
9843             z = y * y;
9844             w = z * y;
9845             p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12))); /* parallel comp */
9846             p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13)));
9847             p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14)));
9848             p = z * p1 - (tt - w * (p2 + y * p3));
9849             r += tf + p;
9850             break;
9851         case 2:
9852             p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5)))));
9853             p2 = 1.0 + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
9854             r += -0.5 * y + p1 / p2;
9855         }
9856     } else if (ix < 0x40200000) { /* x < 8.0 */
9857         i = (int)x;
9858         y = x - (double)i;
9859         p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
9860         q = 1.0 + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6)))));
9861         r = 0.5 * y + p / q;
9862         z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */
9863         switch (i) {
9864         case 7: z *= y + 6.0; /* fall through */
9865         case 6: z *= y + 5.0; /* fall through */
9866         case 5: z *= y + 4.0; /* fall through */
9867         case 4: z *= y + 3.0; /* fall through */
9868         case 3:
9869             z *= y + 2.0;
9870             r += log(z);
9871             break;
9872         }
9873     } else if (ix < 0x43900000) { /* 8.0 <= x < 2**58 */
9874         t = log(x);
9875         z = 1.0 / x;
9876         y = z * z;
9877         w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
9878         r = (x - 0.5) * (t - 1.0) + w;
9879     } else /* 2**58 <= x <= inf */
9880         r = x * (log(x) - 1.0);
9881     if (sign)
9882         r = nadj - r;
9883     return r;
9884 }
9885
9886 /* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
9887 static float sinf_pi(float x)
9888 {
9889     double y;
9890     int n;
9891
9892     /* spurious inexact if odd int */
9893     x = 2 * (x * 0.5f - floorf(x * 0.5f)); /* x mod 2.0 */
9894
9895     n = (int)(x * 4);
9896     n = (n + 1) / 2;
9897     y = x - n * 0.5f;
9898     y *= M_PI;
9899     switch (n) {
9900     default: /* case 4: */
9901     case 0: return __sindf(y);
9902     case 1: return __cosdf(y);
9903     case 2: return __sindf(-y);
9904     case 3: return -__cosdf(y);
9905     }
9906 }
9907
9908 /*********************************************************************
9909  *      lgammaf (MSVCR120.@)
9910  *
9911  * Copied from musl: src/math/lgammaf_r.c
9912  */
9913 float CDECL lgammaf(float x)
9914 {
9915     static const float pi = 3.1415927410e+00,
9916         a0 = 7.7215664089e-02,
9917         a1 = 3.2246702909e-01,
9918         a2 = 6.7352302372e-02,
9919         a3 = 2.0580807701e-02,
9920         a4 = 7.3855509982e-03,
9921         a5 = 2.8905137442e-03,
9922         a6 = 1.1927076848e-03,
9923         a7 = 5.1006977446e-04,
9924         a8 = 2.2086278477e-04,
9925         a9 = 1.0801156895e-04,
9926         a10 = 2.5214456400e-05,
9927         a11 = 4.4864096708e-05,
9928         tc = 1.4616321325e+00,
9929         tf = -1.2148628384e-01,
9930         tt = 6.6971006518e-09,
9931         t0 = 4.8383611441e-01,
9932         t1 = -1.4758771658e-01,
9933         t2 = 6.4624942839e-02,
9934         t3 = -3.2788541168e-02,
9935         t4 = 1.7970675603e-02,
9936         t5 = -1.0314224288e-02,
9937         t6 = 6.1005386524e-03,
9938         t7 = -3.6845202558e-03,
9939         t8 = 2.2596477065e-03,
9940         t9 = -1.4034647029e-03,
9941         t10 = 8.8108185446e-04,
9942         t11 = -5.3859531181e-04,
9943         t12 = 3.1563205994e-04,
9944         t13 = -3.1275415677e-04,
9945         t14 = 3.3552918467e-04,
9946         u0 = -7.7215664089e-02,
9947         u1 = 6.3282704353e-01,
9948         u2 = 1.4549225569e+00,
9949         u3 = 9.7771751881e-01,
9950         u4 = 2.2896373272e-01,
9951         u5 = 1.3381091878e-02,
9952         v1 = 2.4559779167e+00,
9953         v2 = 2.1284897327e+00,
9954         v3 = 7.6928514242e-01,
9955         v4 = 1.0422264785e-01,
9956         v5 = 3.2170924824e-03,
9957         s0 = -7.7215664089e-02,
9958         s1 = 2.1498242021e-01,
9959         s2 = 3.2577878237e-01,
9960         s3 = 1.4635047317e-01,
9961         s4 = 2.6642270386e-02,
9962         s5 = 1.8402845599e-03,
9963         s6 = 3.1947532989e-05,
9964         r1 = 1.3920053244e+00,
9965         r2 = 7.2193557024e-01,
9966         r3 = 1.7193385959e-01,
9967         r4 = 1.8645919859e-02,
9968         r5 = 7.7794247773e-04,
9969         r6 = 7.3266842264e-06,
9970         w0 = 4.1893854737e-01,
9971         w1 = 8.3333335817e-02,
9972         w2 = -2.7777778450e-03,
9973         w3 = 7.9365057172e-04,
9974         w4 = -5.9518753551e-04,
9975         w5 = 8.3633989561e-04,
9976         w6 = -1.6309292987e-03;
9977
9978     union {float f; UINT32 i;} u = {x};
9979     float t, y, z, nadj, p, p1, p2, p3, q, r, w;
9980     UINT32 ix;
9981     int i, sign;
9982
9983     /* purge off +-inf, NaN, +-0, tiny and negative arguments */
9984     sign = u.i >> 31;
9985     ix = u.i & 0x7fffffff;
9986     if (ix >= 0x7f800000)
9987         return x * x;
9988     if (ix < 0x35000000) { /* |x| < 2**-21, return -log(|x|) */
9989         if (sign)
9990             x = -x;
9991         return -logf(x);
9992     }
9993     if (sign) {
9994         x = -x;
9995         t = sinf_pi(x);
9996         if (t == 0.0f) { /* -integer */
9997             *_errno() = ERANGE;
9998             return 1.0f / (x - x);
9999         }
10000         if (t <= 0.0f)
10001             t = -t;
10002         nadj = logf(pi / (t * x));
10003     }
10004
10005     /* purge off 1 and 2 */
10006     if (ix == 0x3f800000 || ix == 0x40000000)
10007         r = 0;
10008     /* for x < 2.0 */
10009     else if (ix < 0x40000000) {
10010         if (ix <= 0x3f666666) { /* lgamma(x) = lgamma(x+1)-log(x) */
10011             r = -logf(x);
10012             if (ix >= 0x3f3b4a20) {
10013                 y = 1.0f - x;
10014                 i = 0;
10015             } else if (ix >= 0x3e6d3308) {
10016                 y = x - (tc - 1.0f);
10017                 i = 1;
10018             } else {
10019                 y = x;
10020                 i = 2;
10021             }
10022         } else {
10023             r = 0.0f;
10024             if (ix >= 0x3fdda618) { /* [1.7316,2] */
10025                 y = 2.0f - x;
10026                 i = 0;
10027             } else if (ix >= 0x3F9da620) { /* [1.23,1.73] */
10028                 y = x - tc;
10029                 i = 1;
10030             } else {
10031                 y = x - 1.0f;
10032                 i = 2;
10033             }
10034         }
10035         switch(i) {
10036         case 0:
10037             z = y * y;
10038             p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10))));
10039             p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11)))));
10040             p = y * p1 + p2;
10041             r += p - 0.5f * y;
10042             break;
10043         case 1:
10044             z = y * y;
10045             w = z * y;
10046             p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12))); /* parallel comp */
10047             p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13)));
10048             p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14)));
10049             p = z * p1 - (tt - w * (p2 + y * p3));
10050             r += (tf + p);
10051             break;
10052         case 2:
10053             p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5)))));
10054             p2 = 1.0f + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
10055             r += -0.5f * y + p1 / p2;
10056         }
10057     } else if (ix < 0x41000000) { /* x < 8.0 */
10058         i = (int)x;
10059         y = x - (float)i;
10060         p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
10061         q = 1.0f + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6)))));
10062         r = 0.5f * y + p / q;
10063         z = 1.0f; /* lgamma(1+s) = log(s) + lgamma(s) */
10064         switch (i) {
10065         case 7: z *= y + 6.0f; /* fall through */
10066         case 6: z *= y + 5.0f; /* fall through */
10067         case 5: z *= y + 4.0f; /* fall through */
10068         case 4: z *= y + 3.0f; /* fall through */
10069         case 3:
10070             z *= y + 2.0f;
10071             r += logf(z);
10072             break;
10073         }
10074     } else if (ix < 0x5c800000) { /* 8.0 <= x < 2**58 */
10075         t = logf(x);
10076         z = 1.0f / x;
10077         y = z * z;
10078         w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
10079         r = (x - 0.5f) * (t - 1.0f) + w;
10080     } else /* 2**58 <= x <= inf */
10081         r = x * (logf(x) - 1.0f);
10082     if (sign)
10083         r = nadj - r;
10084     return r;
10085 }
10086
/* Ratio of the two polynomials whose coefficients are Snum and Sden
 * (lowest power first), evaluated at x.  Helper for tgamma(). */
static double tgamma_S(double x)
{
    static const double Snum[] = {
        23531376880.410759688572007674451636754734846804940,
        42919803642.649098768957899047001988850926355848959,
        35711959237.355668049440185451547166705960488635843,
        17921034426.037209699919755754458931112671403265390,
        6039542586.3520280050642916443072979210699388420708,
        1439720407.3117216736632230727949123939715485786772,
        248874557.86205415651146038641322942321632125127801,
        31426415.585400194380614231628318205362874684987640,
        2876370.6289353724412254090516208496135991145378768,
        186056.26539522349504029498971604569928220784236328,
        8071.6720023658162106380029022722506138218516325024,
        210.82427775157934587250973392071336271166969580291,
        2.5066282746310002701649081771338373386264310793408,
    };
    static const double Sden[] = {
        0, 39916800, 120543840, 150917976, 105258076, 45995730, 13339535,
        2637558, 357423, 32670, 1925, 66, 1,
    };

    double top = 0, bottom = 0;
    int k;

    if (x < 8) {
        /* Horner evaluation in x, from the highest coefficient down */
        k = ARRAY_SIZE(Snum);
        while (k-- > 0) {
            top = top * x + Snum[k];
            bottom = bottom * x + Sden[k];
        }
    } else {
        /* large x: evaluate in 1/x instead to avoid overflow */
        for (k = 0; k < ARRAY_SIZE(Snum); k++) {
            top = top / x + Snum[k];
            bottom = bottom / x + Sden[k];
        }
    }

    return top / bottom;
}
10125
10126 /*********************************************************************
10127  *      tgamma (MSVCR120.@)
10128  *
10129  * Copied from musl: src/math/tgamma.c
10130  */
10131 double CDECL tgamma(double x)
10132 {
10133     static const double gmhalf = 5.524680040776729583740234375;
10134     static const double fact[] = {
10135         1, 1, 2, 6, 24, 120, 720, 5040.0, 40320.0, 362880.0, 3628800.0, 39916800.0,
10136         479001600.0, 6227020800.0, 87178291200.0, 1307674368000.0, 20922789888000.0,
10137         355687428096000.0, 6402373705728000.0, 121645100408832000.0,
10138         2432902008176640000.0, 51090942171709440000.0, 1124000727777607680000.0,
10139     };
10140
10141     union {double f; UINT64 i;} u = {x};
10142     double absx, y, dy, z, r;
10143     UINT32 ix = u.i >> 32 & 0x7fffffff;
10144     int sign = u.i >> 63;
10145
10146     /* special cases */
10147     if (ix >= 0x7ff00000) {
10148         /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */
10149         if (u.i == 0xfff0000000000000ULL)
10150             *_errno() = EDOM;
10151         return x + INFINITY;
10152     }
10153     if (ix < (0x3ff - 54) << 20) {
10154         /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */
10155         if (x == 0.0)
10156             *_errno() = ERANGE;
10157         return 1 / x;
10158     }
10159
10160     /* integer arguments */
10161     /* raise inexact when non-integer */
10162     if (x == floor(x)) {
10163         if (sign) {
10164             *_errno() = EDOM;
10165             return 0 / (x - x);
10166         }
10167         if (x <= ARRAY_SIZE(fact))
10168             return fact[(int)x - 1];
10169     }
10170
10171     /* x >= 172: tgamma(x)=inf with overflow */
10172     /* x =< -184: tgamma(x)=+-0 with underflow */
10173     if (ix >= 0x40670000) { /* |x| >= 184 */
10174         *_errno() = ERANGE;
10175         if (sign) {
10176             fp_barrierf(0x1p-126 / x);
10177             return 0;
10178         }
10179         x *= 0x1p1023;
10180         return x;
10181     }
10182
10183     absx = sign ? -x : x;
10184
10185     /* handle the error of x + g - 0.5 */
10186     y = absx + gmhalf;
10187     if (absx > gmhalf) {
10188         dy = y - absx;
10189         dy -= gmhalf;
10190     } else {
10191         dy = y - gmhalf;
10192         dy -= absx;
10193     }
10194
10195     z = absx - 0.5;
10196     r = tgamma_S(absx) * exp(-y);
10197     if (x < 0) {
10198         /* reflection formula for negative x */
10199         /* sinpi(absx) is not 0, integers are already handled */
10200         r = -M_PI / (sin_pi(absx) * absx * r);
10201         dy = -dy;
10202         z = -z;
10203     }
10204     r += dy * (gmhalf + 0.5) * r / y;
10205     z = pow(y, 0.5 * z);
10206     y = r * z * z;
10207     return y;
10208 }
10209
10210 /*********************************************************************
10211  *      tgammaf (MSVCR120.@)
10212  *
10213  * Copied from musl: src/math/tgammaf.c
10214  */
10215 float CDECL tgammaf(float x)
10216 {
10217     return tgamma(x);
10218 }
10219
10220 /*********************************************************************
10221  *      nan (MSVCR120.@)
10222  */
10223 double CDECL nan(const char *tagp)
10224 {
10225     /* Windows ignores input (MSDN) */
10226     return NAN;
10227 }
10228
10229 /*********************************************************************
10230  *      nanf (MSVCR120.@)
10231  */
10232 float CDECL nanf(const char *tagp)
10233 {
10234     return NAN;
10235 }
10236
10237 /*********************************************************************
10238  *      _except1 (MSVCR120.@)
10239  *  TODO:
10240  *   - find meaning of ignored cw and operation bits
10241  *   - unk parameter
10242  */
10243 double CDECL _except1(DWORD fpe, _FP_OPERATION_CODE op, double arg, double res, DWORD cw, void *unk)
10244 {
10245     ULONG_PTR exception_arg;
10246     DWORD exception = 0;
10247     DWORD fpword = 0;
10248     WORD operation;
10249     int raise = 0;
10250
10251     TRACE("(%x %x %lf %lf %x %p)\n", fpe, op, arg, res, cw, unk);
10252
10253 #ifdef _WIN64
10254     cw = ((cw >> 7) & 0x3f) | ((cw >> 3) & 0xc00);
10255 #endif
10256     operation = op << 5;
10257     exception_arg = (ULONG_PTR)&operation;
10258
10259     if (fpe & 0x1) { /* overflow */
10260         if ((fpe == 0x1 && (cw & 0x8)) || (fpe==0x11 && (cw & 0x28))) {
10261             /* 32-bit version also sets SW_INEXACT here */
10262             raise |= FE_OVERFLOW;
10263             if (fpe & 0x10) raise |= FE_INEXACT;
10264             res = signbit(res) ? -INFINITY : INFINITY;
10265         } else {
10266             exception = EXCEPTION_FLT_OVERFLOW;
10267         }
10268     } else if (fpe & 0x2) { /* underflow */
10269         if ((fpe == 0x2 && (cw & 0x10)) || (fpe==0x12 && (cw & 0x30))) {
10270             raise |= FE_UNDERFLOW;
10271             if (fpe & 0x10) raise |= FE_INEXACT;
10272             res = signbit(res) ? -0.0 : 0.0;
10273         } else {
10274             exception = EXCEPTION_FLT_UNDERFLOW;
10275         }
10276     } else if (fpe & 0x4) { /* zerodivide */
10277         if ((fpe == 0x4 && (cw & 0x4)) || (fpe==0x14 && (cw & 0x24))) {
10278             raise |= FE_DIVBYZERO;
10279             if (fpe & 0x10) raise |= FE_INEXACT;
10280         } else {
10281             exception = EXCEPTION_FLT_DIVIDE_BY_ZERO;
10282         }
10283     } else if (fpe & 0x8) { /* invalid */
10284         if (fpe == 0x8 && (cw & 0x1)) {
10285             raise |= FE_INVALID;
10286         } else {
10287             exception = EXCEPTION_FLT_INVALID_OPERATION;
10288         }
10289     } else if (fpe & 0x10) { /* inexact */
10290         if (fpe == 0x10 && (cw & 0x20)) {
10291             raise |= FE_INEXACT;
10292         } else {
10293             exception = EXCEPTION_FLT_INEXACT_RESULT;
10294         }
10295     }
10296
10297     if (exception)
10298         raise = 0;
10299     feraiseexcept(raise);
10300     if (exception)
10301         RaiseException(exception, 0, 1, &exception_arg);
10302
10303     if (cw & 0x1) fpword |= _EM_INVALID;
10304     if (cw & 0x2) fpword |= _EM_DENORMAL;
10305     if (cw & 0x4) fpword |= _EM_ZERODIVIDE;
10306     if (cw & 0x8) fpword |= _EM_OVERFLOW;
10307     if (cw & 0x10) fpword |= _EM_UNDERFLOW;
10308     if (cw & 0x20) fpword |= _EM_INEXACT;
10309     switch (cw & 0xc00)
10310     {
10311         case 0xc00: fpword |= _RC_UP|_RC_DOWN; break;
10312         case 0x800: fpword |= _RC_UP; break;
10313         case 0x400: fpword |= _RC_DOWN; break;
10314     }
10315     switch (cw & 0x300)
10316     {
10317         case 0x0:   fpword |= _PC_24; break;
10318         case 0x200: fpword |= _PC_53; break;
10319         case 0x300: fpword |= _PC_64; break;
10320     }
10321     if (cw & 0x1000) fpword |= _IC_AFFINE;
10322     _control87(fpword, 0xffffffff);
10323
10324     return res;
10325 }
10326
10327 _Dcomplex* CDECL _Cbuild(_Dcomplex *ret, double r, double i)
10328 {
10329     ret->_Val[0] = r;
10330     ret->_Val[1] = i;
10331     return ret;
10332 }
10333
10334 double CDECL MSVCR120_creal(_Dcomplex z)
10335 {
10336     return z._Val[0];
10337 }
10338
10339 /*********************************************************************
10340  *      ilogb (MSVCR120.@)
10341  */
10342 int CDECL ilogb(double x)
10343 {
10344     return __ilogb(x);
10345 }
10346
10347 /*********************************************************************
10348  *      ilogbf (MSVCR120.@)
10349  */
10350 int CDECL ilogbf(float x)
10351 {
10352     return __ilogbf(x);
10353 }
10354 #endif /* _MSVCR_VER>=120 */