2 * msvcrt.dll math functions
4 * Copyright 2000 Jon Griffiths
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 * For functions copied from musl libc (http://musl.libc.org/):
22 * ====================================================
23 * Copyright 2005-2020 Rich Felker, et al.
25 * Permission is hereby granted, free of charge, to any person obtaining
26 * a copy of this software and associated documentation files (the
27 * "Software"), to deal in the Software without restriction, including
28 * without limitation the rights to use, copy, modify, merge, publish,
29 * distribute, sublicense, and/or sell copies of the Software, and to
30 * permit persons to whom the Software is furnished to do so, subject to
31 * the following conditions:
33 * The above copyright notice and this permission notice shall be
34 * included in all copies or substantial portions of the Software.
35 * ====================================================
51 #include "wine/debug.h"
53 WINE_DEFAULT_DEBUG_CHANNEL(msvcrt
);
58 #define _DOMAIN 1 /* domain error in argument */
59 #define _SING 2 /* singularity */
60 #define _OVERFLOW 3 /* range overflow */
61 #define _UNDERFLOW 4 /* range underflow */
63 typedef int (CDECL
*MSVCRT_matherr_func
)(struct _exception
*);
65 static MSVCRT_matherr_func MSVCRT_default_matherr_func
= NULL
;
68 static BOOL sse2_enabled
;
70 void msvcrt_init_math( void *module
)
72 sse2_supported
= IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE
);
76 sse2_enabled
= sse2_supported
;
80 /* Copied from musl: src/internal/libm.h */
81 static inline float fp_barrierf(float x
)
87 static inline double fp_barrier(double x
)
89 volatile double y
= x
;
93 static inline double ret_nan( BOOL update_sw
)
96 if (!update_sw
) return -NAN
;
97 return (x
- x
) / (x
- x
);
100 #define SET_X87_CW(MASK) \
101 "subl $4, %esp\n\t" \
102 __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
103 "fnstcw (%esp)\n\t" \
104 "movw (%esp), %ax\n\t" \
105 "movw %ax, 2(%esp)\n\t" \
106 "testw $" #MASK ", %ax\n\t" \
108 "andw $~" #MASK ", %ax\n\t" \
109 "movw %ax, 2(%esp)\n\t" \
110 "fldcw 2(%esp)\n\t" \
113 #define RESET_X87_CW \
114 "movw (%esp), %ax\n\t" \
115 "cmpw %ax, 2(%esp)\n\t" \
117 "fstpl 8(%esp)\n\t" \
122 "addl $4, %esp\n\t" \
123 __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t")
125 /*********************************************************************
126 * _matherr (CRTDLL.@)
128 int CDECL
_matherr(struct _exception
*e
)
134 static double math_error(int type
, const char *name
, double arg1
, double arg2
, double retval
)
136 struct _exception exception
= {type
, (char *)name
, arg1
, arg2
, retval
};
138 TRACE("(%d, %s, %g, %g, %g)\n", type
, debugstr_a(name
), arg1
, arg2
, retval
);
140 if (MSVCRT_default_matherr_func
&& MSVCRT_default_matherr_func(&exception
))
141 return exception
.retval
;
146 /* don't set errno */
156 /* don't set errno */
159 ERR("Unhandled math error!\n");
162 return exception
.retval
;
165 /*********************************************************************
166 * __setusermatherr (MSVCRT.@)
168 void CDECL
__setusermatherr(MSVCRT_matherr_func func
)
170 MSVCRT_default_matherr_func
= func
;
171 TRACE("new matherr handler %p\n", func
);
174 /*********************************************************************
175 * _set_SSE2_enable (MSVCRT.@)
177 int CDECL
_set_SSE2_enable(int flag
)
179 sse2_enabled
= flag
&& sse2_supported
;
185 /*********************************************************************
186 * _get_FMA3_enable (UCRTBASE.@)
188 int CDECL
_get_FMA3_enable(void)
196 /*********************************************************************
197 * _set_FMA3_enable (MSVCR120.@)
199 int CDECL
_set_FMA3_enable(int flag
)
201 FIXME("(%x) stub\n", flag
);
207 #if !defined(__i386__) || _MSVCR_VER>=120
209 /*********************************************************************
210 * _chgsignf (MSVCRT.@)
212 float CDECL
_chgsignf( float num
)
214 union { float f
; UINT32 i
; } u
= { num
};
219 /*********************************************************************
220 * _copysignf (MSVCRT.@)
222 * Copied from musl: src/math/copysignf.c
224 float CDECL
_copysignf( float x
, float y
)
226 union { float f
; UINT32 i
; } ux
= { x
}, uy
= { y
};
228 ux
.i
|= uy
.i
& 0x80000000;
232 /*********************************************************************
233 * _nextafterf (MSVCRT.@)
235 * Copied from musl: src/math/nextafterf.c
237 float CDECL
_nextafterf( float x
, float y
)
239 unsigned int ix
= *(unsigned int*)&x
;
240 unsigned int iy
= *(unsigned int*)&y
;
241 unsigned int ax
, ay
, e
;
243 if (isnan(x
) || isnan(y
))
246 if (_fpclassf(y
) & (_FPCLASS_ND
| _FPCLASS_PD
| _FPCLASS_NZ
| _FPCLASS_PZ
))
250 ax
= ix
& 0x7fffffff;
251 ay
= iy
& 0x7fffffff;
255 ix
= (iy
& 0x80000000) | 1;
256 } else if (ax
> ay
|| ((ix
^ iy
) & 0x80000000))
261 /* raise overflow if ix is infinite and x is finite */
262 if (e
== 0x7f800000) {
266 /* raise underflow if ix is subnormal or zero */
269 fp_barrierf(x
* x
+ y
* y
);
275 /* Copied from musl: src/math/ilogbf.c */
276 static int __ilogbf(float x
)
278 union { float f
; UINT32 i
; } u
= { x
};
279 int e
= u
.i
>> 23 & 0xff;
284 if (u
.i
== 0) return FP_ILOGB0
;
286 for (e
= -0x7f; u
.i
>> 31 == 0; e
--, u
.i
<<= 1);
289 if (e
== 0xff) return u
.i
<< 9 ? FP_ILOGBNAN
: INT_MAX
;
293 /*********************************************************************
296 * Copied from musl: src/math/logbf.c
298 float CDECL
_logbf(float x
)
311 /* Copied from musl: src/math/scalbn.c */
312 static double __scalbn(double x
, int n
)
314 union {double f
; UINT64 i
;} u
;
326 } else if (n
< -1022) {
327 /* make sure final n < -53 to avoid double
328 rounding in the subnormal range */
329 y
*= 0x1p
-1022 * 0x1p
53;
332 y
*= 0x1p
-1022 * 0x1p
53;
338 u
.i
= (UINT64
)(0x3ff + n
) << 52;
343 /* Copied from musl: src/math/__rem_pio2_large.c */
344 static int __rem_pio2_large(double *x
, double *y
, int e0
, int nx
, int prec
)
346 static const int init_jk
[] = {3, 4};
347 static const INT32 ipio2
[] = {
348 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
349 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
350 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
351 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
352 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
353 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
354 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
355 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
356 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
357 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
358 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
360 static const double PIo2
[] = {
361 1.57079625129699707031e+00,
362 7.54978941586159635335e-08,
363 5.39030252995776476554e-15,
364 3.28200341580791294123e-22,
365 1.27065575308067607349e-29,
366 1.22933308981111328932e-36,
367 2.73370053816464559624e-44,
368 2.16741683877804819444e-51,
371 INT32 jz
, jx
, jv
, jp
, jk
, carry
, n
, iq
[20], i
, j
, k
, m
, q0
, ih
;
372 double z
, fw
, f
[20], fq
[20] = {0}, q
[20];
378 /* determine jx,jv,q0, note that 3>q0 */
382 q0
= e0
- 24 * (jv
+ 1);
384 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
387 for (i
= 0; i
<= m
; i
++, j
++)
388 f
[i
] = j
< 0 ? 0.0 : (double)ipio2
[j
];
390 /* compute q[0],q[1],...q[jk] */
391 for (i
= 0; i
<= jk
; i
++) {
392 for (j
= 0, fw
= 0.0; j
<= jx
; j
++)
393 fw
+= x
[j
] * f
[jx
+ i
- j
];
399 /* distill q[] into iq[] reversingly */
400 for (i
= 0, j
= jz
, z
= q
[jz
]; j
> 0; i
++, j
--) {
401 fw
= (double)(INT32
)(0x1p
-24 * z
);
402 iq
[i
] = (INT32
)(z
- 0x1p
24 * fw
);
407 z
= __scalbn(z
, q0
); /* actual value of z */
408 z
-= 8.0 * floor(z
* 0.125); /* trim off integer >= 8 */
412 if (q0
> 0) { /* need iq[jz-1] to determine n */
413 i
= iq
[jz
- 1] >> (24 - q0
);
415 iq
[jz
- 1] -= i
<< (24 - q0
);
416 ih
= iq
[jz
- 1] >> (23 - q0
);
418 else if (q0
== 0) ih
= iq
[jz
- 1] >> 23;
419 else if (z
>= 0.5) ih
= 2;
421 if (ih
> 0) { /* q > 0.5 */
424 for (i
= 0; i
< jz
; i
++) { /* compute 1-q */
429 iq
[i
] = 0x1000000 - j
;
432 iq
[i
] = 0xffffff - j
;
434 if (q0
> 0) { /* rare case: chance is 1 in 12 */
437 iq
[jz
- 1] &= 0x7fffff;
440 iq
[jz
- 1] &= 0x3fffff;
447 z
-= __scalbn(1.0, q0
);
451 /* check if recomputation is needed */
454 for (i
= jz
- 1; i
>= jk
; i
--) j
|= iq
[i
];
455 if (j
== 0) { /* need recomputation */
456 for (k
= 1; iq
[jk
- k
] == 0; k
++); /* k = no. of terms needed */
458 for (i
= jz
+ 1; i
<= jz
+ k
; i
++) { /* add q[jz+1] to q[jz+k] */
459 f
[jx
+ i
] = (double)ipio2
[jv
+ i
];
460 for (j
= 0, fw
= 0.0; j
<= jx
; j
++)
461 fw
+= x
[j
] * f
[jx
+ i
- j
];
469 /* chop off zero terms */
473 while (iq
[jz
] == 0) {
477 } else { /* break z into 24-bit if necessary */
478 z
= __scalbn(z
, -q0
);
480 fw
= (double)(INT32
)(0x1p
-24 * z
);
481 iq
[jz
] = (INT32
)(z
- 0x1p
24 * fw
);
489 /* convert integer "bit" chunk to floating-point value */
490 fw
= __scalbn(1.0, q0
);
491 for (i
= jz
; i
>= 0; i
--) {
492 q
[i
] = fw
* (double)iq
[i
];
496 /* compute PIo2[0,...,jp]*q[jz,...,0] */
497 for(i
= jz
; i
>= 0; i
--) {
498 for (fw
= 0.0, k
= 0; k
<= jp
&& k
<= jz
- i
; k
++)
499 fw
+= PIo2
[k
] * q
[i
+ k
];
503 /* compress fq[] into y[] */
507 for (i
= jz
; i
>= 0; i
--)
509 y
[0] = ih
== 0 ? fw
: -fw
;
514 for (i
= jz
; i
>= 0; i
--)
517 y
[0] = ih
==0 ? fw
: -fw
;
519 for (i
= 1; i
<= jz
; i
++)
521 y
[1] = ih
== 0 ? fw
: -fw
;
523 case 3: /* painful */
524 for (i
= jz
; i
> 0; i
--) {
525 fw
= fq
[i
- 1] + fq
[i
];
526 fq
[i
] += fq
[i
- 1] - fw
;
529 for (i
= jz
; i
> 1; i
--) {
530 fw
= fq
[i
- 1] + fq
[i
];
531 fq
[i
] += fq
[i
- 1] - fw
;
534 for (fw
= 0.0, i
= jz
; i
>= 2; i
--)
549 /* Based on musl implementation: src/math/round.c */
550 static double __round(double x
)
552 ULONGLONG llx
= *(ULONGLONG
*)&x
, tmp
;
553 int e
= (llx
>> 52 & 0x7ff) - 0x3ff;
560 return signbit(x
) ? -1 : 1;
562 tmp
= 0x000fffffffffffffULL
>> e
;
565 llx
+= 0x0008000000000000ULL
>> e
;
567 return *(double*)&llx
;
570 #if !defined(__i386__) || _MSVCR_VER >= 120
571 /* Copied from musl: src/math/expm1f.c */
572 static float __expm1f(float x
)
574 static const float ln2_hi
= 6.9313812256e-01,
575 ln2_lo
= 9.0580006145e-06,
576 invln2
= 1.4426950216e+00,
577 Q1
= -3.3333212137e-2,
578 Q2
= 1.5807170421e-3;
580 float y
, hi
, lo
, c
, t
, e
, hxs
, hfx
, r1
, twopk
;
581 union {float f
; UINT32 i
;} u
= {x
};
582 UINT32 hx
= u
.i
& 0x7fffffff;
583 int k
, sign
= u
.i
>> 31;
585 /* filter out huge and non-finite argument */
586 if (hx
>= 0x4195b844) { /* if |x|>=27*ln2 */
587 if (hx
>= 0x7f800000) /* NaN */
588 return u
.i
== 0xff800000 ? -1 : x
;
590 return math_error(_UNDERFLOW
, "exp", x
, 0, -1);
591 if (hx
> 0x42b17217) /* x > log(FLT_MAX) */
592 return math_error(_OVERFLOW
, "exp", x
, 0, fp_barrierf(x
* FLT_MAX
));
595 /* argument reduction */
596 if (hx
> 0x3eb17218) { /* if |x| > 0.5 ln2 */
597 if (hx
< 0x3F851592) { /* and |x| < 1.5 ln2 */
608 k
= invln2
* x
+ (sign
? -0.5f
: 0.5f
);
610 hi
= x
- t
* ln2_hi
; /* t*ln2_hi is exact here */
615 } else if (hx
< 0x33000000) { /* when |x|<2**-25, return x */
622 /* x is now in primary range */
625 r1
= 1.0f
+ hxs
* (Q1
+ hxs
* Q2
);
627 e
= hxs
* ((r1
- t
) / (6.0f
- x
* t
));
628 if (k
== 0) /* c is 0 */
629 return x
- (x
* e
- hxs
);
632 /* exp(x) ~ 2^k (x_reduced - e + 1) */
634 return 0.5f
* (x
- e
) - 0.5f
;
637 return -2.0f
* (e
- (x
+ 0.5f
));
638 return 1.0f
+ 2.0f
* (x
- e
);
640 u
.i
= (0x7f + k
) << 23; /* 2^k */
642 if (k
< 0 || k
> 56) { /* suffice to return exp(x)-1 */
645 y
= y
* 2.0f
* 0x1p
127f
;
650 u
.i
= (0x7f-k
) << 23; /* 2^-k */
652 y
= (x
- e
+ (1 - u
.f
)) * twopk
;
654 y
= (x
- (e
+ u
.f
) + 1) * twopk
;
658 /* Copied from musl: src/math/__sindf.c */
659 static float __sindf(double x
)
661 static const double S1
= -0x1.5555555555555p
-3,
662 S2
= 0x1.1111111111111p
-7,
663 S3
= -0x1.a01a01a01a01ap
-13,
664 S4
= 0x1.71de3a556c734p
-19;
669 if (x
> -7.8175831586122513e-03 && x
< 7.8175831586122513e-03)
670 return x
* (1 + S1
* z
);
675 return (x
+ s
* (S1
+ z
* S2
)) + s
* w
* r
;
678 /* Copied from musl: src/math/__cosdf.c */
679 static float __cosdf(double x
)
681 static const double C0
= -0x1.0000000000000p
-1,
682 C1
= 0x1.5555555555555p
-5,
683 C2
= -0x1.6c16c16c16c17p
-10,
684 C3
= 0x1.a01a01a01a01ap
-16,
685 C4
= -0x1.27e4fb7789f5cp
-22;
689 if (x
> -7.8163146972656250e-03 && x
< 7.8163146972656250e-03)
691 return 1.0 + z
* (C0
+ z
* (C1
+ z
* (C2
+ z
* (C3
+ z
* C4
))));
694 static const UINT64 exp2f_T
[] = {
695 0x3ff0000000000000ULL
, 0x3fefd9b0d3158574ULL
, 0x3fefb5586cf9890fULL
, 0x3fef9301d0125b51ULL
,
696 0x3fef72b83c7d517bULL
, 0x3fef54873168b9aaULL
, 0x3fef387a6e756238ULL
, 0x3fef1e9df51fdee1ULL
,
697 0x3fef06fe0a31b715ULL
, 0x3feef1a7373aa9cbULL
, 0x3feedea64c123422ULL
, 0x3feece086061892dULL
,
698 0x3feebfdad5362a27ULL
, 0x3feeb42b569d4f82ULL
, 0x3feeab07dd485429ULL
, 0x3feea47eb03a5585ULL
,
699 0x3feea09e667f3bcdULL
, 0x3fee9f75e8ec5f74ULL
, 0x3feea11473eb0187ULL
, 0x3feea589994cce13ULL
,
700 0x3feeace5422aa0dbULL
, 0x3feeb737b0cdc5e5ULL
, 0x3feec49182a3f090ULL
, 0x3feed503b23e255dULL
,
701 0x3feee89f995ad3adULL
, 0x3feeff76f2fb5e47ULL
, 0x3fef199bdd85529cULL
, 0x3fef3720dcef9069ULL
,
702 0x3fef5818dcfba487ULL
, 0x3fef7c97337b9b5fULL
, 0x3fefa4afa2a490daULL
, 0x3fefd0765b6e4540ULL
706 /*********************************************************************
707 * _fdclass (MSVCR120.@)
709 * Copied from musl: src/math/__fpclassifyf.c
711 short CDECL
_fdclass(float x
)
713 union { float f
; UINT32 i
; } u
= { x
};
714 int e
= u
.i
>> 23 & 0xff;
716 if (!e
) return u
.i
<< 1 ? FP_SUBNORMAL
: FP_ZERO
;
717 if (e
== 0xff) return u
.i
<< 9 ? FP_NAN
: FP_INFINITE
;
721 /*********************************************************************
722 * _dclass (MSVCR120.@)
724 * Copied from musl: src/math/__fpclassify.c
726 short CDECL
_dclass(double x
)
728 union { double f
; UINT64 i
; } u
= { x
};
729 int e
= u
.i
>> 52 & 0x7ff;
731 if (!e
) return u
.i
<< 1 ? FP_SUBNORMAL
: FP_ZERO
;
732 if (e
== 0x7ff) return (u
.i
<< 12) ? FP_NAN
: FP_INFINITE
;
738 /*********************************************************************
739 * _fpclassf (MSVCRT.@)
741 int CDECL
_fpclassf( float num
)
743 union { float f
; UINT32 i
; } u
= { num
};
744 int e
= u
.i
>> 23 & 0xff;
750 if (u
.i
<< 1) return s
? _FPCLASS_ND
: _FPCLASS_PD
;
751 return s
? _FPCLASS_NZ
: _FPCLASS_PZ
;
753 if (u
.i
<< 9) return ((u
.i
>> 22) & 1) ? _FPCLASS_QNAN
: _FPCLASS_SNAN
;
754 return s
? _FPCLASS_NINF
: _FPCLASS_PINF
;
756 return s
? _FPCLASS_NN
: _FPCLASS_PN
;
760 /*********************************************************************
761 * _finitef (MSVCRT.@)
763 int CDECL
_finitef( float num
)
765 union { float f
; UINT32 i
; } u
= { num
};
766 return (u
.i
& 0x7fffffff) < 0x7f800000;
769 /*********************************************************************
772 int CDECL
_isnanf( float num
)
774 union { float f
; UINT32 i
; } u
= { num
};
775 return (u
.i
& 0x7fffffff) > 0x7f800000;
778 static float asinf_R(float z
)
780 /* coefficients for R(x^2) */
781 static const float p1
= 1.66666672e-01,
782 p2
= -5.11644611e-02,
783 p3
= -1.21124933e-02,
784 p4
= -3.58742251e-03,
785 q1
= -7.56982703e-01;
788 p
= z
* (p1
+ z
* (p2
+ z
* (p3
+ z
* p4
)));
793 /*********************************************************************
796 * Copied from musl: src/math/acosf.c
798 float CDECL
acosf( float x
)
800 static const double pio2_lo
= 6.12323399573676603587e-17;
801 static const double pio2_hi
= 1.57079632679489655800e+00;
803 float z
, w
, s
, c
, df
;
806 hx
= *(unsigned int*)&x
;
807 ix
= hx
& 0x7fffffff;
808 /* |x| >= 1 or nan */
809 if (ix
>= 0x3f800000) {
810 if (ix
== 0x3f800000) {
815 if (isnan(x
)) return x
;
816 return math_error(_DOMAIN
, "acosf", x
, 0, 0 / (x
- x
));
819 if (ix
< 0x3f000000) {
820 if (ix
<= 0x32800000) /* |x| < 2**-26 */
822 return pio2_hi
- (x
- (pio2_lo
- x
* asinf_R(x
* x
)));
828 return 2*(pio2_hi
- (s
+ (asinf_R(z
) * s
- pio2_lo
)));
833 hx
= *(unsigned int*)&s
& 0xffff0000;
835 c
= (z
- df
* df
) / (s
+ df
);
836 w
= asinf_R(z
) * s
+ c
;
840 /*********************************************************************
843 * Copied from musl: src/math/asinf.c
845 float CDECL
asinf( float x
)
847 static const double pio2
= 1.570796326794896558e+00;
848 static const float pio4_hi
= 0.785398125648;
849 static const float pio2_lo
= 7.54978941586e-08;
854 hx
= *(unsigned int*)&x
;
855 ix
= hx
& 0x7fffffff;
856 if (ix
>= 0x3f800000) { /* |x| >= 1 */
857 if (ix
== 0x3f800000) /* |x| == 1 */
858 return x
* pio2
+ 7.5231638453e-37; /* asin(+-1) = +-pi/2 with inexact */
859 if (isnan(x
)) return x
;
860 return math_error(_DOMAIN
, "asinf", x
, 0, 0 / (x
- x
));
862 if (ix
< 0x3f000000) { /* |x| < 0.5 */
863 /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */
864 if (ix
< 0x39800000 && ix
>= 0x00800000)
866 return x
+ x
* asinf_R(x
* x
);
869 z
= (1 - fabsf(x
)) * 0.5f
;
872 *(unsigned int*)&f
= *(unsigned int*)&s
& 0xffff0000;
873 c
= (z
- f
* f
) / (s
+ f
);
874 x
= pio4_hi
- (2 * s
* asinf_R(z
) - (pio2_lo
- 2 * c
) - (pio4_hi
- 2 * f
));
880 /*********************************************************************
883 * Copied from musl: src/math/atanf.c
885 float CDECL
atanf( float x
)
887 static const float atanhi
[] = {
893 static const float atanlo
[] = {
899 static const float aT
[] = {
908 unsigned int ix
, sign
;
912 if (isnan(x
)) return math_error(_DOMAIN
, "atanf", x
, 0, x
);
915 ix
= *(unsigned int*)&x
;
918 if (ix
>= 0x4c800000) { /* if |x| >= 2**26 */
921 z
= atanhi
[3] + 7.5231638453e-37;
922 return sign
? -z
: z
;
924 if (ix
< 0x3ee00000) { /* |x| < 0.4375 */
925 if (ix
< 0x39800000) { /* |x| < 2**-12 */
927 /* raise underflow for subnormal x */
934 if (ix
< 0x3f980000) { /* |x| < 1.1875 */
935 if (ix
< 0x3f300000) { /* 7/16 <= |x| < 11/16 */
937 x
= (2.0f
* x
- 1.0f
) / (2.0f
+ x
);
938 } else { /* 11/16 <= |x| < 19/16 */
940 x
= (x
- 1.0f
) / (x
+ 1.0f
);
943 if (ix
< 0x401c0000) { /* |x| < 2.4375 */
945 x
= (x
- 1.5f
) / (1.0f
+ 1.5f
* x
);
946 } else { /* 2.4375 <= |x| < 2**26 */
952 /* end of argument reduction */
955 /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
956 s1
= z
* (aT
[0] + w
* (aT
[2] + w
* aT
[4]));
957 s2
= w
* (aT
[1] + w
* aT
[3]);
959 return x
- x
* (s1
+ s2
);
960 z
= atanhi
[id
] - ((x
* (s1
+ s2
) - atanlo
[id
]) - x
);
961 return sign
? -z
: z
;
964 /*********************************************************************
967 * Copied from musl: src/math/atan2f.c
969 float CDECL
atan2f( float y
, float x
)
971 static const float pi
= 3.1415927410e+00,
972 pi_lo
= -8.7422776573e-08;
975 unsigned int m
, ix
, iy
;
977 if (isnan(x
) || isnan(y
))
979 ix
= *(unsigned int*)&x
;
980 iy
= *(unsigned int*)&y
;
981 if (ix
== 0x3f800000) /* x=1.0 */
983 m
= ((iy
>> 31) & 1) | ((ix
>> 30) & 2); /* 2*sign(x)+sign(y) */
991 case 1: return y
; /* atan(+-0,+anything)=+-0 */
992 case 2: return pi
; /* atan(+0,-anything) = pi */
993 case 3: return -pi
; /* atan(-0,-anything) =-pi */
998 return m
& 1 ? -pi
/ 2 : pi
/ 2;
1000 if (ix
== 0x7f800000) {
1001 if (iy
== 0x7f800000) {
1003 case 0: return pi
/ 4; /* atan(+INF,+INF) */
1004 case 1: return -pi
/ 4; /* atan(-INF,+INF) */
1005 case 2: return 3 * pi
/ 4; /*atan(+INF,-INF)*/
1006 case 3: return -3 * pi
/ 4; /*atan(-INF,-INF)*/
1010 case 0: return 0.0f
; /* atan(+...,+INF) */
1011 case 1: return -0.0f
; /* atan(-...,+INF) */
1012 case 2: return pi
; /* atan(+...,-INF) */
1013 case 3: return -pi
; /* atan(-...,-INF) */
1017 /* |y/x| > 0x1p26 */
1018 if (ix
+ (26 << 23) < iy
|| iy
== 0x7f800000)
1019 return m
& 1 ? -pi
/ 2 : pi
/ 2;
1021 /* z = atan(|y/x|) with correct underflow */
1022 if ((m
& 2) && iy
+ (26 << 23) < ix
) /*|y/x| < 0x1p-26, x < 0 */
1025 z
= atanf(fabsf(y
/ x
));
1027 case 0: return z
; /* atan(+,+) */
1028 case 1: return -z
; /* atan(-,+) */
1029 case 2: return pi
- (z
- pi_lo
); /* atan(+,-) */
1030 default: /* case 3 */
1031 return (z
- pi_lo
) - pi
; /* atan(-,-) */
1035 /* Copied from musl: src/math/__rem_pio2f.c */
1036 static int __rem_pio2f(float x
, double *y
)
1038 static const double toint
= 1.5 / DBL_EPSILON
,
1039 pio4
= 0x1.921fb6p
-1,
1040 invpio2
= 6.36619772367581382433e-01,
1041 pio2_1
= 1.57079631090164184570e+00,
1042 pio2_1t
= 1.58932547735281966916e-08;
1044 union {float f
; uint32_t i
;} u
= {x
};
1045 double tx
[1], ty
[1], fn
;
1049 ix
= u
.i
& 0x7fffffff;
1050 /* 25+53 bit pi is good enough for medium size */
1051 if (ix
< 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */
1052 /* Use a specialized rint() to get fn. */
1053 fn
= fp_barrier(x
* invpio2
+ toint
) - toint
;
1055 *y
= x
- fn
* pio2_1
- fn
* pio2_1t
;
1056 /* Matters with directed rounding. */
1060 *y
= x
- fn
* pio2_1
- fn
* pio2_1t
;
1061 } else if (*y
> pio4
) {
1064 *y
= x
- fn
* pio2_1
- fn
* pio2_1t
;
1068 if(ix
>= 0x7f800000) { /* x is inf or NaN */
1072 /* scale x into [2^23, 2^24-1] */
1074 e0
= (ix
>> 23) - (0x7f + 23); /* e0 = ilogb(|x|)-23, positive */
1075 u
.i
= ix
- (e0
<< 23);
1077 n
= __rem_pio2_large(tx
, ty
, e0
, 1, 0);
1086 /*********************************************************************
1089 * Copied from musl: src/math/cosf.c
1091 float CDECL
cosf( float x
)
1093 static const double c1pio2
= 1*M_PI_2
,
1106 if (ix
<= 0x3f490fda) { /* |x| ~<= pi/4 */
1107 if (ix
< 0x39800000) { /* |x| < 2**-12 */
1108 /* raise inexact if x != 0 */
1109 fp_barrierf(x
+ 0x1p
120f
);
1114 if (ix
<= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1115 if (ix
> 0x4016cbe3) /* |x| ~> 3*pi/4 */
1116 return -__cosdf(sign
? x
+ c2pio2
: x
- c2pio2
);
1119 return __sindf(x
+ c1pio2
);
1121 return __sindf(c1pio2
- x
);
1124 if (ix
<= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1125 if (ix
> 0x40afeddf) /* |x| ~> 7*pi/4 */
1126 return __cosdf(sign
? x
+ c4pio2
: x
- c4pio2
);
1129 return __sindf(-x
- c3pio2
);
1131 return __sindf(x
- c3pio2
);
1135 /* cos(Inf or NaN) is NaN */
1136 if (isinf(x
)) return math_error(_DOMAIN
, "cosf", x
, 0, x
- x
);
1137 if (ix
>= 0x7f800000)
1140 /* general argument reduction needed */
1141 n
= __rem_pio2f(x
, &y
);
1143 case 0: return __cosdf(y
);
1144 case 1: return __sindf(-y
);
1145 case 2: return -__cosdf(y
);
1146 default: return __sindf(y
);
1150 /* Copied from musl: src/math/__expo2f.c */
1151 static float __expo2f(float x
, float sign
)
1153 static const int k
= 235;
1154 static const float kln2
= 0x1.45c778p
+7f
;
1157 *(UINT32
*)&scale
= (UINT32
)(0x7f + k
/2) << 23;
1158 return expf(x
- kln2
) * (sign
* scale
) * scale
;
1161 /*********************************************************************
1164 * Copied from musl: src/math/coshf.c
1166 float CDECL
coshf( float x
)
1168 UINT32 ui
= *(UINT32
*)&x
;
1169 UINT32 sign
= ui
& 0x80000000;
1177 if (ui
< 0x3f317217) {
1178 if (ui
< 0x3f800000 - (12 << 23)) {
1179 fp_barrierf(x
+ 0x1p
120f
);
1183 return 1 + t
* t
/ (2 * (1 + t
));
1186 /* |x| < log(FLT_MAX) */
1187 if (ui
< 0x42b17217) {
1189 return 0.5f
* (t
+ 1 / t
);
1192 /* |x| > log(FLT_MAX) or nan */
1193 if (ui
> 0x7f800000)
1194 *(UINT32
*)&t
= ui
| sign
| 0x400000;
1196 t
= __expo2f(x
, 1.0f
);
1200 /*********************************************************************
1203 float CDECL
expf( float x
)
1205 static const double C
[] = {
1206 0x1.c6af84b912394p
-5 / (1 << 5) / (1 << 5) / (1 << 5),
1207 0x1.ebfce50fac4f3p
-3 / (1 << 5) / (1 << 5),
1208 0x1.62e42ff0c52d6p
-1 / (1 << 5)
1210 static const double invln2n
= 0x1.71547652b82fep
+0 * (1 << 5);
1212 double kd
, z
, r
, r2
, y
, s
;
1216 abstop
= (*(UINT32
*)&x
>> 20) & 0x7ff;
1217 if (abstop
>= 0x42b) {
1218 /* |x| >= 88 or x is nan. */
1219 if (*(UINT32
*)&x
== 0xff800000)
1221 if (abstop
>= 0x7f8)
1223 if (x
> 0x1.62e42ep6f
) /* x > log(0x1p128) ~= 88.72 */
1224 return math_error(_OVERFLOW
, "expf", x
, 0, x
* FLT_MAX
);
1225 if (x
< -0x1.9fe368p6f
) /* x < log(0x1p-150) ~= -103.97 */
1226 return math_error(_UNDERFLOW
, "expf", x
, 0, fp_barrierf(FLT_MIN
) * FLT_MIN
);
1229 /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */
1232 /* Round and convert z to int, the result is in [-150*N, 128*N] and
1233 ideally ties-to-even rule is used, otherwise the magnitude of r
1234 can be bigger which gives larger approximation error. */
1239 /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
1240 t
= exp2f_T
[ki
% (1 << 5)];
1241 t
+= ki
<< (52 - 5);
1243 z
= C
[0] * r
+ C
[1];
1251 /*********************************************************************
1254 * Copied from musl: src/math/fmodf.c
1256 float CDECL
fmodf( float x
, float y
)
1258 UINT32 xi
= *(UINT32
*)&x
;
1259 UINT32 yi
= *(UINT32
*)&y
;
1260 int ex
= xi
>>23 & 0xff;
1261 int ey
= yi
>>23 & 0xff;
1262 UINT32 sx
= xi
& 0x80000000;
1265 if (isinf(x
)) return math_error(_DOMAIN
, "fmodf", x
, y
, (x
* y
) / (x
* y
));
1266 if (yi
<< 1 == 0 || isnan(y
) || ex
== 0xff)
1267 return (x
* y
) / (x
* y
);
1268 if (xi
<< 1 <= yi
<< 1) {
1269 if (xi
<< 1 == yi
<< 1)
1274 /* normalize x and y */
1276 for (i
= xi
<< 9; i
>> 31 == 0; ex
--, i
<<= 1);
1283 for (i
= yi
<< 9; i
>> 31 == 0; ey
--, i
<<= 1);
1291 for (; ex
> ey
; ex
--) {
1306 for (; xi
>>23 == 0; xi
<<= 1, ex
--);
1308 /* scale result up */
1311 xi
|= (UINT32
)ex
<< 23;
1316 return *(float*)&xi
;
1319 /*********************************************************************
1322 * Copied from musl: src/math/logf.c src/math/logf_data.c
1324 float CDECL
logf( float x
)
1326 static const double Ln2
= 0x1.62e42fefa39efp
-1;
1327 static const double A
[] = {
1328 -0x1.00ea348b88334p
-2,
1329 0x1.5575b0be00b6ap
-2,
1330 -0x1.ffffef20a4123p
-2
1332 static const struct {
1335 { 0x1.661ec79f8f3bep
+0, -0x1.57bf7808caadep
-2 },
1336 { 0x1.571ed4aaf883dp
+0, -0x1.2bef0a7c06ddbp
-2 },
1337 { 0x1.49539f0f010bp
+0, -0x1.01eae7f513a67p
-2 },
1338 { 0x1.3c995b0b80385p
+0, -0x1.b31d8a68224e9p
-3 },
1339 { 0x1.30d190c8864a5p
+0, -0x1.6574f0ac07758p
-3 },
1340 { 0x1.25e227b0b8eap
+0, -0x1.1aa2bc79c81p
-3 },
1341 { 0x1.1bb4a4a1a343fp
+0, -0x1.a4e76ce8c0e5ep
-4 },
1342 { 0x1.12358f08ae5bap
+0, -0x1.1973c5a611cccp
-4 },
1343 { 0x1.0953f419900a7p
+0, -0x1.252f438e10c1ep
-5 },
1345 { 0x1.e608cfd9a47acp
-1, 0x1.aa5aa5df25984p
-5 },
1346 { 0x1.ca4b31f026aap
-1, 0x1.c5e53aa362eb4p
-4 },
1347 { 0x1.b2036576afce6p
-1, 0x1.526e57720db08p
-3 },
1348 { 0x1.9c2d163a1aa2dp
-1, 0x1.bc2860d22477p
-3 },
1349 { 0x1.886e6037841edp
-1, 0x1.1058bc8a07ee1p
-2 },
1350 { 0x1.767dcf5534862p
-1, 0x1.4043057b6ee09p
-2 }
1353 double z
, r
, r2
, y
, y0
, invc
, logc
;
1358 /* Fix sign of zero with downward rounding when x==1. */
1359 if (ix
== 0x3f800000)
1361 if (ix
- 0x00800000 >= 0x7f800000 - 0x00800000) {
1362 /* x < 0x1p-126 or inf or nan. */
1364 return math_error(_SING
, "logf", x
, 0, (ix
& 0x80000000 ? 1.0 : -1.0) / x
);
1365 if (ix
== 0x7f800000) /* log(inf) == inf. */
1367 if (ix
* 2 > 0xff000000)
1369 if (ix
& 0x80000000)
1370 return math_error(_DOMAIN
, "logf", x
, 0, (x
- x
) / (x
- x
));
1371 /* x is subnormal, normalize it. */
1377 /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
1378 The range is split into N subintervals.
1379 The ith subinterval contains z and c is near its center. */
1380 tmp
= ix
- 0x3f330000;
1381 i
= (tmp
>> (23 - 4)) % (1 << 4);
1382 k
= (INT32
)tmp
>> 23; /* arithmetic shift */
1383 iz
= ix
- (tmp
& (0x1ffu
<< 23));
1388 /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
1390 y0
= logc
+ (double)k
* Ln2
;
1392 /* Pipelined polynomial evaluation to approximate log1p(r). */
1394 y
= A
[1] * r
+ A
[2];
1396 y
= y
* r2
+ (y0
+ r
);
1400 /*********************************************************************
1403 float CDECL
log10f( float x
)
1405 static const float ivln10hi
= 4.3432617188e-01,
1406 ivln10lo
= -3.1689971365e-05,
1407 log10_2hi
= 3.0102920532e-01,
1408 log10_2lo
= 7.9034151668e-07,
1409 Lg1
= 0xaaaaaa.0p
-24,
1410 Lg2
= 0xccce13.0p
-25,
1411 Lg3
= 0x91e9ee.0p
-25,
1412 Lg4
= 0xf89e26.0p
-26;
1414 union {float f
; UINT32 i
;} u
= {x
};
1415 float hfsq
, f
, s
, z
, R
, w
, t1
, t2
, dk
, hi
, lo
;
1421 if (ix
< 0x00800000 || ix
>> 31) { /* x < 2**-126 */
1423 return math_error(_SING
, "log10f", x
, 0, -1 / (x
* x
));
1424 if ((ix
& ~(1u << 31)) > 0x7f800000)
1427 return math_error(_DOMAIN
, "log10f", x
, 0, (x
- x
) / (x
- x
));
1428 /* subnormal number, scale up x */
1433 } else if (ix
>= 0x7f800000) {
1435 } else if (ix
== 0x3f800000)
1438 /* reduce x into [sqrt(2)/2, sqrt(2)] */
1439 ix
+= 0x3f800000 - 0x3f3504f3;
1440 k
+= (int)(ix
>> 23) - 0x7f;
1441 ix
= (ix
& 0x007fffff) + 0x3f3504f3;
1449 t1
= w
* (Lg2
+ w
* Lg4
);
1450 t2
= z
* (Lg1
+ w
* Lg3
);
1452 hfsq
= 0.5f
* f
* f
;
1458 lo
= f
- hi
- hfsq
+ s
* (hfsq
+ R
);
1460 return dk
* log10_2lo
+ (lo
+ hi
) * ivln10lo
+ lo
* ivln10hi
+ hi
* ivln10hi
+ dk
* log10_2hi
;
1463 /* Subnormal input is normalized so ix has negative biased exponent.
1464 Output is multiplied by POWF_SCALE (where 1 << 5). */
1465 static double powf_log2(UINT32 ix
)
1467 static const struct {
1470 { 0x1.661ec79f8f3bep
+0, -0x1.efec65b963019p
-2 * (1 << 5) },
1471 { 0x1.571ed4aaf883dp
+0, -0x1.b0b6832d4fca4p
-2 * (1 << 5) },
1472 { 0x1.49539f0f010bp
+0, -0x1.7418b0a1fb77bp
-2 * (1 << 5) },
1473 { 0x1.3c995b0b80385p
+0, -0x1.39de91a6dcf7bp
-2 * (1 << 5) },
1474 { 0x1.30d190c8864a5p
+0, -0x1.01d9bf3f2b631p
-2 * (1 << 5) },
1475 { 0x1.25e227b0b8eap
+0, -0x1.97c1d1b3b7afp
-3 * (1 << 5) },
1476 { 0x1.1bb4a4a1a343fp
+0, -0x1.2f9e393af3c9fp
-3 * (1 << 5) },
1477 { 0x1.12358f08ae5bap
+0, -0x1.960cbbf788d5cp
-4 * (1 << 5) },
1478 { 0x1.0953f419900a7p
+0, -0x1.a6f9db6475fcep
-5 * (1 << 5) },
1479 { 0x1p
+0, 0x0p
+0 * (1 << 4) },
1480 { 0x1.e608cfd9a47acp
-1, 0x1.338ca9f24f53dp
-4 * (1 << 5) },
1481 { 0x1.ca4b31f026aap
-1, 0x1.476a9543891bap
-3 * (1 << 5) },
1482 { 0x1.b2036576afce6p
-1, 0x1.e840b4ac4e4d2p
-3 * (1 << 5) },
1483 { 0x1.9c2d163a1aa2dp
-1, 0x1.40645f0c6651cp
-2 * (1 << 5) },
1484 { 0x1.886e6037841edp
-1, 0x1.88e9c2c1b9ff8p
-2 * (1 << 5) },
1485 { 0x1.767dcf5534862p
-1, 0x1.ce0a44eb17bccp
-2 * (1 << 5) }
1487 static const double A
[] = {
1488 0x1.27616c9496e0bp
-2 * (1 << 5), -0x1.71969a075c67ap
-2 * (1 << 5),
1489 0x1.ec70a6ca7baddp
-2 * (1 << 5), -0x1.7154748bef6c8p
-1 * (1 << 5),
1490 0x1.71547652ab82bp0
* (1 << 5)
1493 double z
, r
, r2
, r4
, p
, q
, y
, y0
, invc
, logc
;
1494 UINT32 iz
, top
, tmp
;
1497 /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
1498 The range is split into N subintervals.
1499 The ith subinterval contains z and c is near its center. */
1500 tmp
= ix
- 0x3f330000;
1501 i
= (tmp
>> (23 - 4)) % (1 << 4);
1502 top
= tmp
& 0xff800000;
1504 k
= (INT32
)top
>> (23 - 5); /* arithmetic shift */
1509 /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
1511 y0
= logc
+ (double)k
;
1513 /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
1515 y
= A
[0] * r
+ A
[1];
1516 p
= A
[2] * r
+ A
[3];
1524 /* The output of log2 and thus the input of exp2 is either scaled by N
1525 (in case of fast toint intrinsics) or not. The unscaled xd must be
1526 in [-1021,1023], sign_bias sets the sign of the result. */
1527 static float powf_exp2(double xd
, UINT32 sign_bias
)
1529 static const double C
[] = {
1530 0x1.c6af84b912394p
-5 / (1 << 5) / (1 << 5) / (1 << 5),
1531 0x1.ebfce50fac4f3p
-3 / (1 << 5) / (1 << 5),
1532 0x1.62e42ff0c52d6p
-1 / (1 << 5)
1536 double kd
, z
, r
, r2
, y
, s
;
1538 /* N*x = k + r with r in [-1/2, 1/2] */
1539 kd
= __round(xd
); /* k */
1543 /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
1544 t
= exp2f_T
[ki
% (1 << 5)];
1545 ski
= ki
+ sign_bias
;
1546 t
+= ski
<< (52 - 5);
1548 z
= C
[0] * r
+ C
[1];
1556 /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
1557 the bit representation of a non-zero finite floating-point value. */
1558 static int powf_checkint(UINT32 iy
)
1560 int e
= iy
>> 23 & 0xff;
1565 if (iy
& ((1 << (0x7f + 23 - e
)) - 1))
1567 if (iy
& (1 << (0x7f + 23 - e
)))
1572 /*********************************************************************
1575 * Copied from musl: src/math/powf.c src/math/powf_data.c
1577 float CDECL
powf( float x
, float y
)
1579 UINT32 sign_bias
= 0;
1585 if (ix
- 0x00800000 >= 0x7f800000 - 0x00800000 ||
1586 2 * iy
- 1 >= 2u * 0x7f800000 - 1) {
1587 /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
1588 if (2 * iy
- 1 >= 2u * 0x7f800000 - 1) {
1591 if (ix
== 0x3f800000)
1593 if (2 * ix
> 2u * 0x7f800000 || 2 * iy
> 2u * 0x7f800000)
1595 if (2 * ix
== 2 * 0x3f800000)
1597 if ((2 * ix
< 2 * 0x3f800000) == !(iy
& 0x80000000))
1598 return 0.0f
; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
1601 if (2 * ix
- 1 >= 2u * 0x7f800000 - 1) {
1603 if (ix
& 0x80000000 && powf_checkint(iy
) == 1)
1605 if (iy
& 0x80000000 && x2
== 0.0)
1606 return math_error(_SING
, "powf", x
, y
, 1 / x2
);
1607 /* Without the barrier some versions of clang hoist the 1/x2 and
1608 thus division by zero exception can be signaled spuriously. */
1609 return iy
& 0x80000000 ? fp_barrierf(1 / x2
) : x2
;
1611 /* x and y are non-zero finite. */
1612 if (ix
& 0x80000000) {
1614 int yint
= powf_checkint(iy
);
1616 return math_error(_DOMAIN
, "powf", x
, y
, 0 / (x
- x
));
1618 sign_bias
= 1 << (5 + 11);
1621 if (ix
< 0x00800000) {
1622 /* Normalize subnormal x so exponent becomes negative. */
1629 logx
= powf_log2(ix
);
1630 ylogx
= y
* logx
; /* cannot overflow, y is single prec. */
1631 if ((*(UINT64
*)&ylogx
>> 47 & 0xffff) >= 0x40af800000000000llu
>> 47) {
1632 /* |y*log(x)| >= 126. */
1633 if (ylogx
> 0x1.fffffffd1d571p
+6 * (1 << 5))
1634 return math_error(_OVERFLOW
, "powf", x
, y
, (sign_bias
? -1.0 : 1.0) * 0x1p
1023);
1635 if (ylogx
<= -150.0 * (1 << 5))
1636 return math_error(_UNDERFLOW
, "powf", x
, y
, (sign_bias
? -1.0 : 1.0) / 0x1p
1023);
1638 return powf_exp2(ylogx
, sign_bias
);
1641 /*********************************************************************
1644 * Copied from musl: src/math/sinf.c
1646 float CDECL
sinf( float x
)
1648 static const double s1pio2
= 1*M_PI_2
,
1661 if (ix
<= 0x3f490fda) { /* |x| ~<= pi/4 */
1662 if (ix
< 0x39800000) { /* |x| < 2**-12 */
1663 /* raise inexact if x!=0 and underflow if subnormal */
1664 fp_barrierf(ix
< 0x00800000 ? x
/ 0x1p
120f
: x
+ 0x1p
120f
);
1669 if (ix
<= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1670 if (ix
<= 0x4016cbe3) { /* |x| ~<= 3pi/4 */
1672 return -__cosdf(x
+ s1pio2
);
1674 return __cosdf(x
- s1pio2
);
1676 return __sindf(sign
? -(x
+ s2pio2
) : -(x
- s2pio2
));
1678 if (ix
<= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1679 if (ix
<= 0x40afeddf) { /* |x| ~<= 7*pi/4 */
1681 return __cosdf(x
+ s3pio2
);
1683 return -__cosdf(x
- s3pio2
);
1685 return __sindf(sign
? x
+ s4pio2
: x
- s4pio2
);
1688 /* sin(Inf or NaN) is NaN */
1690 return math_error(_DOMAIN
, "sinf", x
, 0, x
- x
);
1691 if (ix
>= 0x7f800000)
1694 /* general argument reduction needed */
1695 n
= __rem_pio2f(x
, &y
);
1697 case 0: return __sindf(y
);
1698 case 1: return __cosdf(y
);
1699 case 2: return __sindf(-y
);
1700 default: return -__cosdf(y
);
1704 /*********************************************************************
1707 float CDECL
sinhf( float x
)
1709 UINT32 ui
= *(UINT32
*)&x
;
1717 absx
= *(float*)&ui
;
1719 /* |x| < log(FLT_MAX) */
1720 if (ui
< 0x42b17217) {
1722 if (ui
< 0x3f800000) {
1723 if (ui
< 0x3f800000 - (12 << 23))
1725 return h
* (2 * t
- t
* t
/ (t
+ 1));
1727 return h
* (t
+ t
/ (t
+ 1));
1730 /* |x| > logf(FLT_MAX) or nan */
1731 if (ui
> 0x7f800000)
1732 *(DWORD
*)&t
= *(DWORD
*)&x
| 0x400000;
1734 t
= __expo2f(absx
, 2 * h
);
1738 static BOOL
sqrtf_validate( float *x
)
1740 short c
= _fdclass(*x
);
1742 if (c
== FP_ZERO
) return FALSE
;
1743 if (c
== FP_NAN
) return FALSE
;
1746 *x
= math_error(_DOMAIN
, "sqrtf", *x
, 0, ret_nan(TRUE
));
1749 if (c
== FP_INFINITE
) return FALSE
;
1753 #if defined(__x86_64__) || defined(__i386__)
1754 float CDECL
sse2_sqrtf(float);
1755 __ASM_GLOBAL_FUNC( sse2_sqrtf
,
1756 "sqrtss %xmm0, %xmm0\n\t"
1760 /*********************************************************************
1763 * Copied from musl: src/math/sqrtf.c
1765 float CDECL
sqrtf( float x
)
1768 if (!sqrtf_validate(&x
))
1771 return sse2_sqrtf(x
);
1773 static const float tiny
= 1.0e-30;
1781 if (!sqrtf_validate(&x
))
1786 if (m
== 0) { /* subnormal x */
1787 for (i
= 0; (ix
& 0x00800000) == 0; i
++)
1791 m
-= 127; /* unbias exponent */
1792 ix
= (ix
& 0x007fffff) | 0x00800000;
1793 if (m
& 1) /* odd m, double x to make it even */
1795 m
>>= 1; /* m = [m/2] */
1797 /* generate sqrt(x) bit by bit */
1799 q
= s
= 0; /* q = sqrt(x) */
1800 r
= 0x01000000; /* r = moving bit from right to left */
1813 /* use floating add to find out rounding direction */
1815 z
= 1.0f
- tiny
; /* raise inexact flag */
1824 ix
= (q
>> 1) + 0x3f000000;
1825 r
= ix
+ ((unsigned int)m
<< 23);
1831 /* Copied from musl: src/math/__tandf.c */
1832 static float __tandf(double x
, int odd
)
1834 static const double T
[] = {
1835 0x15554d3418c99f.0p
-54,
1836 0x1112fd38999f72.0p
-55,
1837 0x1b54c91d865afe.0p
-57,
1838 0x191df3908c33ce.0p
-58,
1839 0x185dadfcecf44e.0p
-61,
1840 0x1362b9bf971bcd.0p
-59,
1843 double z
, r
, w
, s
, t
, u
;
1846 r
= T
[4] + z
* T
[5];
1847 t
= T
[2] + z
* T
[3];
1850 u
= T
[0] + z
* T
[1];
1851 r
= (x
+ s
* u
) + (s
* w
) * (t
+ w
* r
);
1852 return odd
? -1.0 / r
: r
;
1855 /*********************************************************************
1858 * Copied from musl: src/math/tanf.c
1860 float CDECL
tanf( float x
)
1862 static const double t1pio2
= 1*M_PI_2
,
1875 if (ix
<= 0x3f490fda) { /* |x| ~<= pi/4 */
1876 if (ix
< 0x39800000) { /* |x| < 2**-12 */
1877 /* raise inexact if x!=0 and underflow if subnormal */
1878 fp_barrierf(ix
< 0x00800000 ? x
/ 0x1p
120f
: x
+ 0x1p
120f
);
1881 return __tandf(x
, 0);
1883 if (ix
<= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1884 if (ix
<= 0x4016cbe3) /* |x| ~<= 3pi/4 */
1885 return __tandf((sign
? x
+ t1pio2
: x
- t1pio2
), 1);
1887 return __tandf((sign
? x
+ t2pio2
: x
- t2pio2
), 0);
1889 if (ix
<= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1890 if (ix
<= 0x40afeddf) /* |x| ~<= 7*pi/4 */
1891 return __tandf((sign
? x
+ t3pio2
: x
- t3pio2
), 1);
1893 return __tandf((sign
? x
+ t4pio2
: x
- t4pio2
), 0);
1896 /* tan(Inf or NaN) is NaN */
1898 return math_error(_DOMAIN
, "tanf", x
, 0, x
- x
);
1899 if (ix
>= 0x7f800000)
1902 /* argument reduction */
1903 n
= __rem_pio2f(x
, &y
);
1904 return __tandf(y
, n
& 1);
1907 /*********************************************************************
1910 float CDECL
tanhf( float x
)
1912 UINT32 ui
= *(UINT32
*)&x
;
1913 UINT32 sign
= ui
& 0x80000000;
1920 if (ui
> 0x3f0c9f54) {
1921 /* |x| > log(3)/2 ~= 0.5493 or nan */
1922 if (ui
> 0x41200000) {
1923 if (ui
> 0x7f800000) {
1924 *(UINT32
*)&x
= ui
| sign
| 0x400000;
1925 #if _MSVCR_VER < 140
1926 return math_error(_DOMAIN
, "tanhf", x
, 0, x
);
1932 fp_barrierf(x
+ 0x1p
120f
);
1935 t
= __expm1f(2 * x
);
1936 t
= 1 - 2 / (t
+ 2);
1938 } else if (ui
> 0x3e82c578) {
1939 /* |x| > log(5/3)/2 ~= 0.2554 */
1940 t
= __expm1f(2 * x
);
1942 } else if (ui
>= 0x00800000) {
1943 /* |x| >= 0x1p-126 */
1944 t
= __expm1f(-2 * x
);
1947 /* |x| is subnormal */
1951 return sign
? -t
: t
;
1954 /*********************************************************************
1957 * Copied from musl: src/math/ceilf.c
1959 float CDECL
ceilf( float x
)
1961 union {float f
; UINT32 i
;} u
= {x
};
1962 int e
= (int)(u
.i
>> 23 & 0xff) - 0x7f;
1968 m
= 0x007fffff >> e
;
1983 /*********************************************************************
1986 * Copied from musl: src/math/floorf.c
1988 float CDECL
floorf( float x
)
1990 union {float f
; UINT32 i
;} u
= {x
};
1991 int e
= (int)(u
.i
>> 23 & 0xff) - 0x7f;
1997 m
= 0x007fffff >> e
;
2014 /*********************************************************************
2017 * Copied from musl: src/math/acos.c
2019 static double acos_R(double z
)
2021 static const double pS0
= 1.66666666666666657415e-01,
2022 pS1
= -3.25565818622400915405e-01,
2023 pS2
= 2.01212532134862925881e-01,
2024 pS3
= -4.00555345006794114027e-02,
2025 pS4
= 7.91534994289814532176e-04,
2026 pS5
= 3.47933107596021167570e-05,
2027 qS1
= -2.40339491173441421878e+00,
2028 qS2
= 2.02094576023350569471e+00,
2029 qS3
= -6.88283971605453293030e-01,
2030 qS4
= 7.70381505559019352791e-02;
2033 p
= z
* (pS0
+ z
* (pS1
+ z
* (pS2
+ z
* (pS3
+ z
* (pS4
+ z
* pS5
)))));
2034 q
= 1.0 + z
* (qS1
+ z
* (qS2
+ z
* (qS3
+ z
* qS4
)));
2038 double CDECL
acos( double x
)
2040 static const double pio2_hi
= 1.57079632679489655800e+00,
2041 pio2_lo
= 6.12323399573676603587e-17;
2043 double z
, w
, s
, c
, df
;
2044 unsigned int hx
, ix
;
2047 hx
= *(ULONGLONG
*)&x
>> 32;
2048 ix
= hx
& 0x7fffffff;
2049 /* |x| >= 1 or nan */
2050 if (ix
>= 0x3ff00000) {
2053 lx
= *(ULONGLONG
*)&x
;
2054 if (((ix
- 0x3ff00000) | lx
) == 0) {
2055 /* acos(1)=0, acos(-1)=pi */
2057 return 2 * pio2_hi
+ 7.5231638452626401e-37;
2060 if (isnan(x
)) return x
;
2061 return math_error(_DOMAIN
, "acos", x
, 0, 0 / (x
- x
));
2064 if (ix
< 0x3fe00000) {
2065 if (ix
<= 0x3c600000) /* |x| < 2**-57 */
2066 return pio2_hi
+ 7.5231638452626401e-37;
2067 return pio2_hi
- (x
- (pio2_lo
- x
* acos_R(x
* x
)));
2071 z
= (1.0 + x
) * 0.5;
2073 w
= acos_R(z
) * s
- pio2_lo
;
2074 return 2 * (pio2_hi
- (s
+ w
));
2077 z
= (1.0 - x
) * 0.5;
2080 llx
= (*(ULONGLONG
*)&df
>> 32) << 32;
2081 df
= *(double*)&llx
;
2082 c
= (z
- df
* df
) / (s
+ df
);
2083 w
= acos_R(z
) * s
+ c
;
2084 return 2 * (df
+ w
);
2087 /*********************************************************************
2090 * Copied from musl: src/math/asin.c
2092 static double asin_R(double z
)
2094 /* coefficients for R(x^2) */
2095 static const double pS0
= 1.66666666666666657415e-01,
2096 pS1
= -3.25565818622400915405e-01,
2097 pS2
= 2.01212532134862925881e-01,
2098 pS3
= -4.00555345006794114027e-02,
2099 pS4
= 7.91534994289814532176e-04,
2100 pS5
= 3.47933107596021167570e-05,
2101 qS1
= -2.40339491173441421878e+00,
2102 qS2
= 2.02094576023350569471e+00,
2103 qS3
= -6.88283971605453293030e-01,
2104 qS4
= 7.70381505559019352791e-02;
2107 p
= z
* (pS0
+ z
* (pS1
+ z
* (pS2
+ z
* (pS3
+ z
* (pS4
+ z
* pS5
)))));
2108 q
= 1.0 + z
* (qS1
+ z
* (qS2
+ z
* (qS3
+ z
* qS4
)));
2113 double CDECL
x87_asin(double);
2114 __ASM_GLOBAL_FUNC( x87_asin
,
2129 double CDECL
asin( double x
)
2131 static const double pio2_hi
= 1.57079632679489655800e+00,
2132 pio2_lo
= 6.12323399573676603587e-17;
2135 unsigned int hx
, ix
;
2138 unsigned int x87_cw
, sse2_cw
;
2141 hx
= *(ULONGLONG
*)&x
>> 32;
2142 ix
= hx
& 0x7fffffff;
2143 /* |x| >= 1 or nan */
2144 if (ix
>= 0x3ff00000) {
2146 lx
= *(ULONGLONG
*)&x
;
2147 if (((ix
- 0x3ff00000) | lx
) == 0)
2148 /* asin(1) = +-pi/2 with inexact */
2149 return x
* pio2_hi
+ 7.5231638452626401e-37;
2153 return math_error(_DOMAIN
, "asin", x
, 0, x
);
2158 return math_error(_DOMAIN
, "asin", x
, 0, 0 / (x
- x
));
2162 __control87_2(0, 0, &x87_cw
, &sse2_cw
);
2163 if (!sse2_enabled
|| (x87_cw
& _MCW_EM
) != _MCW_EM
2164 || (sse2_cw
& (_MCW_EM
| _MCW_RC
)) != _MCW_EM
)
2169 if (ix
< 0x3fe00000) {
2170 /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */
2171 if (ix
< 0x3e500000 && ix
>= 0x00100000)
2173 return x
+ x
* asin_R(x
* x
);
2175 /* 1 > |x| >= 0.5 */
2176 z
= (1 - fabs(x
)) * 0.5;
2179 if (ix
>= 0x3fef3333) { /* if |x| > 0.975 */
2180 x
= pio2_hi
- (2 * (s
+ s
* r
) - pio2_lo
);
2185 llx
= (*(ULONGLONG
*)&f
>> 32) << 32;
2187 c
= (z
- f
* f
) / (s
+ f
);
2188 x
= 0.5 * pio2_hi
- (2 * s
* r
- (pio2_lo
- 2 * c
) - (0.5 * pio2_hi
- 2 * f
));
2195 /*********************************************************************
2198 * Copied from musl: src/math/atan.c
2200 double CDECL
atan( double x
)
2202 static const double atanhi
[] = {
2203 4.63647609000806093515e-01,
2204 7.85398163397448278999e-01,
2205 9.82793723247329054082e-01,
2206 1.57079632679489655800e+00,
2208 static const double atanlo
[] = {
2209 2.26987774529616870924e-17,
2210 3.06161699786838301793e-17,
2211 1.39033110312309984516e-17,
2212 6.12323399573676603587e-17,
2214 static const double aT
[] = {
2215 3.33333333333329318027e-01,
2216 -1.99999999998764832476e-01,
2217 1.42857142725034663711e-01,
2218 -1.11111104054623557880e-01,
2219 9.09088713343650656196e-02,
2220 -7.69187620504482999495e-02,
2221 6.66107313738753120669e-02,
2222 -5.83357013379057348645e-02,
2223 4.97687799461593236017e-02,
2224 -3.65315727442169155270e-02,
2225 1.62858201153657823623e-02,
2228 double w
, s1
, s2
, z
;
2229 unsigned int ix
, sign
;
2233 if (isnan(x
)) return math_error(_DOMAIN
, "atan", x
, 0, x
);
2236 ix
= *(ULONGLONG
*)&x
>> 32;
2239 if (ix
>= 0x44100000) { /* if |x| >= 2^66 */
2242 z
= atanhi
[3] + 7.5231638452626401e-37;
2243 return sign
? -z
: z
;
2245 if (ix
< 0x3fdc0000) { /* |x| < 0.4375 */
2246 if (ix
< 0x3e400000) { /* |x| < 2^-27 */
2247 if (ix
< 0x00100000)
2248 /* raise underflow for subnormal x */
2249 fp_barrierf((float)x
);
2255 if (ix
< 0x3ff30000) { /* |x| < 1.1875 */
2256 if (ix
< 0x3fe60000) { /* 7/16 <= |x| < 11/16 */
2258 x
= (2.0 * x
- 1.0) / (2.0 + x
);
2259 } else { /* 11/16 <= |x| < 19/16 */
2261 x
= (x
- 1.0) / (x
+ 1.0);
2264 if (ix
< 0x40038000) { /* |x| < 2.4375 */
2266 x
= (x
- 1.5) / (1.0 + 1.5 * x
);
2267 } else { /* 2.4375 <= |x| < 2^66 */
2273 /* end of argument reduction */
2276 /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
2277 s1
= z
* (aT
[0] + w
* (aT
[2] + w
* (aT
[4] + w
* (aT
[6] + w
* (aT
[8] + w
* aT
[10])))));
2278 s2
= w
* (aT
[1] + w
* (aT
[3] + w
* (aT
[5] + w
* (aT
[7] + w
* aT
[9]))));
2280 return x
- x
* (s1
+ s2
);
2281 z
= atanhi
[id
] - (x
* (s1
+ s2
) - atanlo
[id
] - x
);
2282 return sign
? -z
: z
;
2285 /*********************************************************************
2288 * Copied from musl: src/math/atan2.c
2290 double CDECL
atan2( double y
, double x
)
2292 static const double pi
= 3.1415926535897931160E+00,
2293 pi_lo
= 1.2246467991473531772E-16;
2296 unsigned int m
, lx
, ly
, ix
, iy
;
2298 if (isnan(x
) || isnan(y
))
2300 ix
= *(ULONGLONG
*)&x
>> 32;
2301 lx
= *(ULONGLONG
*)&x
;
2302 iy
= *(ULONGLONG
*)&y
>> 32;
2303 ly
= *(ULONGLONG
*)&y
;
2304 if (((ix
- 0x3ff00000) | lx
) == 0) /* x = 1.0 */
2306 m
= ((iy
>> 31) & 1) | ((ix
>> 30) & 2); /* 2*sign(x)+sign(y) */
2307 ix
= ix
& 0x7fffffff;
2308 iy
= iy
& 0x7fffffff;
2311 if ((iy
| ly
) == 0) {
2314 case 1: return y
; /* atan(+-0,+anything)=+-0 */
2315 case 2: return pi
; /* atan(+0,-anything) = pi */
2316 case 3: return -pi
; /* atan(-0,-anything) =-pi */
2321 return m
& 1 ? -pi
/ 2 : pi
/ 2;
2323 if (ix
== 0x7ff00000) {
2324 if (iy
== 0x7ff00000) {
2326 case 0: return pi
/ 4; /* atan(+INF,+INF) */
2327 case 1: return -pi
/ 4; /* atan(-INF,+INF) */
2328 case 2: return 3 * pi
/ 4; /* atan(+INF,-INF) */
2329 case 3: return -3 * pi
/ 4; /* atan(-INF,-INF) */
2333 case 0: return 0.0; /* atan(+...,+INF) */
2334 case 1: return -0.0; /* atan(-...,+INF) */
2335 case 2: return pi
; /* atan(+...,-INF) */
2336 case 3: return -pi
; /* atan(-...,-INF) */
2340 /* |y/x| > 0x1p64 */
2341 if (ix
+ (64 << 20) < iy
|| iy
== 0x7ff00000)
2342 return m
& 1 ? -pi
/ 2 : pi
/ 2;
2344 /* z = atan(|y/x|) without spurious underflow */
2345 if ((m
& 2) && iy
+ (64 << 20) < ix
) /* |y/x| < 0x1p-64, x<0 */
2348 z
= atan(fabs(y
/ x
));
2350 case 0: return z
; /* atan(+,+) */
2351 case 1: return -z
; /* atan(-,+) */
2352 case 2: return pi
- (z
- pi_lo
); /* atan(+,-) */
2353 default: /* case 3 */
2354 return (z
- pi_lo
) - pi
; /* atan(-,-) */
2358 /* Copied from musl: src/math/rint.c */
2359 static double __rint(double x
)
2361 static const double toint
= 1 / DBL_EPSILON
;
2363 ULONGLONG llx
= *(ULONGLONG
*)&x
;
2364 int e
= llx
>> 52 & 0x7ff;
2371 cw
= _controlfp(0, 0);
2372 if ((cw
& _MCW_PC
) != _PC_53
)
2373 _controlfp(_PC_53
, _MCW_PC
);
2375 y
= fp_barrier(x
- toint
) + toint
;
2377 y
= fp_barrier(x
+ toint
) - toint
;
2378 if ((cw
& _MCW_PC
) != _PC_53
)
2379 _controlfp(cw
, _MCW_PC
);
2381 return s
? -0.0 : 0;
2385 /* Copied from musl: src/math/__rem_pio2.c */
2386 static int __rem_pio2(double x
, double *y
)
2388 static const double pio4
= 0x1.921fb54442d18p
-1,
2389 invpio2
= 6.36619772367581382433e-01,
2390 pio2_1
= 1.57079632673412561417e+00,
2391 pio2_1t
= 6.07710050650619224932e-11,
2392 pio2_2
= 6.07710050630396597660e-11,
2393 pio2_2t
= 2.02226624879595063154e-21,
2394 pio2_3
= 2.02226624871116645580e-21,
2395 pio2_3t
= 8.47842766036889956997e-32;
2397 union {double f
; UINT64 i
;} u
= {x
};
2398 double z
, w
, t
, r
, fn
, tx
[3], ty
[2];
2400 int sign
, n
, ex
, ey
, i
;
2403 ix
= u
.i
>> 32 & 0x7fffffff;
2404 if (ix
<= 0x400f6a7a) { /* |x| ~<= 5pi/4 */
2405 if ((ix
& 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */
2406 goto medium
; /* cancellation -- use medium case */
2407 if (ix
<= 0x4002d97c) { /* |x| ~<= 3pi/4 */
2409 z
= x
- pio2_1
; /* one round good to 85 bits */
2411 y
[1] = (z
- y
[0]) - pio2_1t
;
2416 y
[1] = (z
- y
[0]) + pio2_1t
;
2422 y
[0] = z
- 2 * pio2_1t
;
2423 y
[1] = (z
- y
[0]) - 2 * pio2_1t
;
2427 y
[0] = z
+ 2 * pio2_1t
;
2428 y
[1] = (z
- y
[0]) + 2 * pio2_1t
;
2433 if (ix
<= 0x401c463b) { /* |x| ~<= 9pi/4 */
2434 if (ix
<= 0x4015fdbc) { /* |x| ~<= 7pi/4 */
2435 if (ix
== 0x4012d97c) /* |x| ~= 3pi/2 */
2439 y
[0] = z
- 3 * pio2_1t
;
2440 y
[1] = (z
- y
[0]) - 3 * pio2_1t
;
2444 y
[0] = z
+ 3 * pio2_1t
;
2445 y
[1] = (z
- y
[0]) + 3 * pio2_1t
;
2449 if (ix
== 0x401921fb) /* |x| ~= 4pi/2 */
2453 y
[0] = z
- 4 * pio2_1t
;
2454 y
[1] = (z
- y
[0]) - 4 * pio2_1t
;
2458 y
[0] = z
+ 4 * pio2_1t
;
2459 y
[1] = (z
- y
[0]) + 4 * pio2_1t
;
2464 if (ix
< 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
2466 fn
= __rint(x
* invpio2
);
2468 r
= x
- fn
* pio2_1
;
2469 w
= fn
* pio2_1t
; /* 1st round, good to 85 bits */
2470 /* Matters with directed rounding. */
2471 if (r
- w
< -pio4
) {
2474 r
= x
- fn
* pio2_1
;
2476 } else if (r
- w
> pio4
) {
2479 r
= x
- fn
* pio2_1
;
2484 ey
= u
.i
>> 52 & 0x7ff;
2486 if (ex
- ey
> 16) { /* 2nd round, good to 118 bits */
2490 w
= fn
* pio2_2t
- ((t
- r
) - w
);
2493 ey
= u
.i
>> 52 & 0x7ff;
2494 if (ex
- ey
> 49) { /* 3rd round, good to 151 bits, covers all cases */
2498 w
= fn
* pio2_3t
- ((t
- r
) - w
);
2502 y
[1] = (r
- y
[0]) - w
;
2506 * all other (large) arguments
2508 if (ix
>= 0x7ff00000) { /* x is inf or NaN */
2509 y
[0] = y
[1] = x
- x
;
2512 /* set z = scalbn(|x|,-ilogb(x)+23) */
2514 u
.i
&= (UINT64
)-1 >> 12;
2515 u
.i
|= (UINT64
)(0x3ff + 23) << 52;
2517 for (i
= 0; i
< 2; i
++) {
2518 tx
[i
] = (double)(INT32
)z
;
2519 z
= (z
- tx
[i
]) * 0x1p
24;
2522 /* skip zero terms, first term is non-zero */
2523 while (tx
[i
] == 0.0)
2525 n
= __rem_pio2_large(tx
, ty
, (int)(ix
>> 20) - (0x3ff + 23), i
+ 1, 1);
2536 /* Copied from musl: src/math/__sin.c */
2537 static double __sin(double x
, double y
, int iy
)
2539 static const double S1
= -1.66666666666666324348e-01,
2540 S2
= 8.33333333332248946124e-03,
2541 S3
= -1.98412698298579493134e-04,
2542 S4
= 2.75573137070700676789e-06,
2543 S5
= -2.50507602534068634195e-08,
2544 S6
= 1.58969099521155010221e-10;
2550 r
= S2
+ z
* (S3
+ z
* S4
) + z
* w
* (S5
+ z
* S6
);
2553 return x
+ v
* (S1
+ z
* r
);
2555 return x
- ((z
* (0.5 * y
- v
* r
) - y
) - v
* S1
);
2558 /* Copied from musl: src/math/__cos.c */
2559 static double __cos(double x
, double y
)
2561 static const double C1
= 4.16666666666666019037e-02,
2562 C2
= -1.38888888888741095749e-03,
2563 C3
= 2.48015872894767294178e-05,
2564 C4
= -2.75573143513906633035e-07,
2565 C5
= 2.08757232129817482790e-09,
2566 C6
= -1.13596475577881948265e-11;
2571 r
= z
* (C1
+ z
* (C2
+ z
* C3
)) + w
* w
* (C4
+ z
* (C5
+ z
* C6
));
2574 return w
+ (((1.0 - w
) - hz
) + (z
* r
- x
* y
));
2577 /*********************************************************************
2580 * Copied from musl: src/math/cos.c
2582 double CDECL
cos( double x
)
2588 ix
= *(ULONGLONG
*)&x
>> 32;
2592 if (ix
<= 0x3fe921fb) {
2593 if (ix
< 0x3e46a09e) { /* |x| < 2**-27 * sqrt(2) */
2594 /* raise inexact if x!=0 */
2595 fp_barrier(x
+ 0x1p
120f
);
2601 /* cos(Inf or NaN) is NaN */
2602 if (isinf(x
)) return math_error(_DOMAIN
, "cos", x
, 0, x
- x
);
2603 if (ix
>= 0x7ff00000)
2606 /* argument reduction */
2607 n
= __rem_pio2(x
, y
);
2609 case 0: return __cos(y
[0], y
[1]);
2610 case 1: return -__sin(y
[0], y
[1], 1);
2611 case 2: return -__cos(y
[0], y
[1]);
2612 default: return __sin(y
[0], y
[1], 1);
2616 /* Copied from musl: src/math/expm1.c */
2617 static double __expm1(double x
)
2619 static const double o_threshold
= 7.09782712893383973096e+02,
2620 ln2_hi
= 6.93147180369123816490e-01,
2621 ln2_lo
= 1.90821492927058770002e-10,
2622 invln2
= 1.44269504088896338700e+00,
2623 Q1
= -3.33333333333331316428e-02,
2624 Q2
= 1.58730158725481460165e-03,
2625 Q3
= -7.93650757867487942473e-05,
2626 Q4
= 4.00821782732936239552e-06,
2627 Q5
= -2.01099218183624371326e-07;
2629 double y
, hi
, lo
, c
, t
, e
, hxs
, hfx
, r1
, twopk
;
2630 union {double f
; UINT64 i
;} u
= {x
};
2631 UINT32 hx
= u
.i
>> 32 & 0x7fffffff;
2632 int k
, sign
= u
.i
>> 63;
2634 /* filter out huge and non-finite argument */
2635 if (hx
>= 0x4043687A) { /* if |x|>=56*ln2 */
2639 return sign
? -1 : x
;
2641 return math_error(_UNDERFLOW
, "exp", x
, 0, -1);
2642 if (x
> o_threshold
)
2643 return math_error(_OVERFLOW
, "exp", x
, 0, x
* 0x1p
1023);
2646 /* argument reduction */
2647 if (hx
> 0x3fd62e42) { /* if |x| > 0.5 ln2 */
2648 if (hx
< 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
2659 k
= invln2
* x
+ (sign
? -0.5 : 0.5);
2661 hi
= x
- t
* ln2_hi
; /* t*ln2_hi is exact here */
2666 } else if (hx
< 0x3c900000) { /* |x| < 2**-54, return x */
2667 fp_barrier(x
+ 0x1p
120f
);
2668 if (hx
< 0x00100000)
2669 fp_barrier((float)x
);
2674 /* x is now in primary range */
2677 r1
= 1.0 + hxs
* (Q1
+ hxs
* (Q2
+ hxs
* (Q3
+ hxs
* (Q4
+ hxs
* Q5
))));
2679 e
= hxs
* ((r1
- t
) / (6.0 - x
* t
));
2680 if (k
== 0) /* c is 0 */
2681 return x
- (x
* e
- hxs
);
2682 e
= x
* (e
- c
) - c
;
2684 /* exp(x) ~ 2^k (x_reduced - e + 1) */
2686 return 0.5 * (x
- e
) - 0.5;
2689 return -2.0 * (e
- (x
+ 0.5));
2690 return 1.0 + 2.0 * (x
- e
);
2692 u
.i
= (UINT64
)(0x3ff + k
) << 52; /* 2^k */
2694 if (k
< 0 || k
> 56) { /* suffice to return exp(x)-1 */
2697 y
= y
* 2.0 * 0x1p
1023;
2702 u
.i
= (UINT64
)(0x3ff - k
) << 52; /* 2^-k */
2704 y
= (x
- e
+ (1 - u
.f
)) * twopk
;
2706 y
= (x
- (e
+ u
.f
) + 1) * twopk
;
2710 static double __expo2(double x
, double sign
)
2712 static const int k
= 2043;
2713 static const double kln2
= 0x1.62066151add8bp
+10;
2716 *(UINT64
*)&scale
= (UINT64
)(0x3ff + k
/ 2) << 52;
2717 return exp(x
- kln2
) * (sign
* scale
) * scale
;
2720 /*********************************************************************
2723 * Copied from musl: src/math/cosh.c
2725 double CDECL
cosh( double x
)
2727 UINT64 ux
= *(UINT64
*)&x
;
2728 UINT64 sign
= ux
& 0x8000000000000000ULL
;
2733 ux
&= (uint64_t)-1 / 2;
2738 if (w
< 0x3fe62e42) {
2739 if (w
< 0x3ff00000 - (26 << 20)) {
2740 fp_barrier(x
+ 0x1p
120f
);
2744 return 1 + t
* t
/ (2 * (1 + t
));
2747 /* |x| < log(DBL_MAX) */
2748 if (w
< 0x40862e42) {
2750 /* note: if x>log(0x1p26) then the 1/t is not needed */
2751 return 0.5 * (t
+ 1 / t
);
2754 /* |x| > log(DBL_MAX) or nan */
2755 /* note: the result is stored to handle overflow */
2756 if (ux
> 0x7ff0000000000000ULL
)
2757 *(UINT64
*)&t
= ux
| sign
| 0x0008000000000000ULL
;
2759 t
= __expo2(x
, 1.0);
2763 /* Copied from musl: src/math/exp_data.c */
2764 static const UINT64 exp_T
[] = {
2765 0x0ULL
, 0x3ff0000000000000ULL
,
2766 0x3c9b3b4f1a88bf6eULL
, 0x3feff63da9fb3335ULL
,
2767 0xbc7160139cd8dc5dULL
, 0x3fefec9a3e778061ULL
,
2768 0xbc905e7a108766d1ULL
, 0x3fefe315e86e7f85ULL
,
2769 0x3c8cd2523567f613ULL
, 0x3fefd9b0d3158574ULL
,
2770 0xbc8bce8023f98efaULL
, 0x3fefd06b29ddf6deULL
,
2771 0x3c60f74e61e6c861ULL
, 0x3fefc74518759bc8ULL
,
2772 0x3c90a3e45b33d399ULL
, 0x3fefbe3ecac6f383ULL
,
2773 0x3c979aa65d837b6dULL
, 0x3fefb5586cf9890fULL
,
2774 0x3c8eb51a92fdeffcULL
, 0x3fefac922b7247f7ULL
,
2775 0x3c3ebe3d702f9cd1ULL
, 0x3fefa3ec32d3d1a2ULL
,
2776 0xbc6a033489906e0bULL
, 0x3fef9b66affed31bULL
,
2777 0xbc9556522a2fbd0eULL
, 0x3fef9301d0125b51ULL
,
2778 0xbc5080ef8c4eea55ULL
, 0x3fef8abdc06c31ccULL
,
2779 0xbc91c923b9d5f416ULL
, 0x3fef829aaea92de0ULL
,
2780 0x3c80d3e3e95c55afULL
, 0x3fef7a98c8a58e51ULL
,
2781 0xbc801b15eaa59348ULL
, 0x3fef72b83c7d517bULL
,
2782 0xbc8f1ff055de323dULL
, 0x3fef6af9388c8deaULL
,
2783 0x3c8b898c3f1353bfULL
, 0x3fef635beb6fcb75ULL
,
2784 0xbc96d99c7611eb26ULL
, 0x3fef5be084045cd4ULL
,
2785 0x3c9aecf73e3a2f60ULL
, 0x3fef54873168b9aaULL
,
2786 0xbc8fe782cb86389dULL
, 0x3fef4d5022fcd91dULL
,
2787 0x3c8a6f4144a6c38dULL
, 0x3fef463b88628cd6ULL
,
2788 0x3c807a05b0e4047dULL
, 0x3fef3f49917ddc96ULL
,
2789 0x3c968efde3a8a894ULL
, 0x3fef387a6e756238ULL
,
2790 0x3c875e18f274487dULL
, 0x3fef31ce4fb2a63fULL
,
2791 0x3c80472b981fe7f2ULL
, 0x3fef2b4565e27cddULL
,
2792 0xbc96b87b3f71085eULL
, 0x3fef24dfe1f56381ULL
,
2793 0x3c82f7e16d09ab31ULL
, 0x3fef1e9df51fdee1ULL
,
2794 0xbc3d219b1a6fbffaULL
, 0x3fef187fd0dad990ULL
,
2795 0x3c8b3782720c0ab4ULL
, 0x3fef1285a6e4030bULL
,
2796 0x3c6e149289cecb8fULL
, 0x3fef0cafa93e2f56ULL
,
2797 0x3c834d754db0abb6ULL
, 0x3fef06fe0a31b715ULL
,
2798 0x3c864201e2ac744cULL
, 0x3fef0170fc4cd831ULL
,
2799 0x3c8fdd395dd3f84aULL
, 0x3feefc08b26416ffULL
,
2800 0xbc86a3803b8e5b04ULL
, 0x3feef6c55f929ff1ULL
,
2801 0xbc924aedcc4b5068ULL
, 0x3feef1a7373aa9cbULL
,
2802 0xbc9907f81b512d8eULL
, 0x3feeecae6d05d866ULL
,
2803 0xbc71d1e83e9436d2ULL
, 0x3feee7db34e59ff7ULL
,
2804 0xbc991919b3ce1b15ULL
, 0x3feee32dc313a8e5ULL
,
2805 0x3c859f48a72a4c6dULL
, 0x3feedea64c123422ULL
,
2806 0xbc9312607a28698aULL
, 0x3feeda4504ac801cULL
,
2807 0xbc58a78f4817895bULL
, 0x3feed60a21f72e2aULL
,
2808 0xbc7c2c9b67499a1bULL
, 0x3feed1f5d950a897ULL
,
2809 0x3c4363ed60c2ac11ULL
, 0x3feece086061892dULL
,
2810 0x3c9666093b0664efULL
, 0x3feeca41ed1d0057ULL
,
2811 0x3c6ecce1daa10379ULL
, 0x3feec6a2b5c13cd0ULL
,
2812 0x3c93ff8e3f0f1230ULL
, 0x3feec32af0d7d3deULL
,
2813 0x3c7690cebb7aafb0ULL
, 0x3feebfdad5362a27ULL
,
2814 0x3c931dbdeb54e077ULL
, 0x3feebcb299fddd0dULL
,
2815 0xbc8f94340071a38eULL
, 0x3feeb9b2769d2ca7ULL
,
2816 0xbc87deccdc93a349ULL
, 0x3feeb6daa2cf6642ULL
,
2817 0xbc78dec6bd0f385fULL
, 0x3feeb42b569d4f82ULL
,
2818 0xbc861246ec7b5cf6ULL
, 0x3feeb1a4ca5d920fULL
,
2819 0x3c93350518fdd78eULL
, 0x3feeaf4736b527daULL
,
2820 0x3c7b98b72f8a9b05ULL
, 0x3feead12d497c7fdULL
,
2821 0x3c9063e1e21c5409ULL
, 0x3feeab07dd485429ULL
,
2822 0x3c34c7855019c6eaULL
, 0x3feea9268a5946b7ULL
,
2823 0x3c9432e62b64c035ULL
, 0x3feea76f15ad2148ULL
,
2824 0xbc8ce44a6199769fULL
, 0x3feea5e1b976dc09ULL
,
2825 0xbc8c33c53bef4da8ULL
, 0x3feea47eb03a5585ULL
,
2826 0xbc845378892be9aeULL
, 0x3feea34634ccc320ULL
,
2827 0xbc93cedd78565858ULL
, 0x3feea23882552225ULL
,
2828 0x3c5710aa807e1964ULL
, 0x3feea155d44ca973ULL
,
2829 0xbc93b3efbf5e2228ULL
, 0x3feea09e667f3bcdULL
,
2830 0xbc6a12ad8734b982ULL
, 0x3feea012750bdabfULL
,
2831 0xbc6367efb86da9eeULL
, 0x3fee9fb23c651a2fULL
,
2832 0xbc80dc3d54e08851ULL
, 0x3fee9f7df9519484ULL
,
2833 0xbc781f647e5a3ecfULL
, 0x3fee9f75e8ec5f74ULL
,
2834 0xbc86ee4ac08b7db0ULL
, 0x3fee9f9a48a58174ULL
,
2835 0xbc8619321e55e68aULL
, 0x3fee9feb564267c9ULL
,
2836 0x3c909ccb5e09d4d3ULL
, 0x3feea0694fde5d3fULL
,
2837 0xbc7b32dcb94da51dULL
, 0x3feea11473eb0187ULL
,
2838 0x3c94ecfd5467c06bULL
, 0x3feea1ed0130c132ULL
,
2839 0x3c65ebe1abd66c55ULL
, 0x3feea2f336cf4e62ULL
,
2840 0xbc88a1c52fb3cf42ULL
, 0x3feea427543e1a12ULL
,
2841 0xbc9369b6f13b3734ULL
, 0x3feea589994cce13ULL
,
2842 0xbc805e843a19ff1eULL
, 0x3feea71a4623c7adULL
,
2843 0xbc94d450d872576eULL
, 0x3feea8d99b4492edULL
,
2844 0x3c90ad675b0e8a00ULL
, 0x3feeaac7d98a6699ULL
,
2845 0x3c8db72fc1f0eab4ULL
, 0x3feeace5422aa0dbULL
,
2846 0xbc65b6609cc5e7ffULL
, 0x3feeaf3216b5448cULL
,
2847 0x3c7bf68359f35f44ULL
, 0x3feeb1ae99157736ULL
,
2848 0xbc93091fa71e3d83ULL
, 0x3feeb45b0b91ffc6ULL
,
2849 0xbc5da9b88b6c1e29ULL
, 0x3feeb737b0cdc5e5ULL
,
2850 0xbc6c23f97c90b959ULL
, 0x3feeba44cbc8520fULL
,
2851 0xbc92434322f4f9aaULL
, 0x3feebd829fde4e50ULL
,
2852 0xbc85ca6cd7668e4bULL
, 0x3feec0f170ca07baULL
,
2853 0x3c71affc2b91ce27ULL
, 0x3feec49182a3f090ULL
,
2854 0x3c6dd235e10a73bbULL
, 0x3feec86319e32323ULL
,
2855 0xbc87c50422622263ULL
, 0x3feecc667b5de565ULL
,
2856 0x3c8b1c86e3e231d5ULL
, 0x3feed09bec4a2d33ULL
,
2857 0xbc91bbd1d3bcbb15ULL
, 0x3feed503b23e255dULL
,
2858 0x3c90cc319cee31d2ULL
, 0x3feed99e1330b358ULL
,
2859 0x3c8469846e735ab3ULL
, 0x3feede6b5579fdbfULL
,
2860 0xbc82dfcd978e9db4ULL
, 0x3feee36bbfd3f37aULL
,
2861 0x3c8c1a7792cb3387ULL
, 0x3feee89f995ad3adULL
,
2862 0xbc907b8f4ad1d9faULL
, 0x3feeee07298db666ULL
,
2863 0xbc55c3d956dcaebaULL
, 0x3feef3a2b84f15fbULL
,
2864 0xbc90a40e3da6f640ULL
, 0x3feef9728de5593aULL
,
2865 0xbc68d6f438ad9334ULL
, 0x3feeff76f2fb5e47ULL
,
2866 0xbc91eee26b588a35ULL
, 0x3fef05b030a1064aULL
,
2867 0x3c74ffd70a5fddcdULL
, 0x3fef0c1e904bc1d2ULL
,
2868 0xbc91bdfbfa9298acULL
, 0x3fef12c25bd71e09ULL
,
2869 0x3c736eae30af0cb3ULL
, 0x3fef199bdd85529cULL
,
2870 0x3c8ee3325c9ffd94ULL
, 0x3fef20ab5fffd07aULL
,
2871 0x3c84e08fd10959acULL
, 0x3fef27f12e57d14bULL
,
2872 0x3c63cdaf384e1a67ULL
, 0x3fef2f6d9406e7b5ULL
,
2873 0x3c676b2c6c921968ULL
, 0x3fef3720dcef9069ULL
,
2874 0xbc808a1883ccb5d2ULL
, 0x3fef3f0b555dc3faULL
,
2875 0xbc8fad5d3ffffa6fULL
, 0x3fef472d4a07897cULL
,
2876 0xbc900dae3875a949ULL
, 0x3fef4f87080d89f2ULL
,
2877 0x3c74a385a63d07a7ULL
, 0x3fef5818dcfba487ULL
,
2878 0xbc82919e2040220fULL
, 0x3fef60e316c98398ULL
,
2879 0x3c8e5a50d5c192acULL
, 0x3fef69e603db3285ULL
,
2880 0x3c843a59ac016b4bULL
, 0x3fef7321f301b460ULL
,
2881 0xbc82d52107b43e1fULL
, 0x3fef7c97337b9b5fULL
,
2882 0xbc892ab93b470dc9ULL
, 0x3fef864614f5a129ULL
,
2883 0x3c74b604603a88d3ULL
, 0x3fef902ee78b3ff6ULL
,
2884 0x3c83c5ec519d7271ULL
, 0x3fef9a51fbc74c83ULL
,
2885 0xbc8ff7128fd391f0ULL
, 0x3fefa4afa2a490daULL
,
2886 0xbc8dae98e223747dULL
, 0x3fefaf482d8e67f1ULL
,
2887 0x3c8ec3bc41aa2008ULL
, 0x3fefba1bee615a27ULL
,
2888 0x3c842b94c3a9eb32ULL
, 0x3fefc52b376bba97ULL
,
2889 0x3c8a64a931d185eeULL
, 0x3fefd0765b6e4540ULL
,
2890 0xbc8e37bae43be3edULL
, 0x3fefdbfdad9cbe14ULL
,
2891 0x3c77893b4d91cd9dULL
, 0x3fefe7c1819e90d8ULL
,
2892 0x3c5305c14160cc89ULL
, 0x3feff3c22b8f71f1ULL
2895 /*********************************************************************
2898 * Copied from musl: src/math/exp.c
/*
 * exp: exponential function for a double argument. The banner in front of
 * this definition says it was copied from musl's src/math/exp.c.
 *
 * Outline, as far as the visible statements and their comments show:
 *  - special inputs are classified by the biased exponent field of x
 *    (abstop) and by the raw bit pattern of x;
 *  - errors are reported through math_error() with _DOMAIN, _UNDERFLOW or
 *    _OVERFLOW and the function name "exp";
 *  - otherwise x is reduced to k and r, 2^(k/N) is taken from exp_T as a
 *    (tail, scale bits) pair, and exp(r) is approximated with C[].
 *
 * NOTE(review): this listing has gaps. The declarations of abstop, y, hi
 * and lo, the statements that compute z, kd, ki and r2, and several braces
 * and return statements are not visible here; the comments added below
 * only describe lines that are shown.
 */
2900 double CDECL
exp( double x
)
/* Coefficients of the polynomial in r that is used to build tmp below. */
2902 static const double C
[] = {
2903 0x1.ffffffffffdbdp
-2,
2904 0x1.555555555543cp
-3,
2905 0x1.55555cf172b91p
-5,
2906 0x1.1111167a4d017p
-7
/* Argument reduction constants. invln2N carries a factor of (1 << 7);
   negln2hiN and negln2loN are the two multipliers of kd in the formula
   for r below (by their names, a hi/lo split of -ln2/N -- TODO confirm). */
2908 static const double invln2N
= 0x1.71547652b82fep0
* (1 << 7),
2909 negln2hiN
= -0x1.62e42fefa0000p
-8,
2910 negln2loN
= -0x1.cf79abc9e3b3ap
-47;
2913 UINT64 ki
, idx
, top
, sbits
;
2914 double kd
, z
, r
, r2
, scale
, tail
, tmp
;
/* Bits 52..62 of x: the 11-bit biased exponent field, sign bit excluded. */
2916 abstop
= (*(UINT64
*)&x
>> 52) & 0x7ff;
2917 if (abstop
- 0x3c9 >= 0x408 - 0x3c9) {
/* NOTE(review): this test is true only if the subtraction wrapped around,
   i.e. abstop < 0x3c9 with an unsigned 32-bit abstop; the declaration of
   abstop is not shown here, confirm its type. */
2918 if (abstop
- 0x3c9 >= 0x80000000)
2919 /* Avoid spurious underflow for tiny x. */
2920 /* Note: 0 is common input. */
2922 if (abstop
>= 0x409) {
/* 0xfff0000000000000 is the bit pattern of -infinity. */
2923 if (*(UINT64
*)&x
== 0xfff0000000000000ULL
)
/* Unsigned comparison of the raw bits of x with the pattern of +infinity;
   when it holds, a _DOMAIN error is reported with 1.0 + x as the value. */
2926 if (*(UINT64
*)&x
> 0x7ff0000000000000ULL
)
2927 return math_error(_DOMAIN
, "exp", x
, 0, 1.0 + x
);
/* Exponent field all ones: x is an infinity or a NaN. */
2929 if (abstop
>= 0x7ff)
/* Sign bit of x set: report _UNDERFLOW with DBL_MIN * DBL_MIN as the value;
   the call after it reports _OVERFLOW with DBL_MAX * DBL_MAX. */
2931 if (*(UINT64
*)&x
>> 63)
2932 return math_error(_UNDERFLOW
, "exp", x
, 0, fp_barrier(DBL_MIN
) * DBL_MIN
);
2934 return math_error(_OVERFLOW
, "exp", x
, 0, fp_barrier(DBL_MAX
) * DBL_MAX
);
2936 /* Large x is special cased below. */
2940 /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
2941 /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
2946 r
= x
+ kd
* negln2hiN
+ kd
* negln2loN
;
2947 /* 2^(k/N) ~= scale * (1 + tail). */
/* exp_T is read as pairs of 64-bit patterns: entry idx is reinterpreted as
   the double tail, entry idx + 1 is added to top to form the bits of scale.
   idx is twice the low 7 bits of ki; top is ki shifted left by 52 - 7. */
2948 idx
= 2 * (ki
% (1 << 7));
2949 top
= ki
<< (52 - 7);
2950 tail
= *(double*)&exp_T
[idx
];
2951 /* This is only a valid scale when -1023*N < k < 1024*N. */
2952 sbits
= exp_T
[idx
+ 1] + top
;
2953 /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
2954 /* Evaluation is optimized assuming superscalar pipelined execution. */
2956 /* Without fma the worst case error is 0.25/N ulp larger. */
2957 /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
2958 tmp
= tail
+ r
+ r2
* (C
[0] + r
* C
[1]) + r2
* r2
* (C
[2] + r
* C
[3]);
2960 /* Handle cases that may overflow or underflow when computing the result that
2961 is scale*(1+TMP) without intermediate rounding. The bit representation of
2962 scale is in SBITS, however it has a computed exponent that may have
2963 overflown into the sign bit so that needs to be adjusted before using it as
2964 a double. (int32_t)KI is the k used in the argument reduction and exponent
2965 adjustment of scale, positive k here means the result may overflow and
2966 negative k means the result may underflow. */
/* Bit 31 of ki clear: the "k > 0" handling follows; the "k < 0" handling
   comes after it. */
2969 if ((ki
& 0x80000000) == 0) {
2970 /* k > 0, the exponent of scale might have overflowed by <= 460. */
/* Lower the exponent field of sbits by 1009, then multiply the result by
   2^1009 to compensate. */
2971 sbits
-= 1009ull << 52;
2972 scale
= *(double*)&sbits
;
2973 y
= 0x1p
1009 * (scale
+ scale
* tmp
);
2975 return math_error(_OVERFLOW
, "exp", x
, 0, y
);
2978 /* k < 0, need special care in the subnormal range. */
/* Raise the exponent field of sbits by 1022 before using it as a double. */
2979 sbits
+= 1022ull << 52;
2980 scale
= *(double*)&sbits
;
2981 y
= scale
+ scale
* tmp
;
2983 /* Round y to the right precision before scaling it into the subnormal
2984 range to avoid double rounding that can cause 0.5+E/2 ulp error where
2985 E is the worst-case ulp error outside the subnormal range. So this
2986 is only useful if the goal is better than 1 ulp worst-case error. */
2988 lo
= scale
- y
+ scale
* tmp
;
2990 lo
= 1.0 - hi
+ y
+ lo
;
2992 /* Avoid -0.0 with downward rounding. */
2995 /* The underflow exception needs to be signaled explicitly. */
2996 fp_barrier(fp_barrier(0x1p
-1022) * 0x1p
-1022);
2998 return math_error(_UNDERFLOW
, "exp", x
, 0, y
);
/* Path without the special handling above: reinterpret sbits as the double
   scale and return scale * (1 + tmp), evaluated as scale + scale * tmp. */
3003 scale
= *(double*)&sbits
;
3004 /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
3005 is no spurious underflow here even without fma. */
3006 return scale
+ scale
* tmp
;
3009 /*********************************************************************
3012 * Copied from musl: src/math/fmod.c
/*
 * fmod: msvcrt implementation of the C library fmod(x, y), working on the
 * raw 64-bit patterns of both arguments. The banner in front of this
 * definition says it was copied from musl's src/math/fmod.c.
 *
 * Special cases visible here:
 *  - x is infinite: math_error(_DOMAIN, "fmod", ...) is called with
 *    (x * y) / (x * y) as the value;
 *  - y is zero or a NaN, or the exponent field of x is all ones:
 *    (x * y) / (x * y) is returned directly.
 *
 * NOTE(review): this listing has gaps. The declarations of sx and i, the
 * bodies of several branches and of the main loop, and the conditions that
 * guard the normalization loops are not visible here.
 */
3014 double CDECL
fmod( double x
, double y
)
/* xi, yi: raw bit patterns of x and y. */
3016 UINT64 xi
= *(UINT64
*)&x
;
3017 UINT64 yi
= *(UINT64
*)&y
;
/* ex, ey: 11-bit biased exponent fields (bits 52..62) of x and y. */
3018 int ex
= xi
>> 52 & 0x7ff;
3019 int ey
= yi
>> 52 & 0x7ff;
3023 if (isinf(x
)) return math_error(_DOMAIN
, "fmod", x
, y
, (x
* y
) / (x
* y
));
/* yi << 1 drops the sign bit, so it is zero for both +0.0 and -0.0. */
3024 if (yi
<< 1 == 0 || isnan(y
) || ex
== 0x7ff)
3025 return (x
* y
) / (x
* y
);
/* With the sign bits shifted out this compares the magnitudes of x and y;
   the inner test singles out equal magnitudes. */
3026 if (xi
<< 1 <= yi
<< 1) {
3027 if (xi
<< 1 == yi
<< 1)
3032 /* normalize x and y */
/* Move the 52 mantissa bits of xi to the top of i and lower ex once for
   every leading zero bit. NOTE(review): presumably only reached for a
   subnormal x; the guarding condition is not shown. */
3034 for (i
= xi
<< 12; i
>> 63 == 0; ex
--, i
<<= 1);
/* Same scan for the mantissa bits of yi, adjusting ey. */
3041 for (i
= yi
<< 12; i
>> 63 == 0; ey
--, i
<<= 1);
/* One iteration per unit of exponent difference between x and y; the loop
   body is not visible here. */
3049 for (; ex
> ey
; ex
--) {
/* Shift xi left until some bit at position 52 or above is set, lowering ex
   by one for each shift. */
3064 for (; xi
>> 52 == 0; xi
<<= 1, ex
--);
/* Put ex into the exponent field (bit 52 upwards) of the result. */
3069 xi
|= (UINT64
)ex
<< 52;
/* Put sx into bit 63, the sign bit, and return the pattern as a double. */
3073 xi
|= (UINT64
)sx
<< 63;
3074 return *(double*)&xi
;
3077 /*********************************************************************
3080 * Copied from musl: src/math/log.c src/math/log_data.c
/*
 * log: natural logarithm of a double. The banner in front of this
 * definition says it was copied from musl's src/math/log.c and
 * src/math/log_data.c.
 *
 * Outline, as far as the visible statements and their comments show:
 *  - inputs whose bit pattern ix is close to that of 1.0 are handled by a
 *    separate polynomial with coefficients B[];
 *  - x < 0x1p-1022, infinities and NaNs are special-cased; errors are
 *    reported through math_error() with _SING or _DOMAIN and the function
 *    name "log";
 *  - otherwise x is split as 2^k * z, a table entry is selected by a 7-bit
 *    index i, and log(x) = log1p(z/c-1) + log(c) + k*Ln2 is evaluated with
 *    the polynomial coefficients A[].
 *
 * NOTE(review): this listing has gaps. The field declarations and names of
 * the two tables, the first entry of B[], the declarations of ix, iz, tmp,
 * top, k, i, rhi and rlo, the statements that compute ix, top, z, invc,
 * logc, kd, hi, r3, rhi and rlo, and the final return are not visible
 * here; the comments added below only describe lines that are shown.
 */
3082 double CDECL
log( double x
)
/* The two multipliers of kd in the "k*Ln2" terms below: Ln2hi in the
   high part w, Ln2lo in the low part lo. */
3084 static const double Ln2hi
= 0x1.62e42fefa3800p
-1,
3085 Ln2lo
= 0x1.ef35793c76730p
-45;
/* Polynomial coefficients for the general path (see the final expression
   for y at the end of the function). */
3086 static const double A
[] = {
3087 -0x1.0000000000001p
-1,
3088 0x1.555555551305bp
-2,
3089 -0x1.fffffffeb459p
-3,
3090 0x1.999b324f10111p
-3,
3091 -0x1.55575e506c89fp
-3
/* Polynomial coefficients for inputs close to 1.0. The code below indexes
   B[0]..B[10] and a later comment states B[0] == -0.5; the line holding
   that first entry is not visible in this listing. */
3093 static const double B
[] = {
3095 0x1.5555555555577p
-2,
3096 -0x1.ffffffffffdcbp
-3,
3097 0x1.999999995dd0cp
-3,
3098 -0x1.55555556745a7p
-3,
3099 0x1.24924a344de3p
-3,
3100 -0x1.fffffa4423d65p
-4,
3101 0x1.c7184282ad6cap
-4,
3102 -0x1.999eb43b068ffp
-4,
3103 0x1.78182f7afd085p
-4,
3104 -0x1.5521375d145cdp
-4
/* First lookup table: 128 two-double entries. NOTE(review): the field
   declarations and the table name are not shown; presumably these are the
   (invc, logc) pairs for the subinterval centers c -- confirm. */
3106 static const struct {
3109 {0x1.734f0c3e0de9fp
+0, -0x1.7cc7f79e69000p
-2},
3110 {0x1.713786a2ce91fp
+0, -0x1.76feec20d0000p
-2},
3111 {0x1.6f26008fab5a0p
+0, -0x1.713e31351e000p
-2},
3112 {0x1.6d1a61f138c7dp
+0, -0x1.6b85b38287800p
-2},
3113 {0x1.6b1490bc5b4d1p
+0, -0x1.65d5590807800p
-2},
3114 {0x1.69147332f0cbap
+0, -0x1.602d076180000p
-2},
3115 {0x1.6719f18224223p
+0, -0x1.5a8ca86909000p
-2},
3116 {0x1.6524f99a51ed9p
+0, -0x1.54f4356035000p
-2},
3117 {0x1.63356aa8f24c4p
+0, -0x1.4f637c36b4000p
-2},
3118 {0x1.614b36b9ddc14p
+0, -0x1.49da7fda85000p
-2},
3119 {0x1.5f66452c65c4cp
+0, -0x1.445923989a800p
-2},
3120 {0x1.5d867b5912c4fp
+0, -0x1.3edf439b0b800p
-2},
3121 {0x1.5babccb5b90dep
+0, -0x1.396ce448f7000p
-2},
3122 {0x1.59d61f2d91a78p
+0, -0x1.3401e17bda000p
-2},
3123 {0x1.5805612465687p
+0, -0x1.2e9e2ef468000p
-2},
3124 {0x1.56397cee76bd3p
+0, -0x1.2941b3830e000p
-2},
3125 {0x1.54725e2a77f93p
+0, -0x1.23ec58cda8800p
-2},
3126 {0x1.52aff42064583p
+0, -0x1.1e9e129279000p
-2},
3127 {0x1.50f22dbb2bddfp
+0, -0x1.1956d2b48f800p
-2},
3128 {0x1.4f38f4734ded7p
+0, -0x1.141679ab9f800p
-2},
3129 {0x1.4d843cfde2840p
+0, -0x1.0edd094ef9800p
-2},
3130 {0x1.4bd3ec078a3c8p
+0, -0x1.09aa518db1000p
-2},
3131 {0x1.4a27fc3e0258ap
+0, -0x1.047e65263b800p
-2},
3132 {0x1.4880524d48434p
+0, -0x1.feb224586f000p
-3},
3133 {0x1.46dce1b192d0bp
+0, -0x1.f474a7517b000p
-3},
3134 {0x1.453d9d3391854p
+0, -0x1.ea4443d103000p
-3},
3135 {0x1.43a2744b4845ap
+0, -0x1.e020d44e9b000p
-3},
3136 {0x1.420b54115f8fbp
+0, -0x1.d60a22977f000p
-3},
3137 {0x1.40782da3ef4b1p
+0, -0x1.cc00104959000p
-3},
3138 {0x1.3ee8f5d57fe8fp
+0, -0x1.c202956891000p
-3},
3139 {0x1.3d5d9a00b4ce9p
+0, -0x1.b81178d811000p
-3},
3140 {0x1.3bd60c010c12bp
+0, -0x1.ae2c9ccd3d000p
-3},
3141 {0x1.3a5242b75dab8p
+0, -0x1.a45402e129000p
-3},
3142 {0x1.38d22cd9fd002p
+0, -0x1.9a877681df000p
-3},
3143 {0x1.3755bc5847a1cp
+0, -0x1.90c6d69483000p
-3},
3144 {0x1.35dce49ad36e2p
+0, -0x1.87120a645c000p
-3},
3145 {0x1.34679984dd440p
+0, -0x1.7d68fb4143000p
-3},
3146 {0x1.32f5cceffcb24p
+0, -0x1.73cb83c627000p
-3},
3147 {0x1.3187775a10d49p
+0, -0x1.6a39a9b376000p
-3},
3148 {0x1.301c8373e3990p
+0, -0x1.60b3154b7a000p
-3},
3149 {0x1.2eb4ebb95f841p
+0, -0x1.5737d76243000p
-3},
3150 {0x1.2d50a0219a9d1p
+0, -0x1.4dc7b8fc23000p
-3},
3151 {0x1.2bef9a8b7fd2ap
+0, -0x1.4462c51d20000p
-3},
3152 {0x1.2a91c7a0c1babp
+0, -0x1.3b08abc830000p
-3},
3153 {0x1.293726014b530p
+0, -0x1.31b996b490000p
-3},
3154 {0x1.27dfa5757a1f5p
+0, -0x1.2875490a44000p
-3},
3155 {0x1.268b39b1d3bbfp
+0, -0x1.1f3b9f879a000p
-3},
3156 {0x1.2539d838ff5bdp
+0, -0x1.160c8252ca000p
-3},
3157 {0x1.23eb7aac9083bp
+0, -0x1.0ce7f57f72000p
-3},
3158 {0x1.22a012ba940b6p
+0, -0x1.03cdc49fea000p
-3},
3159 {0x1.2157996cc4132p
+0, -0x1.f57bdbc4b8000p
-4},
3160 {0x1.201201dd2fc9bp
+0, -0x1.e370896404000p
-4},
3161 {0x1.1ecf4494d480bp
+0, -0x1.d17983ef94000p
-4},
3162 {0x1.1d8f5528f6569p
+0, -0x1.bf9674ed8a000p
-4},
3163 {0x1.1c52311577e7cp
+0, -0x1.adc79202f6000p
-4},
3164 {0x1.1b17c74cb26e9p
+0, -0x1.9c0c3e7288000p
-4},
3165 {0x1.19e010c2c1ab6p
+0, -0x1.8a646b372c000p
-4},
3166 {0x1.18ab07bb670bdp
+0, -0x1.78d01b3ac0000p
-4},
3167 {0x1.1778a25efbcb6p
+0, -0x1.674f145380000p
-4},
3168 {0x1.1648d354c31dap
+0, -0x1.55e0e6d878000p
-4},
3169 {0x1.151b990275fddp
+0, -0x1.4485cdea1e000p
-4},
3170 {0x1.13f0ea432d24cp
+0, -0x1.333d94d6aa000p
-4},
3171 {0x1.12c8b7210f9dap
+0, -0x1.22079f8c56000p
-4},
3172 {0x1.11a3028ecb531p
+0, -0x1.10e4698622000p
-4},
3173 {0x1.107fbda8434afp
+0, -0x1.ffa6c6ad20000p
-5},
3174 {0x1.0f5ee0f4e6bb3p
+0, -0x1.dda8d4a774000p
-5},
3175 {0x1.0e4065d2a9fcep
+0, -0x1.bbcece4850000p
-5},
3176 {0x1.0d244632ca521p
+0, -0x1.9a1894012c000p
-5},
3177 {0x1.0c0a77ce2981ap
+0, -0x1.788583302c000p
-5},
3178 {0x1.0af2f83c636d1p
+0, -0x1.5715e67d68000p
-5},
3179 {0x1.09ddb98a01339p
+0, -0x1.35c8a49658000p
-5},
3180 {0x1.08cabaf52e7dfp
+0, -0x1.149e364154000p
-5},
3181 {0x1.07b9f2f4e28fbp
+0, -0x1.e72c082eb8000p
-6},
3182 {0x1.06ab58c358f19p
+0, -0x1.a55f152528000p
-6},
3183 {0x1.059eea5ecf92cp
+0, -0x1.63d62cf818000p
-6},
3184 {0x1.04949cdd12c90p
+0, -0x1.228fb8caa0000p
-6},
3185 {0x1.038c6c6f0ada9p
+0, -0x1.c317b20f90000p
-7},
3186 {0x1.02865137932a9p
+0, -0x1.419355daa0000p
-7},
3187 {0x1.0182427ea7348p
+0, -0x1.81203c2ec0000p
-8},
3188 {0x1.008040614b195p
+0, -0x1.0040979240000p
-9},
3189 {0x1.fe01ff726fa1ap
-1, 0x1.feff384900000p
-9},
3190 {0x1.fa11cc261ea74p
-1, 0x1.7dc41353d0000p
-7},
3191 {0x1.f6310b081992ep
-1, 0x1.3cea3c4c28000p
-6},
3192 {0x1.f25f63ceeadcdp
-1, 0x1.b9fc114890000p
-6},
3193 {0x1.ee9c8039113e7p
-1, 0x1.1b0d8ce110000p
-5},
3194 {0x1.eae8078cbb1abp
-1, 0x1.58a5bd001c000p
-5},
3195 {0x1.e741aa29d0c9bp
-1, 0x1.95c8340d88000p
-5},
3196 {0x1.e3a91830a99b5p
-1, 0x1.d276aef578000p
-5},
3197 {0x1.e01e009609a56p
-1, 0x1.07598e598c000p
-4},
3198 {0x1.dca01e577bb98p
-1, 0x1.253f5e30d2000p
-4},
3199 {0x1.d92f20b7c9103p
-1, 0x1.42edd8b380000p
-4},
3200 {0x1.d5cac66fb5ccep
-1, 0x1.606598757c000p
-4},
3201 {0x1.d272caa5ede9dp
-1, 0x1.7da76356a0000p
-4},
3202 {0x1.cf26e3e6b2ccdp
-1, 0x1.9ab434e1c6000p
-4},
3203 {0x1.cbe6da2a77902p
-1, 0x1.b78c7bb0d6000p
-4},
3204 {0x1.c8b266d37086dp
-1, 0x1.d431332e72000p
-4},
3205 {0x1.c5894bd5d5804p
-1, 0x1.f0a3171de6000p
-4},
3206 {0x1.c26b533bb9f8cp
-1, 0x1.067152b914000p
-3},
3207 {0x1.bf583eeece73fp
-1, 0x1.147858292b000p
-3},
3208 {0x1.bc4fd75db96c1p
-1, 0x1.2266ecdca3000p
-3},
3209 {0x1.b951e0c864a28p
-1, 0x1.303d7a6c55000p
-3},
3210 {0x1.b65e2c5ef3e2cp
-1, 0x1.3dfc33c331000p
-3},
3211 {0x1.b374867c9888bp
-1, 0x1.4ba366b7a8000p
-3},
3212 {0x1.b094b211d304ap
-1, 0x1.5933928d1f000p
-3},
3213 {0x1.adbe885f2ef7ep
-1, 0x1.66acd2418f000p
-3},
3214 {0x1.aaf1d31603da2p
-1, 0x1.740f8ec669000p
-3},
3215 {0x1.a82e63fd358a7p
-1, 0x1.815c0f51af000p
-3},
3216 {0x1.a5740ef09738bp
-1, 0x1.8e92954f68000p
-3},
3217 {0x1.a2c2a90ab4b27p
-1, 0x1.9bb3602f84000p
-3},
3218 {0x1.a01a01393f2d1p
-1, 0x1.a8bed1c2c0000p
-3},
3219 {0x1.9d79f24db3c1bp
-1, 0x1.b5b515c01d000p
-3},
3220 {0x1.9ae2505c7b190p
-1, 0x1.c2967ccbcc000p
-3},
3221 {0x1.9852ef297ce2fp
-1, 0x1.cf635d5486000p
-3},
3222 {0x1.95cbaeea44b75p
-1, 0x1.dc1bd3446c000p
-3},
3223 {0x1.934c69de74838p
-1, 0x1.e8c01b8cfe000p
-3},
3224 {0x1.90d4f2f6752e6p
-1, 0x1.f5509c0179000p
-3},
3225 {0x1.8e6528effd79dp
-1, 0x1.00e6c121fb800p
-2},
3226 {0x1.8bfce9fcc007cp
-1, 0x1.071b80e93d000p
-2},
3227 {0x1.899c0dabec30ep
-1, 0x1.0d46b9e867000p
-2},
3228 {0x1.87427aa2317fbp
-1, 0x1.13687334bd000p
-2},
3229 {0x1.84f00acb39a08p
-1, 0x1.1980d67234800p
-2},
3230 {0x1.82a49e8653e55p
-1, 0x1.1f8ffe0cc8000p
-2},
3231 {0x1.8060195f40260p
-1, 0x1.2595fd7636800p
-2},
3232 {0x1.7e22563e0a329p
-1, 0x1.2b9300914a800p
-2},
3233 {0x1.7beb377dcb5adp
-1, 0x1.3187210436000p
-2},
3234 {0x1.79baa679725c2p
-1, 0x1.377266dec1800p
-2},
3235 {0x1.77907f2170657p
-1, 0x1.3d54ffbaf3000p
-2},
3236 {0x1.756cadbd6130cp
-1, 0x1.432eee32fe000p
-2}
/* Second lookup table: 128 two-double entries. NOTE(review): the field
   declarations and the table name are not shown; the reduction below reads
   T2[i].chi and T2[i].clo, so presumably this is T2 with a hi/lo split of
   each center c -- confirm. */
3238 static const struct {
3241 {0x1.61000014fb66bp
-1, 0x1.e026c91425b3cp
-56},
3242 {0x1.63000034db495p
-1, 0x1.dbfea48005d41p
-55},
3243 {0x1.650000d94d478p
-1, 0x1.e7fa786d6a5b7p
-55},
3244 {0x1.67000074e6fadp
-1, 0x1.1fcea6b54254cp
-57},
3245 {0x1.68ffffedf0faep
-1, -0x1.c7e274c590efdp
-56},
3246 {0x1.6b0000763c5bcp
-1, -0x1.ac16848dcda01p
-55},
3247 {0x1.6d0001e5cc1f6p
-1, 0x1.33f1c9d499311p
-55},
3248 {0x1.6efffeb05f63ep
-1, -0x1.e80041ae22d53p
-56},
3249 {0x1.710000e86978p
-1, 0x1.bff6671097952p
-56},
3250 {0x1.72ffffc67e912p
-1, 0x1.c00e226bd8724p
-55},
3251 {0x1.74fffdf81116ap
-1, -0x1.e02916ef101d2p
-57},
3252 {0x1.770000f679c9p
-1, -0x1.7fc71cd549c74p
-57},
3253 {0x1.78ffffa7ec835p
-1, 0x1.1bec19ef50483p
-55},
3254 {0x1.7affffe20c2e6p
-1, -0x1.07e1729cc6465p
-56},
3255 {0x1.7cfffed3fc9p
-1, -0x1.08072087b8b1cp
-55},
3256 {0x1.7efffe9261a76p
-1, 0x1.dc0286d9df9aep
-55},
3257 {0x1.81000049ca3e8p
-1, 0x1.97fd251e54c33p
-55},
3258 {0x1.8300017932c8fp
-1, -0x1.afee9b630f381p
-55},
3259 {0x1.850000633739cp
-1, 0x1.9bfbf6b6535bcp
-55},
3260 {0x1.87000204289c6p
-1, -0x1.bbf65f3117b75p
-55},
3261 {0x1.88fffebf57904p
-1, -0x1.9006ea23dcb57p
-55},
3262 {0x1.8b00022bc04dfp
-1, -0x1.d00df38e04b0ap
-56},
3263 {0x1.8cfffe50c1b8ap
-1, -0x1.8007146ff9f05p
-55},
3264 {0x1.8effffc918e43p
-1, 0x1.3817bd07a7038p
-55},
3265 {0x1.910001efa5fc7p
-1, 0x1.93e9176dfb403p
-55},
3266 {0x1.9300013467bb9p
-1, 0x1.f804e4b980276p
-56},
3267 {0x1.94fffe6ee076fp
-1, -0x1.f7ef0d9ff622ep
-55},
3268 {0x1.96fffde3c12d1p
-1, -0x1.082aa962638bap
-56},
3269 {0x1.98ffff4458a0dp
-1, -0x1.7801b9164a8efp
-55},
3270 {0x1.9afffdd982e3ep
-1, -0x1.740e08a5a9337p
-55},
3271 {0x1.9cfffed49fb66p
-1, 0x1.fce08c19bep
-60},
3272 {0x1.9f00020f19c51p
-1, -0x1.a3faa27885b0ap
-55},
3273 {0x1.a10001145b006p
-1, 0x1.4ff489958da56p
-56},
3274 {0x1.a300007bbf6fap
-1, 0x1.cbeab8a2b6d18p
-55},
3275 {0x1.a500010971d79p
-1, 0x1.8fecadd78793p
-55},
3276 {0x1.a70001df52e48p
-1, -0x1.f41763dd8abdbp
-55},
3277 {0x1.a90001c593352p
-1, -0x1.ebf0284c27612p
-55},
3278 {0x1.ab0002a4f3e4bp
-1, -0x1.9fd043cff3f5fp
-57},
3279 {0x1.acfffd7ae1ed1p
-1, -0x1.23ee7129070b4p
-55},
3280 {0x1.aefffee510478p
-1, 0x1.a063ee00edea3p
-57},
3281 {0x1.b0fffdb650d5bp
-1, 0x1.a06c8381f0ab9p
-58},
3282 {0x1.b2ffffeaaca57p
-1, -0x1.9011e74233c1dp
-56},
3283 {0x1.b4fffd995badcp
-1, -0x1.9ff1068862a9fp
-56},
3284 {0x1.b7000249e659cp
-1, 0x1.aff45d0864f3ep
-55},
3285 {0x1.b8ffff987164p
-1, 0x1.cfe7796c2c3f9p
-56},
3286 {0x1.bafffd204cb4fp
-1, -0x1.3ff27eef22bc4p
-57},
3287 {0x1.bcfffd2415c45p
-1, -0x1.cffb7ee3bea21p
-57},
3288 {0x1.beffff86309dfp
-1, -0x1.14103972e0b5cp
-55},
3289 {0x1.c0fffe1b57653p
-1, 0x1.bc16494b76a19p
-55},
3290 {0x1.c2ffff1fa57e3p
-1, -0x1.4feef8d30c6edp
-57},
3291 {0x1.c4fffdcbfe424p
-1, -0x1.43f68bcec4775p
-55},
3292 {0x1.c6fffed54b9f7p
-1, 0x1.47ea3f053e0ecp
-55},
3293 {0x1.c8fffeb998fd5p
-1, 0x1.383068df992f1p
-56},
3294 {0x1.cb0002125219ap
-1, -0x1.8fd8e64180e04p
-57},
3295 {0x1.ccfffdd94469cp
-1, 0x1.e7ebe1cc7ea72p
-55},
3296 {0x1.cefffeafdc476p
-1, 0x1.ebe39ad9f88fep
-55},
3297 {0x1.d1000169af82bp
-1, 0x1.57d91a8b95a71p
-56},
3298 {0x1.d30000d0ff71dp
-1, 0x1.9c1906970c7dap
-55},
3299 {0x1.d4fffea790fc4p
-1, -0x1.80e37c558fe0cp
-58},
3300 {0x1.d70002edc87e5p
-1, -0x1.f80d64dc10f44p
-56},
3301 {0x1.d900021dc82aap
-1, -0x1.47c8f94fd5c5cp
-56},
3302 {0x1.dafffd86b0283p
-1, 0x1.c7f1dc521617ep
-55},
3303 {0x1.dd000296c4739p
-1, 0x1.8019eb2ffb153p
-55},
3304 {0x1.defffe54490f5p
-1, 0x1.e00d2c652cc89p
-57},
3305 {0x1.e0fffcdabf694p
-1, -0x1.f8340202d69d2p
-56},
3306 {0x1.e2fffdb52c8ddp
-1, 0x1.b00c1ca1b0864p
-56},
3307 {0x1.e4ffff24216efp
-1, 0x1.2ffa8b094ab51p
-56},
3308 {0x1.e6fffe88a5e11p
-1, -0x1.7f673b1efbe59p
-58},
3309 {0x1.e9000119eff0dp
-1, -0x1.4808d5e0bc801p
-55},
3310 {0x1.eafffdfa51744p
-1, 0x1.80006d54320b5p
-56},
3311 {0x1.ed0001a127fa1p
-1, -0x1.002f860565c92p
-58},
3312 {0x1.ef00007babcc4p
-1, -0x1.540445d35e611p
-55},
3313 {0x1.f0ffff57a8d02p
-1, -0x1.ffb3139ef9105p
-59},
3314 {0x1.f30001ee58ac7p
-1, 0x1.a81acf2731155p
-55},
3315 {0x1.f4ffff5823494p
-1, 0x1.a3f41d4d7c743p
-55},
3316 {0x1.f6ffffca94c6bp
-1, -0x1.202f41c987875p
-57},
3317 {0x1.f8fffe1f9c441p
-1, 0x1.77dd1f477e74bp
-56},
3318 {0x1.fafffd2e0e37ep
-1, -0x1.f01199a7ca331p
-57},
3319 {0x1.fd0001c77e49ep
-1, 0x1.181ee4bceacb1p
-56},
3320 {0x1.feffff7e0c331p
-1, -0x1.e05370170875ap
-57},
3321 {0x1.00ffff465606ep
+0, -0x1.a7ead491c0adap
-55},
3322 {0x1.02ffff3867a58p
+0, -0x1.77f69c3fcb2ep
-54},
3323 {0x1.04ffffdfc0d17p
+0, 0x1.7bffe34cb945bp
-54},
3324 {0x1.0700003cd4d82p
+0, 0x1.20083c0e456cbp
-55},
3325 {0x1.08ffff9f2cbe8p
+0, -0x1.dffdfbe37751ap
-57},
3326 {0x1.0b000010cda65p
+0, -0x1.13f7faee626ebp
-54},
3327 {0x1.0d00001a4d338p
+0, 0x1.07dfa79489ff7p
-55},
3328 {0x1.0effffadafdfdp
+0, -0x1.7040570d66bcp
-56},
3329 {0x1.110000bbafd96p
+0, 0x1.e80d4846d0b62p
-55},
3330 {0x1.12ffffae5f45dp
+0, 0x1.dbffa64fd36efp
-54},
3331 {0x1.150000dd59ad9p
+0, 0x1.a0077701250aep
-54},
3332 {0x1.170000f21559ap
+0, 0x1.dfdf9e2e3deeep
-55},
3333 {0x1.18ffffc275426p
+0, 0x1.10030dc3b7273p
-54},
3334 {0x1.1b000123d3c59p
+0, 0x1.97f7980030188p
-54},
3335 {0x1.1cffff8299eb7p
+0, -0x1.5f932ab9f8c67p
-57},
3336 {0x1.1effff48ad4p
+0, 0x1.37fbf9da75bebp
-54},
3337 {0x1.210000c8b86a4p
+0, 0x1.f806b91fd5b22p
-54},
3338 {0x1.2300003854303p
+0, 0x1.3ffc2eb9fbf33p
-54},
3339 {0x1.24fffffbcf684p
+0, 0x1.601e77e2e2e72p
-56},
3340 {0x1.26ffff52921d9p
+0, 0x1.ffcbb767f0c61p
-56},
3341 {0x1.2900014933a3cp
+0, -0x1.202ca3c02412bp
-56},
3342 {0x1.2b00014556313p
+0, -0x1.2808233f21f02p
-54},
3343 {0x1.2cfffebfe523bp
+0, -0x1.8ff7e384fdcf2p
-55},
3344 {0x1.2f0000bb8ad96p
+0, -0x1.5ff51503041c5p
-55},
3345 {0x1.30ffffb7ae2afp
+0, -0x1.10071885e289dp
-55},
3346 {0x1.32ffffeac5f7fp
+0, -0x1.1ff5d3fb7b715p
-54},
3347 {0x1.350000ca66756p
+0, 0x1.57f82228b82bdp
-54},
3348 {0x1.3700011fbf721p
+0, 0x1.000bac40dd5ccp
-55},
3349 {0x1.38ffff9592fb9p
+0, -0x1.43f9d2db2a751p
-54},
3350 {0x1.3b00004ddd242p
+0, 0x1.57f6b707638e1p
-55},
3351 {0x1.3cffff5b2c957p
+0, 0x1.a023a10bf1231p
-56},
3352 {0x1.3efffeab0b418p
+0, 0x1.87f6d66b152bp
-54},
3353 {0x1.410001532aff4p
+0, 0x1.7f8375f198524p
-57},
3354 {0x1.4300017478b29p
+0, 0x1.301e672dc5143p
-55},
3355 {0x1.44fffe795b463p
+0, 0x1.9ff69b8b2895ap
-55},
3356 {0x1.46fffe80475ep
+0, -0x1.5c0b19bc2f254p
-54},
3357 {0x1.48fffef6fc1e7p
+0, 0x1.b4009f23a2a72p
-54},
3358 {0x1.4afffe5bea704p
+0, -0x1.4ffb7bf0d7d45p
-54},
3359 {0x1.4d000171027dep
+0, -0x1.9c06471dc6a3dp
-54},
3360 {0x1.4f0000ff03ee2p
+0, 0x1.77f890b85531cp
-54},
3361 {0x1.5100012dc4bd1p
+0, 0x1.004657166a436p
-57},
3362 {0x1.530001605277ap
+0, -0x1.6bfcece233209p
-54},
3363 {0x1.54fffecdb704cp
+0, -0x1.902720505a1d7p
-55},
3364 {0x1.56fffef5f54a9p
+0, 0x1.bbfe60ec96412p
-54},
3365 {0x1.5900017e61012p
+0, 0x1.87ec581afef9p
-55},
3366 {0x1.5b00003c93e92p
+0, -0x1.f41080abf0ccp
-54},
3367 {0x1.5d0001d4919bcp
+0, -0x1.8812afb254729p
-54},
3368 {0x1.5efffe7b87a89p
+0, -0x1.47eb780ed6904p
-54}
3371 double w
, z
, r
, r2
, r3
, y
, invc
, logc
, kd
, hi
, lo
;
/* Unsigned range check on the bit pattern ix: true when ix lies in
   [0x3fee000000000000, 0x3fee000000000000 + 0x3090000000000). */
3378 if (ix
- 0x3fee000000000000ULL
< 0x3090000000000ULL
) {
3381 /* Handle close to 1.0 inputs separately. */
3382 /* Fix sign of zero with downward rounding when x==1. */
/* 0x3ff0000000000000 is the bit pattern of 1.0. */
3383 if (ix
== 0x3ff0000000000000ULL
)
/* Terms of the near-1 polynomial from r^3 upwards, using B[1]..B[10]. */
3388 y
= r3
* (B
[1] + r
* B
[2] + r2
* B
[3] + r3
* (B
[4] + r
* B
[5] + r2
* B
[6] +
3389 r3
* (B
[7] + r
* B
[8] + r2
* B
[9] + r3
* B
[10])));
3390 /* Worst-case error is around 0.507 ULP. */
/* The B[0]*r*r term is built from the split parts rhi and rlo: rhi*rhi*B[0]
   goes into w, B[0]*rlo*(rhi + r) is added to lo. */
3394 w
= rhi
* rhi
* B
[0]; /* B[0] == -0.5. */
3397 lo
+= B
[0] * rlo
* (rhi
+ r
);
3402 if (top
- 0x0010 >= 0x7ff0 - 0x0010) {
3403 /* x < 0x1p-1022 or inf or nan. */
/* _SING error; the value passed on is +/-1.0 divided by x, the sign being
   picked by bit 0x8000 of top. NOTE(review): the condition guarding this
   call is not visible here. */
3405 return math_error(_SING
, "log", x
, 0, (top
& 0x8000 ? 1.0 : -1.0) / x
);
3406 if (ix
== 0x7ff0000000000000ULL
) /* log(inf) == inf. */
/* Exponent bits of top all ones and a non-zero mantissa field: x is a NaN. */
3408 if ((top
& 0x7ff0) == 0x7ff0 && (ix
& 0xfffffffffffffULL
))
/* _DOMAIN error with (x - x) / (x - x) as the value passed on.
   NOTE(review): the condition guarding this call is not visible here. */
3411 return math_error(_DOMAIN
, "log", x
, 0, (x
- x
) / (x
- x
));
3412 /* x is subnormal, normalize it. */
3418 /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
3419 The range is split into N subintervals.
3420 The ith subinterval contains z and c is near its center. */
/* tmp: ix rebased by the constant 0x3fe6000000000000.
   i:   the 7 bits of tmp just below bit 52, used as the table index.
   k:   tmp shifted right arithmetically by 52.
   iz:  ix with the bits of tmp from bit 52 upwards subtracted. */
3421 tmp
= ix
- 0x3fe6000000000000ULL
;
3422 i
= (tmp
>> (52 - 7)) % (1 << 7);
3423 k
= (INT64
)tmp
>> 52; /* arithmetic shift */
3424 iz
= ix
- (tmp
& 0xfffULL
<< 52);
3429 /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
3430 /* r ~= z/c - 1, |r| < 1/(2*N). */
/* Subtract the chi and clo fields of entry i from z, then scale by invc. */
3431 r
= (z
- T2
[i
].chi
- T2
[i
].clo
) * invc
;
3434 /* hi + lo = r + log(c) + k*Ln2. */
3435 w
= kd
* Ln2hi
+ logc
;
3437 lo
= w
- hi
+ r
+ kd
* Ln2lo
;
3439 /* log(x) = lo + (log1p(r) - r) + hi. */
3440 r2
= r
* r
; /* rounding error: 0x1p-54/N^2. */
3441 /* Worst case error if |y| > 0x1p-5:
3442 0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
3443 Worst case error if |y| > 0x1p-4:
3444 0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
/* Final sum: lo, the A[] polynomial in r, then hi. */
3445 y
= lo
+ r2
* A
[0] +
3446 r
* r2
* (A
[1] + r
* A
[2] + r2
* (A
[3] + r
* A
[4])) + hi
;
3450 /*********************************************************************
/*
 * log10: base-10 logarithm of a double.
 *
 * Outline, as far as the visible statements and their comments show:
 *  - special inputs are classified through hx; errors are reported through
 *    math_error() with _SING or _DOMAIN and the function name "log10";
 *  - x is reduced into [sqrt(2)/2, sqrt(2)] while k is adjusted by the
 *    exponent change;
 *  - log(1+f) is obtained as hi + lo from a polynomial with coefficients
 *    Lg1..Lg7, then converted to val_hi + val_lo ~ log10(1+f) + k*log10(2).
 *
 * NOTE(review): this listing has gaps. The declarations of hx and k, the
 * statements that compute hx, f, hfsq, s, z, w, R, hi, dk and y, and
 * several braces and return statements are not visible here; the comments
 * added below only describe lines that are shown.
 */
3453 double CDECL
log10( double x
)
/* ivln10hi/ivln10lo multiply hi and lo in the conversion to val_hi/val_lo,
   log10_2lo multiplies dk, and Lg1..Lg7 are the coefficients used in t1 and
   t2. By their names and values, ivln10hi + ivln10lo looks like 1/ln(10)
   and log10_2hi + log10_2lo like log10(2) -- TODO confirm. */
3455 static const double ivln10hi
= 4.34294481878168880939e-01,
3456 ivln10lo
= 2.50829467116452752298e-11,
3457 log10_2hi
= 3.01029995663611771306e-01,
3458 log10_2lo
= 3.69423907715893078616e-13,
3459 Lg1
= 6.666666666666735130e-01,
3460 Lg2
= 3.999999999940941908e-01,
3461 Lg3
= 2.857142874366239149e-01,
3462 Lg4
= 2.222219843214978396e-01,
3463 Lg5
= 1.818357216161805012e-01,
3464 Lg6
= 1.531383769920937332e-01,
3465 Lg7
= 1.479819860511658591e-01;
/* u gives access to the 64-bit pattern of x through u.i. */
3467 union {double f
; UINT64 i
;} u
= {x
};
3468 double hfsq
, f
, s
, z
, R
, w
, t1
, t2
, dk
, y
, hi
, lo
, val_hi
, val_lo
;
/* NOTE(review): hx is presumably the upper 32 bits of u.i (u.i is rebuilt
   from hx << 32 further down); its assignment is not shown. With that
   reading the test selects values below 0x1p-1022 and values with the
   sign bit set. */
3474 if (hx
< 0x00100000 || hx
>> 31) {
/* _SING error with -1 / (x * x) as the value passed on. NOTE(review): the
   condition guarding this call is not visible here. */
3476 return math_error(_SING
, "log10", x
, 0, -1 / (x
* x
));
/* Sign bit masked off and the rest above the pattern of infinity:
   x is a NaN. */
3477 if ((u
.i
& ~(1ULL << 63)) > 0x7ff0000000000000ULL
)
/* _DOMAIN error with (x - x) / (x - x) as the value passed on.
   NOTE(review): the condition guarding this call is not visible here. */
3480 return math_error(_DOMAIN
, "log10", x
, 0, (x
- x
) / (x
- x
));
3481 /* subnormal number, scale x up */
3486 } else if (hx
>= 0x7ff00000) {
/* hx equal to 0x3ff00000 with the low 32 bits of u.i all zero: x is
   exactly 1.0. */
3488 } else if (hx
== 0x3ff00000 && u
.i
<<32 == 0)
3491 /* reduce x into [sqrt(2)/2, sqrt(2)] */
/* Rebias hx, add the resulting exponent (relative to 0x3ff) to k, then keep
   the low 20 bits of hx and add 0x3fe6a09e back before rebuilding u.i from
   hx and the untouched low 32 bits. */
3492 hx
+= 0x3ff00000 - 0x3fe6a09e;
3493 k
+= (int)(hx
>> 20) - 0x3ff;
3494 hx
= (hx
& 0x000fffff) + 0x3fe6a09e;
3495 u
.i
= (UINT64
)hx
<< 32 | (u
.i
& 0xffffffff);
/* The polynomial is evaluated as two interleaved sums: t1 uses Lg2, Lg4,
   Lg6 and t2 uses Lg1, Lg3, Lg5, Lg7. */
3503 t1
= w
* (Lg2
+ w
* (Lg4
+ w
* Lg6
));
3504 t2
= z
* (Lg1
+ w
* (Lg3
+ w
* (Lg5
+ w
* Lg7
)));
3507 /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */
/* Clear the low 32 bits of u.i. */
3510 u
.i
&= (UINT64
)-1 << 32;
3512 lo
= f
- hi
- hfsq
+ s
* (hfsq
+ R
);
3514 /* val_hi+val_lo ~ log10(1+f) + k*log10(2) */
3515 val_hi
= hi
* ivln10hi
;
3518 val_lo
= dk
* log10_2lo
+ (lo
+ hi
) * ivln10lo
+ lo
* ivln10hi
;
/* NOTE(review): the four lines below are the interior of a block comment
   whose opening and closing delimiter lines are missing from this listing;
   they are kept exactly as found. */
3521 * Extra precision in for adding y is not strictly needed
3522 * since there is no very large cancellation near x = sqrt(2) or
3523 * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs
3524 * with some parallelism and it reduces the error for many args.
3527 val_lo
+= (y
- w
) + val_hi
;
3530 return val_lo
+ val_hi
;
3533 /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
3534 additional 15 bits precision. IX is the bit representation of x, but
3535 normalized in the subnormal range using the sign bit for the exponent. */
3536 static double pow_log(UINT64 ix
, double *tail
)
3538 static const struct {
3539 double invc
, logc
, logctail
;
3541 {0x1.6a00000000000p
+0, -0x1.62c82f2b9c800p
-2, 0x1.ab42428375680p
-48},
3542 {0x1.6800000000000p
+0, -0x1.5d1bdbf580800p
-2, -0x1.ca508d8e0f720p
-46},
3543 {0x1.6600000000000p
+0, -0x1.5767717455800p
-2, -0x1.362a4d5b6506dp
-45},
3544 {0x1.6400000000000p
+0, -0x1.51aad872df800p
-2, -0x1.684e49eb067d5p
-49},
3545 {0x1.6200000000000p
+0, -0x1.4be5f95777800p
-2, -0x1.41b6993293ee0p
-47},
3546 {0x1.6000000000000p
+0, -0x1.4618bc21c6000p
-2, 0x1.3d82f484c84ccp
-46},
3547 {0x1.5e00000000000p
+0, -0x1.404308686a800p
-2, 0x1.c42f3ed820b3ap
-50},
3548 {0x1.5c00000000000p
+0, -0x1.3a64c55694800p
-2, 0x1.0b1c686519460p
-45},
3549 {0x1.5a00000000000p
+0, -0x1.347dd9a988000p
-2, 0x1.5594dd4c58092p
-45},
3550 {0x1.5800000000000p
+0, -0x1.2e8e2bae12000p
-2, 0x1.67b1e99b72bd8p
-45},
3551 {0x1.5600000000000p
+0, -0x1.2895a13de8800p
-2, 0x1.5ca14b6cfb03fp
-46},
3552 {0x1.5600000000000p
+0, -0x1.2895a13de8800p
-2, 0x1.5ca14b6cfb03fp
-46},
3553 {0x1.5400000000000p
+0, -0x1.22941fbcf7800p
-2, -0x1.65a242853da76p
-46},
3554 {0x1.5200000000000p
+0, -0x1.1c898c1699800p
-2, -0x1.fafbc68e75404p
-46},
3555 {0x1.5000000000000p
+0, -0x1.1675cababa800p
-2, 0x1.f1fc63382a8f0p
-46},
3556 {0x1.4e00000000000p
+0, -0x1.1058bf9ae4800p
-2, -0x1.6a8c4fd055a66p
-45},
3557 {0x1.4c00000000000p
+0, -0x1.0a324e2739000p
-2, -0x1.c6bee7ef4030ep
-47},
3558 {0x1.4a00000000000p
+0, -0x1.0402594b4d000p
-2, -0x1.036b89ef42d7fp
-48},
3559 {0x1.4a00000000000p
+0, -0x1.0402594b4d000p
-2, -0x1.036b89ef42d7fp
-48},
3560 {0x1.4800000000000p
+0, -0x1.fb9186d5e4000p
-3, 0x1.d572aab993c87p
-47},
3561 {0x1.4600000000000p
+0, -0x1.ef0adcbdc6000p
-3, 0x1.b26b79c86af24p
-45},
3562 {0x1.4400000000000p
+0, -0x1.e27076e2af000p
-3, -0x1.72f4f543fff10p
-46},
3563 {0x1.4200000000000p
+0, -0x1.d5c216b4fc000p
-3, 0x1.1ba91bbca681bp
-45},
3564 {0x1.4000000000000p
+0, -0x1.c8ff7c79aa000p
-3, 0x1.7794f689f8434p
-45},
3565 {0x1.4000000000000p
+0, -0x1.c8ff7c79aa000p
-3, 0x1.7794f689f8434p
-45},
3566 {0x1.3e00000000000p
+0, -0x1.bc286742d9000p
-3, 0x1.94eb0318bb78fp
-46},
3567 {0x1.3c00000000000p
+0, -0x1.af3c94e80c000p
-3, 0x1.a4e633fcd9066p
-52},
3568 {0x1.3a00000000000p
+0, -0x1.a23bc1fe2b000p
-3, -0x1.58c64dc46c1eap
-45},
3569 {0x1.3a00000000000p
+0, -0x1.a23bc1fe2b000p
-3, -0x1.58c64dc46c1eap
-45},
3570 {0x1.3800000000000p
+0, -0x1.9525a9cf45000p
-3, -0x1.ad1d904c1d4e3p
-45},
3571 {0x1.3600000000000p
+0, -0x1.87fa06520d000p
-3, 0x1.bbdbf7fdbfa09p
-45},
3572 {0x1.3400000000000p
+0, -0x1.7ab890210e000p
-3, 0x1.bdb9072534a58p
-45},
3573 {0x1.3400000000000p
+0, -0x1.7ab890210e000p
-3, 0x1.bdb9072534a58p
-45},
3574 {0x1.3200000000000p
+0, -0x1.6d60fe719d000p
-3, -0x1.0e46aa3b2e266p
-46},
3575 {0x1.3000000000000p
+0, -0x1.5ff3070a79000p
-3, -0x1.e9e439f105039p
-46},
3576 {0x1.3000000000000p
+0, -0x1.5ff3070a79000p
-3, -0x1.e9e439f105039p
-46},
3577 {0x1.2e00000000000p
+0, -0x1.526e5e3a1b000p
-3, -0x1.0de8b90075b8fp
-45},
3578 {0x1.2c00000000000p
+0, -0x1.44d2b6ccb8000p
-3, 0x1.70cc16135783cp
-46},
3579 {0x1.2c00000000000p
+0, -0x1.44d2b6ccb8000p
-3, 0x1.70cc16135783cp
-46},
3580 {0x1.2a00000000000p
+0, -0x1.371fc201e9000p
-3, 0x1.178864d27543ap
-48},
3581 {0x1.2800000000000p
+0, -0x1.29552f81ff000p
-3, -0x1.48d301771c408p
-45},
3582 {0x1.2600000000000p
+0, -0x1.1b72ad52f6000p
-3, -0x1.e80a41811a396p
-45},
3583 {0x1.2600000000000p
+0, -0x1.1b72ad52f6000p
-3, -0x1.e80a41811a396p
-45},
3584 {0x1.2400000000000p
+0, -0x1.0d77e7cd09000p
-3, 0x1.a699688e85bf4p
-47},
3585 {0x1.2400000000000p
+0, -0x1.0d77e7cd09000p
-3, 0x1.a699688e85bf4p
-47},
3586 {0x1.2200000000000p
+0, -0x1.fec9131dbe000p
-4, -0x1.575545ca333f2p
-45},
3587 {0x1.2000000000000p
+0, -0x1.e27076e2b0000p
-4, 0x1.a342c2af0003cp
-45},
3588 {0x1.2000000000000p
+0, -0x1.e27076e2b0000p
-4, 0x1.a342c2af0003cp
-45},
3589 {0x1.1e00000000000p
+0, -0x1.c5e548f5bc000p
-4, -0x1.d0c57585fbe06p
-46},
3590 {0x1.1c00000000000p
+0, -0x1.a926d3a4ae000p
-4, 0x1.53935e85baac8p
-45},
3591 {0x1.1c00000000000p
+0, -0x1.a926d3a4ae000p
-4, 0x1.53935e85baac8p
-45},
3592 {0x1.1a00000000000p
+0, -0x1.8c345d631a000p
-4, 0x1.37c294d2f5668p
-46},
3593 {0x1.1a00000000000p
+0, -0x1.8c345d631a000p
-4, 0x1.37c294d2f5668p
-46},
3594 {0x1.1800000000000p
+0, -0x1.6f0d28ae56000p
-4, -0x1.69737c93373dap
-45},
3595 {0x1.1600000000000p
+0, -0x1.51b073f062000p
-4, 0x1.f025b61c65e57p
-46},
3596 {0x1.1600000000000p
+0, -0x1.51b073f062000p
-4, 0x1.f025b61c65e57p
-46},
3597 {0x1.1400000000000p
+0, -0x1.341d7961be000p
-4, 0x1.c5edaccf913dfp
-45},
3598 {0x1.1400000000000p
+0, -0x1.341d7961be000p
-4, 0x1.c5edaccf913dfp
-45},
3599 {0x1.1200000000000p
+0, -0x1.16536eea38000p
-4, 0x1.47c5e768fa309p
-46},
3600 {0x1.1000000000000p
+0, -0x1.f0a30c0118000p
-5, 0x1.d599e83368e91p
-45},
3601 {0x1.1000000000000p
+0, -0x1.f0a30c0118000p
-5, 0x1.d599e83368e91p
-45},
3602 {0x1.0e00000000000p
+0, -0x1.b42dd71198000p
-5, 0x1.c827ae5d6704cp
-46},
3603 {0x1.0e00000000000p
+0, -0x1.b42dd71198000p
-5, 0x1.c827ae5d6704cp
-46},
3604 {0x1.0c00000000000p
+0, -0x1.77458f632c000p
-5, -0x1.cfc4634f2a1eep
-45},
3605 {0x1.0c00000000000p
+0, -0x1.77458f632c000p
-5, -0x1.cfc4634f2a1eep
-45},
3606 {0x1.0a00000000000p
+0, -0x1.39e87b9fec000p
-5, 0x1.502b7f526feaap
-48},
3607 {0x1.0a00000000000p
+0, -0x1.39e87b9fec000p
-5, 0x1.502b7f526feaap
-48},
3608 {0x1.0800000000000p
+0, -0x1.f829b0e780000p
-6, -0x1.980267c7e09e4p
-45},
3609 {0x1.0800000000000p
+0, -0x1.f829b0e780000p
-6, -0x1.980267c7e09e4p
-45},
3610 {0x1.0600000000000p
+0, -0x1.7b91b07d58000p
-6, -0x1.88d5493faa639p
-45},
3611 {0x1.0400000000000p
+0, -0x1.fc0a8b0fc0000p
-7, -0x1.f1e7cf6d3a69cp
-50},
3612 {0x1.0400000000000p
+0, -0x1.fc0a8b0fc0000p
-7, -0x1.f1e7cf6d3a69cp
-50},
3613 {0x1.0200000000000p
+0, -0x1.fe02a6b100000p
-8, -0x1.9e23f0dda40e4p
-46},
3614 {0x1.0200000000000p
+0, -0x1.fe02a6b100000p
-8, -0x1.9e23f0dda40e4p
-46},
3615 {0x1.0000000000000p
+0, 0x0.0000000000000p
+0, 0x0.0000000000000p
+0},
3616 {0x1.0000000000000p
+0, 0x0.0000000000000p
+0, 0x0.0000000000000p
+0},
3617 {0x1.fc00000000000p
-1, 0x1.0101575890000p
-7, -0x1.0c76b999d2be8p
-46},
3618 {0x1.f800000000000p
-1, 0x1.0205658938000p
-6, -0x1.3dc5b06e2f7d2p
-45},
3619 {0x1.f400000000000p
-1, 0x1.8492528c90000p
-6, -0x1.aa0ba325a0c34p
-45},
3620 {0x1.f000000000000p
-1, 0x1.0415d89e74000p
-5, 0x1.111c05cf1d753p
-47},
3621 {0x1.ec00000000000p
-1, 0x1.466aed42e0000p
-5, -0x1.c167375bdfd28p
-45},
3622 {0x1.e800000000000p
-1, 0x1.894aa149fc000p
-5, -0x1.97995d05a267dp
-46},
3623 {0x1.e400000000000p
-1, 0x1.ccb73cdddc000p
-5, -0x1.a68f247d82807p
-46},
3624 {0x1.e200000000000p
-1, 0x1.eea31c006c000p
-5, -0x1.e113e4fc93b7bp
-47},
3625 {0x1.de00000000000p
-1, 0x1.1973bd1466000p
-4, -0x1.5325d560d9e9bp
-45},
3626 {0x1.da00000000000p
-1, 0x1.3bdf5a7d1e000p
-4, 0x1.cc85ea5db4ed7p
-45},
3627 {0x1.d600000000000p
-1, 0x1.5e95a4d97a000p
-4, -0x1.c69063c5d1d1ep
-45},
3628 {0x1.d400000000000p
-1, 0x1.700d30aeac000p
-4, 0x1.c1e8da99ded32p
-49},
3629 {0x1.d000000000000p
-1, 0x1.9335e5d594000p
-4, 0x1.3115c3abd47dap
-45},
3630 {0x1.cc00000000000p
-1, 0x1.b6ac88dad6000p
-4, -0x1.390802bf768e5p
-46},
3631 {0x1.ca00000000000p
-1, 0x1.c885801bc4000p
-4, 0x1.646d1c65aacd3p
-45},
3632 {0x1.c600000000000p
-1, 0x1.ec739830a2000p
-4, -0x1.dc068afe645e0p
-45},
3633 {0x1.c400000000000p
-1, 0x1.fe89139dbe000p
-4, -0x1.534d64fa10afdp
-45},
3634 {0x1.c000000000000p
-1, 0x1.1178e8227e000p
-3, 0x1.1ef78ce2d07f2p
-45},
3635 {0x1.be00000000000p
-1, 0x1.1aa2b7e23f000p
-3, 0x1.ca78e44389934p
-45},
3636 {0x1.ba00000000000p
-1, 0x1.2d1610c868000p
-3, 0x1.39d6ccb81b4a1p
-47},
3637 {0x1.b800000000000p
-1, 0x1.365fcb0159000p
-3, 0x1.62fa8234b7289p
-51},
3638 {0x1.b400000000000p
-1, 0x1.4913d8333b000p
-3, 0x1.5837954fdb678p
-45},
3639 {0x1.b200000000000p
-1, 0x1.527e5e4a1b000p
-3, 0x1.633e8e5697dc7p
-45},
3640 {0x1.ae00000000000p
-1, 0x1.6574ebe8c1000p
-3, 0x1.9cf8b2c3c2e78p
-46},
3641 {0x1.ac00000000000p
-1, 0x1.6f0128b757000p
-3, -0x1.5118de59c21e1p
-45},
3642 {0x1.aa00000000000p
-1, 0x1.7898d85445000p
-3, -0x1.c661070914305p
-46},
3643 {0x1.a600000000000p
-1, 0x1.8beafeb390000p
-3, -0x1.73d54aae92cd1p
-47},
3644 {0x1.a400000000000p
-1, 0x1.95a5adcf70000p
-3, 0x1.7f22858a0ff6fp
-47},
3645 {0x1.a000000000000p
-1, 0x1.a93ed3c8ae000p
-3, -0x1.8724350562169p
-45},
3646 {0x1.9e00000000000p
-1, 0x1.b31d8575bd000p
-3, -0x1.c358d4eace1aap
-47},
3647 {0x1.9c00000000000p
-1, 0x1.bd087383be000p
-3, -0x1.d4bc4595412b6p
-45},
3648 {0x1.9a00000000000p
-1, 0x1.c6ffbc6f01000p
-3, -0x1.1ec72c5962bd2p
-48},
3649 {0x1.9600000000000p
-1, 0x1.db13db0d49000p
-3, -0x1.aff2af715b035p
-45},
3650 {0x1.9400000000000p
-1, 0x1.e530effe71000p
-3, 0x1.212276041f430p
-51},
3651 {0x1.9200000000000p
-1, 0x1.ef5ade4dd0000p
-3, -0x1.a211565bb8e11p
-51},
3652 {0x1.9000000000000p
-1, 0x1.f991c6cb3b000p
-3, 0x1.bcbecca0cdf30p
-46},
3653 {0x1.8c00000000000p
-1, 0x1.07138604d5800p
-2, 0x1.89cdb16ed4e91p
-48},
3654 {0x1.8a00000000000p
-1, 0x1.0c42d67616000p
-2, 0x1.7188b163ceae9p
-45},
3655 {0x1.8800000000000p
-1, 0x1.1178e8227e800p
-2, -0x1.c210e63a5f01cp
-45},
3656 {0x1.8600000000000p
-1, 0x1.16b5ccbacf800p
-2, 0x1.b9acdf7a51681p
-45},
3657 {0x1.8400000000000p
-1, 0x1.1bf99635a6800p
-2, 0x1.ca6ed5147bdb7p
-45},
3658 {0x1.8200000000000p
-1, 0x1.214456d0eb800p
-2, 0x1.a87deba46baeap
-47},
3659 {0x1.7e00000000000p
-1, 0x1.2bef07cdc9000p
-2, 0x1.a9cfa4a5004f4p
-45},
3660 {0x1.7c00000000000p
-1, 0x1.314f1e1d36000p
-2, -0x1.8e27ad3213cb8p
-45},
3661 {0x1.7a00000000000p
-1, 0x1.36b6776be1000p
-2, 0x1.16ecdb0f177c8p
-46},
3662 {0x1.7800000000000p
-1, 0x1.3c25277333000p
-2, 0x1.83b54b606bd5cp
-46},
3663 {0x1.7600000000000p
-1, 0x1.419b423d5e800p
-2, 0x1.8e436ec90e09dp
-47},
3664 {0x1.7400000000000p
-1, 0x1.4718dc271c800p
-2, -0x1.f27ce0967d675p
-45},
3665 {0x1.7200000000000p
-1, 0x1.4c9e09e173000p
-2, -0x1.e20891b0ad8a4p
-45},
3666 {0x1.7000000000000p
-1, 0x1.522ae0738a000p
-2, 0x1.ebe708164c759p
-45},
3667 {0x1.6e00000000000p
-1, 0x1.57bf753c8d000p
-2, 0x1.fadedee5d40efp
-46},
3668 {0x1.6c00000000000p
-1, 0x1.5d5bddf596000p
-2, -0x1.a0b2a08a465dcp
-47},
3670 static const double A
[] = {
3672 0x1.555555555556p
-2 * -2,
3673 -0x1.0000000000006p
-2 * -2,
3674 0x1.999999959554ep
-3 * 4,
3675 -0x1.555555529a47ap
-3 * 4,
3676 0x1.2495b9b4845e9p
-3 * -8,
3677 -0x1.0002b8b263fc3p
-3 * -8
3679 static const double ln2hi
= 0x1.62e42fefa3800p
-1,
3680 ln2lo
= 0x1.ef35793c76730p
-45;
3682 double z
, r
, y
, invc
, logc
, logctail
, kd
, hi
, t1
, t2
, lo
, lo1
, lo2
, p
;
3683 double zhi
, zlo
, rhi
, rlo
, ar
, ar2
, ar3
, lo3
, lo4
, arhi
, arhi2
;
3687 /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
3688 The range is split into N subintervals.
3689 The ith subinterval contains z and c is near its center. */
3690 tmp
= ix
- 0x3fe6955500000000ULL
;
3691 i
= (tmp
>> (52 - 7)) % (1 << 7);
3692 k
= (INT64
)tmp
>> 52; /* arithmetic shift */
3693 iz
= ix
- (tmp
& 0xfffULL
<< 52);
3697 /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
3700 logctail
= T
[i
].logctail
;
3702 /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
3703 |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */
3704 /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
3705 iz
= (iz
+ (1ULL << 31)) & (-1ULL << 32);
3706 zhi
= *(double*)&iz
;
3708 rhi
= zhi
* invc
- 1.0;
3712 /* k*Ln2 + log(c) + r. */
3713 t1
= kd
* ln2hi
+ logc
;
3715 lo1
= kd
* ln2lo
+ logctail
;
3718 /* Evaluation is optimized assuming superscalar pipelined execution. */
3719 ar
= A
[0] * r
; /* A[0] = -0.5. */
3722 /* k*Ln2 + log(c) + r + A[0]*r*r. */
3726 lo3
= rlo
* (ar
+ arhi
);
3727 lo4
= t2
- hi
+ arhi2
;
3728 /* p = log1p(r) - r - A[0]*r*r. */
3729 p
= (ar3
* (A
[1] + r
* A
[2] + ar2
* (A
[3] + r
* A
[4] + ar2
* (A
[5] + r
* A
[6]))));
3730 lo
= lo1
+ lo2
+ lo3
+ lo4
+ p
;
3732 *tail
= hi
- y
+ lo
;
3736 /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
3737 The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
3738 static double pow_exp(double argx
, double argy
, double x
, double xtail
, UINT32 sign_bias
)
3740 static const double C
[] = {
3741 0x1.ffffffffffdbdp
-2,
3742 0x1.555555555543cp
-3,
3743 0x1.55555cf172b91p
-5,
3744 0x1.1111167a4d017p
-7
3746 static const double invln2N
= 0x1.71547652b82fep0
* (1 << 7),
3747 negln2hiN
= -0x1.62e42fefa0000p
-8,
3748 negln2loN
= -0x1.cf79abc9e3b3ap
-47;
3751 UINT64 ki
, idx
, top
, sbits
;
3752 double kd
, z
, r
, r2
, scale
, tail
, tmp
;
3754 abstop
= (*(UINT64
*)&x
>> 52) & 0x7ff;
3755 if (abstop
- 0x3c9 >= 0x408 - 0x3c9) {
3756 if (abstop
- 0x3c9 >= 0x80000000) {
3757 /* Avoid spurious underflow for tiny x. */
3758 /* Note: 0 is common input. */
3759 double one
= 1.0 + x
;
3760 return sign_bias
? -one
: one
;
3762 if (abstop
>= 0x409) {
3763 /* Note: inf and nan are already handled. */
3764 if (*(UINT64
*)&x
>> 63)
3765 return math_error(_UNDERFLOW
, "pow", argx
, argy
, (sign_bias
? -DBL_MIN
: DBL_MIN
) * DBL_MIN
);
3766 return math_error(_OVERFLOW
, "pow", argx
, argy
, (sign_bias
? -DBL_MAX
: DBL_MAX
) * DBL_MAX
);
3768 /* Large x is special cased below. */
3772 /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
3773 /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
3777 r
= x
+ kd
* negln2hiN
+ kd
* negln2loN
;
3778 /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
3780 /* 2^(k/N) ~= scale * (1 + tail). */
3781 idx
= 2 * (ki
% (1 << 7));
3782 top
= (ki
+ sign_bias
) << (52 - 7);
3783 tail
= *(double*)&exp_T
[idx
];
3784 /* This is only a valid scale when -1023*N < k < 1024*N. */
3785 sbits
= exp_T
[idx
+ 1] + top
;
3786 /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
3787 /* Evaluation is optimized assuming superscalar pipelined execution. */
3789 /* Without fma the worst case error is 0.25/N ulp larger. */
3790 /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
3791 tmp
= tail
+ r
+ r2
* (C
[0] + r
* C
[1]) + r2
* r2
* (C
[2] + r
* C
[3]);
3793 /* Handle cases that may overflow or underflow when computing the result that
3794 is scale*(1+TMP) without intermediate rounding. The bit representation of
3795 scale is in SBITS, however it has a computed exponent that may have
3796 overflown into the sign bit so that needs to be adjusted before using it as
3797 a double. (int32_t)KI is the k used in the argument reduction and exponent
3798 adjustment of scale, positive k here means the result may overflow and
3799 negative k means the result may underflow. */
3802 if ((ki
& 0x80000000) == 0) {
3803 /* k > 0, the exponent of scale might have overflowed by <= 460. */
3804 sbits
-= 1009ull << 52;
3805 scale
= *(double*)&sbits
;
3806 y
= 0x1p
1009 * (scale
+ scale
* tmp
);
3808 return math_error(_OVERFLOW
, "pow", argx
, argy
, y
);
3811 /* k < 0, need special care in the subnormal range. */
3812 sbits
+= 1022ull << 52;
3813 /* Note: sbits is signed scale. */
3814 scale
= *(double*)&sbits
;
3815 y
= scale
+ scale
* tmp
;
3816 if (fabs(y
) < 1.0) {
3817 /* Round y to the right precision before scaling it into the subnormal
3818 range to avoid double rounding that can cause 0.5+E/2 ulp error where
3819 E is the worst-case ulp error outside the subnormal range. So this
3820 is only useful if the goal is better than 1 ulp worst-case error. */
3821 double hi
, lo
, one
= 1.0;
3824 lo
= scale
- y
+ scale
* tmp
;
3826 lo
= one
- hi
+ y
+ lo
;
3828 /* Fix the sign of 0. */
3830 sbits
&= 0x8000000000000000ULL
;
3831 y
= *(double*)&sbits
;
3833 /* The underflow exception needs to be signaled explicitly. */
3834 fp_barrier(fp_barrier(0x1p
-1022) * 0x1p
-1022);
3836 return math_error(_UNDERFLOW
, "pow", argx
, argy
, y
);
3841 scale
= *(double*)&sbits
;
3842 /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
3843 is no spurious underflow here even without fma. */
3844 return scale
+ scale
* tmp
;
3847 /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
3848 the bit representation of a non-zero finite floating-point value. */
3849 static inline int pow_checkint(UINT64 iy
)
3851 int e
= iy
>> 52 & 0x7ff;
3856 if (iy
& ((1ULL << (0x3ff + 52 - e
)) - 1))
3858 if (iy
& (1ULL << (0x3ff + 52 - e
)))
3863 /*********************************************************************
3866 * Copied from musl: src/math/pow.c
3868 double CDECL
pow( double x
, double y
)
3870 UINT32 sign_bias
= 0;
3873 double lo
, hi
, ehi
, elo
, yhi
, ylo
, lhi
, llo
;
3879 if (topx
- 0x001 >= 0x7ff - 0x001 ||
3880 (topy
& 0x7ff) - 0x3be >= 0x43e - 0x3be) {
3881 /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
3882 and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
3883 /* Special cases: (x < 0x1p-126 or inf or nan) or
3884 (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
3885 if (2 * iy
- 1 >= 2 * 0x7ff0000000000000ULL
- 1) {
3888 if (ix
== 0x3ff0000000000000ULL
)
3890 if (2 * ix
> 2 * 0x7ff0000000000000ULL
||
3891 2 * iy
> 2 * 0x7ff0000000000000ULL
)
3893 if (2 * ix
== 2 * 0x3ff0000000000000ULL
)
3895 if ((2 * ix
< 2 * 0x3ff0000000000000ULL
) == !(iy
>> 63))
3896 return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
3899 if (2 * ix
- 1 >= 2 * 0x7ff0000000000000ULL
- 1) {
3901 if (ix
>> 63 && pow_checkint(iy
) == 1)
3903 if (iy
& 0x8000000000000000ULL
&& x2
== 0.0)
3904 return math_error(_SING
, "pow", x
, y
, 1 / x2
);
3905 /* Without the barrier some versions of clang hoist the 1/x2 and
3906 thus division by zero exception can be signaled spuriously. */
3907 return iy
>> 63 ? fp_barrier(1 / x2
) : x2
;
3909 /* Here x and y are non-zero finite. */
3912 int yint
= pow_checkint(iy
);
3914 return math_error(_DOMAIN
, "pow", x
, y
, 0 / (x
- x
));
3916 sign_bias
= 0x800 << 7;
3917 ix
&= 0x7fffffffffffffff;
3920 if ((topy
& 0x7ff) - 0x3be >= 0x43e - 0x3be) {
3921 /* Note: sign_bias == 0 here because y is not odd. */
3922 if (ix
== 0x3ff0000000000000ULL
)
3924 if ((topy
& 0x7ff) < 0x3be) {
3925 /* |y| < 2^-65, x^y ~= 1 + y*log(x). */
3926 return ix
> 0x3ff0000000000000ULL
? 1.0 + y
: 1.0 - y
;
3928 if ((ix
> 0x3ff0000000000000ULL
) == (topy
< 0x800))
3929 return math_error(_OVERFLOW
, "pow", x
, y
, fp_barrier(DBL_MAX
) * DBL_MAX
);
3930 return math_error(_UNDERFLOW
, "pow", x
, y
, fp_barrier(DBL_MIN
) * DBL_MIN
);
3933 /* Normalize subnormal x so exponent becomes negative. */
3936 ix
&= 0x7fffffffffffffff;
3941 hi
= pow_log(ix
, &lo
);
3943 yhi
= *(double*)&iy
;
3945 *(UINT64
*)&lhi
= *(UINT64
*)&hi
& -1ULL << 27;
3946 llo
= fp_barrier(hi
- lhi
+ lo
);
3948 elo
= ylo
* lhi
+ y
* llo
; /* |elo| < |ehi| * 2^-25. */
3949 return pow_exp(x
, y
, ehi
, elo
, sign_bias
);
3952 /*********************************************************************
3955 * Copied from musl: src/math/sin.c
3957 double CDECL
sin( double x
)
3963 ix
= *(ULONGLONG
*)&x
>> 32;
3967 if (ix
<= 0x3fe921fb) {
3968 if (ix
< 0x3e500000) { /* |x| < 2**-26 */
3969 /* raise inexact if x != 0 and underflow if subnormal*/
3970 fp_barrier(ix
< 0x00100000 ? x
/0x1p
120f
: x
+0x1p
120f
);
3973 return __sin(x
, 0.0, 0);
3976 /* sin(Inf or NaN) is NaN */
3978 return math_error(_DOMAIN
, "sin", x
, 0, x
- x
);
3979 if (ix
>= 0x7ff00000)
3982 /* argument reduction needed */
3983 n
= __rem_pio2(x
, y
);
3985 case 0: return __sin(y
[0], y
[1], 1);
3986 case 1: return __cos(y
[0], y
[1]);
3987 case 2: return -__sin(y
[0], y
[1], 1);
3988 default: return -__cos(y
[0], y
[1]);
3992 /*********************************************************************
3995 double CDECL
sinh( double x
)
3997 UINT64 ux
= *(UINT64
*)&x
;
3998 UINT64 sign
= ux
& 0x8000000000000000ULL
;
4006 ux
&= (UINT64
)-1 / 2;
4007 absx
= *(double*)&ux
;
4010 /* |x| < log(DBL_MAX) */
4011 if (w
< 0x40862e42) {
4013 if (w
< 0x3ff00000) {
4014 if (w
< 0x3ff00000 - (26 << 20))
4016 return h
* (2 * t
- t
* t
/ (t
+ 1));
4018 return h
* (t
+ t
/ (t
+ 1));
4021 /* |x| > log(DBL_MAX) or nan */
4022 /* note: the result is stored to handle overflow */
4023 if (ux
> 0x7ff0000000000000ULL
)
4024 *(UINT64
*)&t
= ux
| sign
| 0x0008000000000000ULL
;
4026 t
= __expo2(absx
, 2 * h
);
4030 static BOOL
sqrt_validate( double *x
, BOOL update_sw
)
4032 short c
= _dclass(*x
);
4034 if (c
== FP_ZERO
) return FALSE
;
4039 *x
= math_error(_DOMAIN
, "sqrt", *x
, 0, *x
);
4041 /* set signaling bit */
4042 *(ULONGLONG
*)x
|= 0x8000000000000ULL
;
4048 *x
= math_error(_DOMAIN
, "sqrt", *x
, 0, ret_nan(update_sw
));
4051 if (c
== FP_INFINITE
) return FALSE
;
4055 #if defined(__x86_64__) || defined(__i386__)
4056 double CDECL
sse2_sqrt(double);
4057 __ASM_GLOBAL_FUNC( sse2_sqrt
,
4058 "sqrtsd %xmm0, %xmm0\n\t"
4063 double CDECL
x87_sqrt(double);
4064 __ASM_GLOBAL_FUNC( x87_sqrt
,
4072 /*********************************************************************
4075 * Copied from musl: src/math/sqrt.c
4077 double CDECL
sqrt( double x
)
4080 if (!sqrt_validate(&x
, TRUE
))
4083 return sse2_sqrt(x
);
4084 #elif defined( __i386__ )
4085 if (!sqrt_validate(&x
, TRUE
))
4090 static const double tiny
= 1.0e-300;
4093 int sign
= 0x80000000;
4095 unsigned int r
,t1
,s1
,ix1
,q1
;
4098 if (!sqrt_validate(&x
, TRUE
))
4101 ix
= *(ULONGLONG
*)&x
;
4107 if (m
== 0) { /* subnormal x */
4113 for (i
=0; (ix0
& 0x00100000) == 0; i
++)
4116 ix0
|= ix1
>> (32 - i
);
4119 m
-= 1023; /* unbias exponent */
4120 ix0
= (ix0
& 0x000fffff) | 0x00100000;
4121 if (m
& 1) { /* odd m, double x to make it even */
4122 ix0
+= ix0
+ ((ix1
& sign
) >> 31);
4125 m
>>= 1; /* m = [m/2] */
4127 /* generate sqrt(x) bit by bit */
4128 ix0
+= ix0
+ ((ix1
& sign
) >> 31);
4130 q
= q1
= s0
= s1
= 0; /* [q,q1] = sqrt(x) */
4131 r
= 0x00200000; /* r = moving bit from right to left */
4140 ix0
+= ix0
+ ((ix1
& sign
) >> 31);
4149 if (t
< ix0
|| (t
== ix0
&& t1
<= ix1
)) {
4151 if ((t1
&sign
) == sign
&& (s1
& sign
) == 0)
4159 ix0
+= ix0
+ ((ix1
& sign
) >> 31);
4164 /* use floating add to find out rounding direction */
4165 if ((ix0
| ix1
) != 0) {
4166 z
= 1.0 - tiny
; /* raise inexact flag */
4169 if (q1
== (unsigned int)0xffffffff) {
4172 } else if (z
> 1.0) {
4173 if (q1
== (unsigned int)0xfffffffe)
4180 ix0
= (q
>> 1) + 0x3fe00000;
4184 ix
= ix0
+ ((unsigned int)m
<< 20);
4187 return *(double*)&ix
;
4191 /* Copied from musl: src/math/__tan.c */
4192 static double __tan(double x
, double y
, int odd
)
4194 static const double T
[] = {
4195 3.33333333333334091986e-01,
4196 1.33333333333201242699e-01,
4197 5.39682539762260521377e-02,
4198 2.18694882948595424599e-02,
4199 8.86323982359930005737e-03,
4200 3.59207910759131235356e-03,
4201 1.45620945432529025516e-03,
4202 5.88041240820264096874e-04,
4203 2.46463134818469906812e-04,
4204 7.81794442939557092300e-05,
4205 7.14072491382608190305e-05,
4206 -1.85586374855275456654e-05,
4207 2.59073051863633712884e-05,
4209 static const double pio4
= 7.85398163397448278999e-01;
4210 static const double pio4lo
= 3.06161699786838301793e-17;
4212 double z
, r
, v
, w
, s
, a
, w0
, a0
;
4216 hx
= *(ULONGLONG
*)&x
>> 32;
4217 big
= (hx
& 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */
4224 x
= (pio4
- x
) + (pio4lo
- y
);
4229 r
= T
[1] + w
* (T
[3] + w
* (T
[5] + w
* (T
[7] + w
* (T
[9] + w
* T
[11]))));
4230 v
= z
* (T
[2] + w
* (T
[4] + w
* (T
[6] + w
* (T
[8] + w
* (T
[10] + w
* T
[12])))));
4232 r
= y
+ z
* (s
* (r
+ v
) + y
) + s
* T
[0];
4236 v
= s
- 2.0 * (x
+ (r
- w
* w
/ (w
+ s
)));
4237 return sign
? -v
: v
;
4241 /* -1.0/(x+r) has up to 2ulp error, so compute it accurately */
4243 *(LONGLONG
*)&w0
= *(LONGLONG
*)&w0
& 0xffffffff00000000ULL
;
4244 v
= r
- (w0
- x
); /* w0+v = r+x */
4246 *(LONGLONG
*)&a0
= *(LONGLONG
*)&a0
& 0xffffffff00000000ULL
;
4247 return a0
+ a
* (1.0 + a0
* w0
+ a0
* v
);
4250 /*********************************************************************
4253 * Copied from musl: src/math/tan.c
4255 double CDECL
tan( double x
)
4261 ix
= *(ULONGLONG
*)&x
>> 32;
4264 if (ix
<= 0x3fe921fb) { /* |x| ~< pi/4 */
4265 if (ix
< 0x3e400000) { /* |x| < 2**-27 */
4266 /* raise inexact if x!=0 and underflow if subnormal */
4267 fp_barrier(ix
< 0x00100000 ? x
/ 0x1p
120f
: x
+ 0x1p
120f
);
4270 return __tan(x
, 0.0, 0);
4274 return math_error(_DOMAIN
, "tan", x
, 0, x
- x
);
4275 if (ix
>= 0x7ff00000)
4278 n
= __rem_pio2(x
, y
);
4279 return __tan(y
[0], y
[1], n
& 1);
4282 /*********************************************************************
4285 double CDECL
tanh( double x
)
4287 UINT64 ui
= *(UINT64
*)&x
;
4288 UINT64 sign
= ui
& 0x8000000000000000ULL
;
4293 ui
&= (UINT64
)-1 / 2;
4297 if (w
> 0x3fe193ea) {
4298 /* |x| > log(3)/2 ~= 0.5493 or nan */
4299 if (w
> 0x40340000) {
4300 if (ui
> 0x7ff0000000000000ULL
) {
4301 *(UINT64
*)&x
= ui
| sign
| 0x0008000000000000ULL
;
4302 #if _MSVCR_VER < 140
4303 return math_error(_DOMAIN
, "tanh", x
, 0, x
);
4309 /* note: this branch avoids raising overflow */
4310 fp_barrier(x
+ 0x1p
120f
);
4314 t
= 1 - 2 / (t
+ 2);
4316 } else if (w
> 0x3fd058ae) {
4317 /* |x| > log(5/3)/2 ~= 0.2554 */
4320 } else if (w
>= 0x00100000) {
4321 /* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */
4322 t
= __expm1(-2 * x
);
4325 /* |x| is subnormal */
4326 /* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */
4327 fp_barrier((float)x
);
4330 return sign
? -t
: t
;
4334 #if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
4336 #define CREATE_FPU_FUNC1(name, call) \
4337 __ASM_GLOBAL_FUNC(name, \
4339 __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
4340 __ASM_CFI(".cfi_rel_offset %ebp,0\n\t") \
4341 "movl %esp, %ebp\n\t" \
4342 __ASM_CFI(".cfi_def_cfa_register %ebp\n\t") \
4343 "subl $68, %esp\n\t" /* sizeof(double)*8 + sizeof(int) */ \
4344 "fstpl (%esp)\n\t" /* store function argument */ \
4346 "movl $1, %ecx\n\t" /* empty FPU stack */ \
4350 "and $0x4500, %ax\n\t" \
4351 "cmp $0x4100, %ax\n\t" \
4353 "fstpl (%esp,%ecx,8)\n\t" \
4358 "movl %ecx, -4(%ebp)\n\t" \
4359 "call " __ASM_NAME( #call ) "\n\t" \
4360 "movl -4(%ebp), %ecx\n\t" \
4361 "fstpl (%esp)\n\t" /* save result */ \
4362 "3:\n\t" /* restore FPU stack */ \
4364 "fldl (%esp,%ecx,8)\n\t" \
4365 "cmpl $0, %ecx\n\t" \
4368 __ASM_CFI(".cfi_def_cfa %esp,4\n\t") \
4369 __ASM_CFI(".cfi_same_value %ebp\n\t") \
4372 #define CREATE_FPU_FUNC2(name, call) \
4373 __ASM_GLOBAL_FUNC(name, \
4375 __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
4376 __ASM_CFI(".cfi_rel_offset %ebp,0\n\t") \
4377 "movl %esp, %ebp\n\t" \
4378 __ASM_CFI(".cfi_def_cfa_register %ebp\n\t") \
4379 "subl $68, %esp\n\t" /* sizeof(double)*8 + sizeof(int) */ \
4380 "fstpl 8(%esp)\n\t" /* store function argument */ \
4382 "fstpl (%esp)\n\t" \
4384 "movl $2, %ecx\n\t" /* empty FPU stack */ \
4388 "and $0x4500, %ax\n\t" \
4389 "cmp $0x4100, %ax\n\t" \
4391 "fstpl (%esp,%ecx,8)\n\t" \
4396 "movl %ecx, -4(%ebp)\n\t" \
4397 "call " __ASM_NAME( #call ) "\n\t" \
4398 "movl -4(%ebp), %ecx\n\t" \
4399 "fstpl 8(%esp)\n\t" /* save result */ \
4400 "3:\n\t" /* restore FPU stack */ \
4402 "fldl (%esp,%ecx,8)\n\t" \
4403 "cmpl $1, %ecx\n\t" \
4406 __ASM_CFI(".cfi_def_cfa %esp,4\n\t") \
4407 __ASM_CFI(".cfi_same_value %ebp\n\t") \
4410 CREATE_FPU_FUNC1(_CIacos
, acos
)
4411 CREATE_FPU_FUNC1(_CIasin
, asin
)
4412 CREATE_FPU_FUNC1(_CIatan
, atan
)
4413 CREATE_FPU_FUNC2(_CIatan2
, atan2
)
4414 CREATE_FPU_FUNC1(_CIcos
, cos
)
4415 CREATE_FPU_FUNC1(_CIcosh
, cosh
)
4416 CREATE_FPU_FUNC1(_CIexp
, exp
)
4417 CREATE_FPU_FUNC2(_CIfmod
, fmod
)
4418 CREATE_FPU_FUNC1(_CIlog
, log
)
4419 CREATE_FPU_FUNC1(_CIlog10
, log10
)
4420 CREATE_FPU_FUNC2(_CIpow
, pow
)
4421 CREATE_FPU_FUNC1(_CIsin
, sin
)
4422 CREATE_FPU_FUNC1(_CIsinh
, sinh
)
4423 CREATE_FPU_FUNC1(_CIsqrt
, sqrt
)
4424 CREATE_FPU_FUNC1(_CItan
, tan
)
4425 CREATE_FPU_FUNC1(_CItanh
, tanh
)
4427 __ASM_GLOBAL_FUNC(_ftol
,
4429 __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
4430 __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
4431 "movl %esp, %ebp\n\t"
4432 __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
4433 "subl $12, %esp\n\t" /* sizeof(LONGLONG) + 2*sizeof(WORD) */
4435 "mov (%esp), %ax\n\t"
4436 "or $0xc00, %ax\n\t"
4437 "mov %ax, 2(%esp)\n\t"
4439 "fistpq 4(%esp)\n\t"
4441 "movl 4(%esp), %eax\n\t"
4442 "movl 8(%esp), %edx\n\t"
4444 __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
4445 __ASM_CFI(".cfi_same_value %ebp\n\t")
4448 #endif /* (defined(__GNUC__) || defined(__clang__)) && defined(__i386__) */
4450 /*********************************************************************
4451 * _fpclass (MSVCRT.@)
4453 int CDECL
_fpclass(double num
)
4455 union { double f
; UINT64 i
; } u
= { num
};
4456 int e
= u
.i
>> 52 & 0x7ff;
4462 if (u
.i
<< 1) return s
? _FPCLASS_ND
: _FPCLASS_PD
;
4463 return s
? _FPCLASS_NZ
: _FPCLASS_PZ
;
4465 if (u
.i
<< 12) return ((u
.i
>> 51) & 1) ? _FPCLASS_QNAN
: _FPCLASS_SNAN
;
4466 return s
? _FPCLASS_NINF
: _FPCLASS_PINF
;
4468 return s
? _FPCLASS_NN
: _FPCLASS_PN
;
4472 /*********************************************************************
4475 unsigned int CDECL
MSVCRT__rotl(unsigned int num
, int shift
)
4478 return (num
<< shift
) | (num
>> (32-shift
));
4481 /*********************************************************************
4484 __msvcrt_ulong CDECL
MSVCRT__lrotl(__msvcrt_ulong num
, int shift
)
4487 return (num
<< shift
) | (num
>> (32-shift
));
4490 /*********************************************************************
4493 __msvcrt_ulong CDECL
MSVCRT__lrotr(__msvcrt_ulong num
, int shift
)
4496 return (num
>> shift
) | (num
<< (32-shift
));
4499 /*********************************************************************
4502 unsigned int CDECL
MSVCRT__rotr(unsigned int num
, int shift
)
4505 return (num
>> shift
) | (num
<< (32-shift
));
4508 /*********************************************************************
4509 * _rotl64 (MSVCRT.@)
4511 unsigned __int64 CDECL
MSVCRT__rotl64(unsigned __int64 num
, int shift
)
4514 return (num
<< shift
) | (num
>> (64-shift
));
4517 /*********************************************************************
4518 * _rotr64 (MSVCRT.@)
4520 unsigned __int64 CDECL
MSVCRT__rotr64(unsigned __int64 num
, int shift
)
4523 return (num
>> shift
) | (num
<< (64-shift
));
4526 /*********************************************************************
4529 int CDECL
abs( int n
)
4531 return n
>= 0 ? n
: -n
;
4534 /*********************************************************************
4537 __msvcrt_long CDECL
labs( __msvcrt_long n
)
4539 return n
>= 0 ? n
: -n
;
4543 /*********************************************************************
4544 * llabs (MSVCR100.@)
4546 __int64 CDECL
llabs( __int64 n
)
4548 return n
>= 0 ? n
: -n
;
4553 /*********************************************************************
4554 * imaxabs (MSVCR120.@)
4556 intmax_t CDECL
imaxabs( intmax_t n
)
4558 return n
>= 0 ? n
: -n
;
4562 /*********************************************************************
4565 __int64 CDECL
_abs64( __int64 n
)
4567 return n
>= 0 ? n
: -n
;
4570 /* Copied from musl: src/math/ilogb.c */
4571 static int __ilogb(double x
)
4573 union { double f
; UINT64 i
; } u
= { x
};
4574 int e
= u
.i
>> 52 & 0x7ff;
4579 if (u
.i
== 0) return FP_ILOGB0
;
4581 for (e
= -0x3ff; u
.i
>> 63 == 0; e
--, u
.i
<<= 1);
4584 if (e
== 0x7ff) return u
.i
<< 12 ? FP_ILOGBNAN
: INT_MAX
;
4588 /*********************************************************************
4591 * Copied from musl: src/math/logb.c
4593 double CDECL
_logb(double x
)
4598 return math_error(_SING
, "_logb", x
, 0, -1 / (x
* x
));
/* Squares x as a two-part result: *hi receives the rounded product x*x and
   *lo the remainder of the square computed from a two-piece split of x. */
static void sq(double *hi, double *lo, double x)
{
    /* Scaling by 2^27 + 1 and cancelling again leaves the leading part of x
       in head; rest is what remains of x. */
    const double scaled = x * (0x1p27 + 1);
    const double head = x - scaled + scaled;
    const double rest = x - head;

    *hi = x * x;
    *lo = head * head - *hi + 2 * head * rest + rest * rest;
}
4613 /*********************************************************************
4616 * Copied from musl: src/math/hypot.c
4618 double CDECL
_hypot(double x
, double y
)
4620 UINT64 ux
= *(UINT64
*)&x
, uy
= *(UINT64
*)&y
, ut
;
4621 double hx
, lx
, hy
, ly
, z
;
4624 /* arrange |x| >= |y| */
4638 /* note: hypot(inf,nan) == inf */
4641 if (ex
== 0x7ff || uy
== 0)
4643 /* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */
4644 /* 64 difference is enough for ld80 double_t */
4648 /* precise sqrt argument in nearest rounding mode without overflow */
4649 /* xh*xh must not overflow and xl*xl must not underflow in sq */
4651 if (ex
> 0x3ff + 510) {
4655 } else if (ey
< 0x3ff - 450) {
4662 return z
* sqrt(ly
+ lx
+ hy
+ hx
);
4665 /*********************************************************************
4666 * _hypotf (MSVCRT.@)
4668 * Copied from musl: src/math/hypotf.c
4670 float CDECL
_hypotf(float x
, float y
)
4672 UINT32 ux
= *(UINT32
*)&x
, uy
= *(UINT32
*)&y
, ut
;
4685 if (uy
== 0xff << 23)
4687 if (ux
>= 0xff << 23 || uy
== 0 || ux
- uy
>= 25 << 23)
4691 if (ux
>= (0x7f + 60) << 23) {
4695 } else if (uy
< (0x7f - 60) << 23) {
4700 return z
* sqrtf((double)x
* x
+ (double)y
* y
);
4703 /*********************************************************************
4706 * Based on musl: src/math/ceilf.c
4708 double CDECL
ceil( double x
)
4710 union {double f
; UINT64 i
;} u
= {x
};
4711 int e
= (u
.i
>> 52 & 0x7ff) - 0x3ff;
4717 m
= 0x000fffffffffffffULL
>> e
;
4732 /*********************************************************************
4735 * Based on musl: src/math/floorf.c
4737 double CDECL
floor( double x
)
4739 union {double f
; UINT64 i
;} u
= {x
};
4740 int e
= (int)(u
.i
>> 52 & 0x7ff) - 0x3ff;
4746 m
= 0x000fffffffffffffULL
>> e
;
4761 /*********************************************************************
4764 * Copied from musl: src/math/fma.c
4773 static struct fma_num
normalize(double x
)
4775 UINT64 ix
= *(UINT64
*)&x
;
4777 int sign
= e
& 0x800;
4784 e
= ix
>> 52 & 0x7ff;
4785 e
= e
? e
- 63 : 0x800;
4787 ix
&= (1ull << 52) - 1;
4790 e
-= 0x3ff + 52 + 1;
4798 static void mul(UINT64
*hi
, UINT64
*lo
, UINT64 x
, UINT64 y
)
4801 UINT64 xlo
= (UINT32
)x
, xhi
= x
>> 32;
4802 UINT64 ylo
= (UINT32
)y
, yhi
= y
>> 32;
4805 t2
= xlo
* yhi
+ xhi
* ylo
;
4807 *lo
= t1
+ (t2
<< 32);
4808 *hi
= t3
+ (t2
>> 32) + (t1
> *lo
);
4811 double CDECL
fma( double x
, double y
, double z
)
4813 int e
, d
, sign
, samesign
, nonzero
;
4814 UINT64 rhi
, rlo
, zhi
, zlo
;
4815 struct fma_num nx
, ny
, nz
;
4819 /* normalize so top 10bits and last bit are 0 */
4824 if (nx
.e
>= 0x7ff - 0x3ff - 52 - 1 || ny
.e
>= 0x7ff - 0x3ff - 52 - 1) {
4826 if (!isnan(x
) && !isnan(y
) && !isnan(z
) && isnan(r
)) *_errno() = EDOM
;
4829 if (nz
.e
>= 0x7ff - 0x3ff - 52 - 1) {
4830 if (nz
.e
> 0x7ff - 0x3ff - 52 - 1) {/* z==0 */
4832 if (!isnan(x
) && !isnan(y
) && isnan(r
)) *_errno() = EDOM
;
4839 mul(&rhi
, &rlo
, nx
.m
, ny
.m
);
4840 /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */
4842 /* align exponents */
4845 /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
4849 zhi
= nz
.m
>> (64 - d
);
4856 rlo
= rhi
<< (64 - d
) | rlo
>> d
| !!(rlo
<< (64 - d
));
4868 } else if (d
< 64) {
4869 zlo
= nz
.m
>> d
| !!(nz
.m
<< (64 - d
));
4876 sign
= nx
.sign
^ ny
.sign
;
4877 samesign
= !(sign
^ nz
.sign
);
4882 rhi
+= zhi
+ (rlo
< zlo
);
4887 rhi
= rhi
- zhi
- (t
< rlo
);
4896 /* set rhi to top 63bit of the result (last bit is sticky) */
4900 BitScanReverse((DWORD
*)&d
, rhi
>> 32);
4903 BitScanReverse((DWORD
*)&d
, rhi
);
4907 rhi
= rhi
<< d
| rlo
>> (64 - d
) | !!(rlo
<< d
);
4910 BitScanReverse((DWORD
*)&d
, rlo
>> 32);
4913 BitScanReverse((DWORD
*)&d
, rlo
);
4917 rhi
= rlo
>> 1 | (rlo
& 1);
4926 /* convert to double */
4927 i
= rhi
; /* i is in [1<<62,(1<<63)-1] */
4930 r
= i
; /* |r| is in [0x1p62,0x1p63] */
4932 if (e
< -1022 - 62) {
4933 /* result is subnormal before rounding */
4934 if (e
== -1022 - 63) {
4939 /* min normal after rounding, underflow depends
4940 on arch behaviour which can be imitated by
4941 a double to float conversion */
4942 float fltmin
= 0x0.ffffff8p
-63 * FLT_MIN
* r
;
4943 return DBL_MIN
/ FLT_MIN
* fltmin
;
4945 /* one bit is lost when scaled, add another top bit to
4946 only round once at conversion if it is inexact */
4950 i
= rhi
>> 1 | (rhi
& 1) | 1ull << 62;
4954 r
= 2 * r
- c
; /* remove top bit */
4956 /* raise underflow portably, such that it
4957 cannot be optimized away */
4958 tiny
= DBL_MIN
/ FLT_MIN
* r
;
4959 r
+= (double)(tiny
* tiny
) * (r
- r
);
4962 /* only round once when scaled */
4964 i
= (rhi
>> d
| !!(rhi
<< (64 - d
))) << d
;
4970 return __scalbn(r
, e
);
4973 /*********************************************************************
4976 * Copied from musl: src/math/fmaf.c
4978 float CDECL
fmaf( float x
, float y
, float z
)
4980 union { double f
; UINT64 i
; } u
;
4986 e
= u
.i
>>52 & 0x7ff;
4987 /* Common case: The double precision result is fine. */
4988 if ((u
.i
& 0x1fffffff) != 0x10000000 || /* not a halfway case */
4989 e
== 0x7ff || /* NaN */
4990 (u
.f
- xy
== z
&& u
.f
- z
== xy
) || /* exact */
4991 (_controlfp(0, 0) & _MCW_RC
) != _RC_NEAR
) /* not round-to-nearest */
4993 if (!isnan(x
) && !isnan(y
) && !isnan(z
) && isnan(u
.f
)) *_errno() = EDOM
;
4995 /* underflow may not be raised correctly, example:
4996 fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) */
4997 if (e
< 0x3ff-126 && e
>= 0x3ff-149 && _statusfp() & _SW_INEXACT
)
4998 fp_barrierf((float)u
.f
* (float)u
.f
);
5003 * If result is inexact, and exactly halfway between two float values,
5004 * we need to adjust the low-order bit in the direction of the error.
5007 if (neg
== (z
> xy
))
5011 if (neg
== (err
< 0))
5018 #if defined(__i386__) || defined(__x86_64__)
5019 static void _setfp_sse( unsigned int *cw
, unsigned int cw_mask
,
5020 unsigned int *sw
, unsigned int sw_mask
)
5022 #if defined(__GNUC__) || defined(__clang__)
5023 unsigned long old_fpword
, fpword
;
5026 __asm__
__volatile__( "stmxcsr %0" : "=m" (fpword
) );
5027 old_fpword
= fpword
;
5029 cw_mask
&= _MCW_EM
| _MCW_RC
| _MCW_DN
;
5035 if (fpword
& 0x1) flags
|= _SW_INVALID
;
5036 if (fpword
& 0x2) flags
|= _SW_DENORMAL
;
5037 if (fpword
& 0x4) flags
|= _SW_ZERODIVIDE
;
5038 if (fpword
& 0x8) flags
|= _SW_OVERFLOW
;
5039 if (fpword
& 0x10) flags
|= _SW_UNDERFLOW
;
5040 if (fpword
& 0x20) flags
|= _SW_INEXACT
;
5042 *sw
= (flags
& ~sw_mask
) | (*sw
& sw_mask
);
5043 TRACE("sse2 update sw %08x to %08x\n", flags
, *sw
);
5045 if (*sw
& _SW_INVALID
) fpword
|= 0x1;
5046 if (*sw
& _SW_DENORMAL
) fpword
|= 0x2;
5047 if (*sw
& _SW_ZERODIVIDE
) fpword
|= 0x4;
5048 if (*sw
& _SW_OVERFLOW
) fpword
|= 0x8;
5049 if (*sw
& _SW_UNDERFLOW
) fpword
|= 0x10;
5050 if (*sw
& _SW_INEXACT
) fpword
|= 0x20;
5057 if (fpword
& 0x80) flags
|= _EM_INVALID
;
5058 if (fpword
& 0x100) flags
|= _EM_DENORMAL
;
5059 if (fpword
& 0x200) flags
|= _EM_ZERODIVIDE
;
5060 if (fpword
& 0x400) flags
|= _EM_OVERFLOW
;
5061 if (fpword
& 0x800) flags
|= _EM_UNDERFLOW
;
5062 if (fpword
& 0x1000) flags
|= _EM_INEXACT
;
5063 switch (fpword
& 0x6000)
5065 case 0x6000: flags
|= _RC_UP
|_RC_DOWN
; break;
5066 case 0x4000: flags
|= _RC_UP
; break;
5067 case 0x2000: flags
|= _RC_DOWN
; break;
5069 switch (fpword
& 0x8040)
5071 case 0x0040: flags
|= _DN_FLUSH_OPERANDS_SAVE_RESULTS
; break;
5072 case 0x8000: flags
|= _DN_SAVE_OPERANDS_FLUSH_RESULTS
; break;
5073 case 0x8040: flags
|= _DN_FLUSH
; break;
5076 *cw
= (flags
& ~cw_mask
) | (*cw
& cw_mask
);
5077 TRACE("sse2 update cw %08x to %08x\n", flags
, *cw
);
5079 if (*cw
& _EM_INVALID
) fpword
|= 0x80;
5080 if (*cw
& _EM_DENORMAL
) fpword
|= 0x100;
5081 if (*cw
& _EM_ZERODIVIDE
) fpword
|= 0x200;
5082 if (*cw
& _EM_OVERFLOW
) fpword
|= 0x400;
5083 if (*cw
& _EM_UNDERFLOW
) fpword
|= 0x800;
5084 if (*cw
& _EM_INEXACT
) fpword
|= 0x1000;
5085 switch (*cw
& _MCW_RC
)
5087 case _RC_UP
|_RC_DOWN
: fpword
|= 0x6000; break;
5088 case _RC_UP
: fpword
|= 0x4000; break;
5089 case _RC_DOWN
: fpword
|= 0x2000; break;
5091 switch (*cw
& _MCW_DN
)
5093 case _DN_FLUSH_OPERANDS_SAVE_RESULTS
: fpword
|= 0x0040; break;
5094 case _DN_SAVE_OPERANDS_FLUSH_RESULTS
: fpword
|= 0x8000; break;
5095 case _DN_FLUSH
: fpword
|= 0x8040; break;
5098 /* clear status word if anything changes */
5099 if (fpword
!= old_fpword
&& !sw
)
5101 TRACE("sse2 clear status word\n");
5106 if (fpword
!= old_fpword
)
5107 __asm__
__volatile__( "ldmxcsr %0" : : "m" (fpword
) );
5109 FIXME("not implemented\n");
5116 static void _setfp( unsigned int *cw
, unsigned int cw_mask
,
5117 unsigned int *sw
, unsigned int sw_mask
)
5119 #if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
5120 unsigned long oldcw
= 0, newcw
= 0;
5121 unsigned long oldsw
= 0, newsw
= 0;
5124 cw_mask
&= _MCW_EM
| _MCW_IC
| _MCW_RC
| _MCW_PC
;
5129 __asm__
__volatile__( "fstsw %0" : "=m" (newsw
) );
5133 if (newsw
& 0x1) flags
|= _SW_INVALID
;
5134 if (newsw
& 0x2) flags
|= _SW_DENORMAL
;
5135 if (newsw
& 0x4) flags
|= _SW_ZERODIVIDE
;
5136 if (newsw
& 0x8) flags
|= _SW_OVERFLOW
;
5137 if (newsw
& 0x10) flags
|= _SW_UNDERFLOW
;
5138 if (newsw
& 0x20) flags
|= _SW_INEXACT
;
5140 *sw
= (flags
& ~sw_mask
) | (*sw
& sw_mask
);
5141 TRACE("x86 update sw %08x to %08x\n", flags
, *sw
);
5143 if (*sw
& _SW_INVALID
) newsw
|= 0x1;
5144 if (*sw
& _SW_DENORMAL
) newsw
|= 0x2;
5145 if (*sw
& _SW_ZERODIVIDE
) newsw
|= 0x4;
5146 if (*sw
& _SW_OVERFLOW
) newsw
|= 0x8;
5147 if (*sw
& _SW_UNDERFLOW
) newsw
|= 0x10;
5148 if (*sw
& _SW_INEXACT
) newsw
|= 0x20;
5154 __asm__
__volatile__( "fstcw %0" : "=m" (newcw
) );
5158 if (newcw
& 0x1) flags
|= _EM_INVALID
;
5159 if (newcw
& 0x2) flags
|= _EM_DENORMAL
;
5160 if (newcw
& 0x4) flags
|= _EM_ZERODIVIDE
;
5161 if (newcw
& 0x8) flags
|= _EM_OVERFLOW
;
5162 if (newcw
& 0x10) flags
|= _EM_UNDERFLOW
;
5163 if (newcw
& 0x20) flags
|= _EM_INEXACT
;
5164 switch (newcw
& 0xc00)
5166 case 0xc00: flags
|= _RC_UP
|_RC_DOWN
; break;
5167 case 0x800: flags
|= _RC_UP
; break;
5168 case 0x400: flags
|= _RC_DOWN
; break;
5170 switch (newcw
& 0x300)
5172 case 0x0: flags
|= _PC_24
; break;
5173 case 0x200: flags
|= _PC_53
; break;
5174 case 0x300: flags
|= _PC_64
; break;
5176 if (newcw
& 0x1000) flags
|= _IC_AFFINE
;
5178 *cw
= (flags
& ~cw_mask
) | (*cw
& cw_mask
);
5179 TRACE("x86 update cw %08x to %08x\n", flags
, *cw
);
5181 if (*cw
& _EM_INVALID
) newcw
|= 0x1;
5182 if (*cw
& _EM_DENORMAL
) newcw
|= 0x2;
5183 if (*cw
& _EM_ZERODIVIDE
) newcw
|= 0x4;
5184 if (*cw
& _EM_OVERFLOW
) newcw
|= 0x8;
5185 if (*cw
& _EM_UNDERFLOW
) newcw
|= 0x10;
5186 if (*cw
& _EM_INEXACT
) newcw
|= 0x20;
5187 switch (*cw
& _MCW_RC
)
5189 case _RC_UP
|_RC_DOWN
: newcw
|= 0xc00; break;
5190 case _RC_UP
: newcw
|= 0x800; break;
5191 case _RC_DOWN
: newcw
|= 0x400; break;
5193 switch (*cw
& _MCW_PC
)
5195 case _PC_64
: newcw
|= 0x300; break;
5196 case _PC_53
: newcw
|= 0x200; break;
5197 case _PC_24
: newcw
|= 0x0; break;
5199 if (*cw
& _IC_AFFINE
) newcw
|= 0x1000;
5202 if (oldsw
!= newsw
&& (newsw
& 0x3f))
5211 DWORD instruction_pointer
;
5221 __asm__
__volatile__( "fnstenv %0" : "=m" (fenv
) );
5222 fenv
.control_word
= newcw
;
5223 fenv
.status_word
= newsw
;
5224 __asm__
__volatile__( "fldenv %0" : : "m" (fenv
) : "st", "st(1)",
5225 "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" );
5230 __asm__
__volatile__( "fnclex" );
5232 __asm__
__volatile__( "fldcw %0" : : "m" (newcw
) );
5233 #elif defined(__x86_64__)
5234 _setfp_sse(cw
, cw_mask
, sw
, sw_mask
);
5235 #elif defined(__aarch64__)
5236 ULONG_PTR old_fpsr
= 0, fpsr
= 0, old_fpcr
= 0, fpcr
= 0;
5239 cw_mask
&= _MCW_EM
| _MCW_RC
;
5244 __asm__
__volatile__( "mrs %0, fpsr" : "=r" (fpsr
) );
5248 if (fpsr
& 0x1) flags
|= _SW_INVALID
;
5249 if (fpsr
& 0x2) flags
|= _SW_ZERODIVIDE
;
5250 if (fpsr
& 0x4) flags
|= _SW_OVERFLOW
;
5251 if (fpsr
& 0x8) flags
|= _SW_UNDERFLOW
;
5252 if (fpsr
& 0x10) flags
|= _SW_INEXACT
;
5253 if (fpsr
& 0x80) flags
|= _SW_DENORMAL
;
5255 *sw
= (flags
& ~sw_mask
) | (*sw
& sw_mask
);
5256 TRACE("aarch64 update sw %08x to %08x\n", flags
, *sw
);
5258 if (*sw
& _SW_INVALID
) fpsr
|= 0x1;
5259 if (*sw
& _SW_ZERODIVIDE
) fpsr
|= 0x2;
5260 if (*sw
& _SW_OVERFLOW
) fpsr
|= 0x4;
5261 if (*sw
& _SW_UNDERFLOW
) fpsr
|= 0x8;
5262 if (*sw
& _SW_INEXACT
) fpsr
|= 0x10;
5263 if (*sw
& _SW_DENORMAL
) fpsr
|= 0x80;
5269 __asm__
__volatile__( "mrs %0, fpcr" : "=r" (fpcr
) );
5273 if (!(fpcr
& 0x100)) flags
|= _EM_INVALID
;
5274 if (!(fpcr
& 0x200)) flags
|= _EM_ZERODIVIDE
;
5275 if (!(fpcr
& 0x400)) flags
|= _EM_OVERFLOW
;
5276 if (!(fpcr
& 0x800)) flags
|= _EM_UNDERFLOW
;
5277 if (!(fpcr
& 0x1000)) flags
|= _EM_INEXACT
;
5278 if (!(fpcr
& 0x8000)) flags
|= _EM_DENORMAL
;
5279 switch (fpcr
& 0xc00000)
5281 case 0x400000: flags
|= _RC_UP
; break;
5282 case 0x800000: flags
|= _RC_DOWN
; break;
5283 case 0xc00000: flags
|= _RC_CHOP
; break;
5286 *cw
= (flags
& ~cw_mask
) | (*cw
& cw_mask
);
5287 TRACE("aarch64 update cw %08x to %08x\n", flags
, *cw
);
5288 fpcr
&= ~0xc09f00ul
;
5289 if (!(*cw
& _EM_INVALID
)) fpcr
|= 0x100;
5290 if (!(*cw
& _EM_ZERODIVIDE
)) fpcr
|= 0x200;
5291 if (!(*cw
& _EM_OVERFLOW
)) fpcr
|= 0x400;
5292 if (!(*cw
& _EM_UNDERFLOW
)) fpcr
|= 0x800;
5293 if (!(*cw
& _EM_INEXACT
)) fpcr
|= 0x1000;
5294 if (!(*cw
& _EM_DENORMAL
)) fpcr
|= 0x8000;
5295 switch (*cw
& _MCW_RC
)
5297 case _RC_CHOP
: fpcr
|= 0xc00000; break;
5298 case _RC_UP
: fpcr
|= 0x400000; break;
5299 case _RC_DOWN
: fpcr
|= 0x800000; break;
5303 /* mask exceptions if needed */
5304 if (old_fpcr
!= fpcr
&& ~(old_fpcr
>> 8) & fpsr
& 0x9f != fpsr
& 0x9f)
5306 ULONG_PTR mask
= fpcr
& ~0x9f00;
5307 __asm__
__volatile__( "msr fpcr, %0" :: "r" (mask
) );
5310 if (old_fpsr
!= fpsr
)
5311 __asm__
__volatile__( "msr fpsr, %0" :: "r" (fpsr
) );
5312 if (old_fpcr
!= fpcr
)
5313 __asm__
__volatile__( "msr fpcr, %0" :: "r" (fpcr
) );
5314 #elif defined(__arm__) && !defined(__SOFTFP__)
5315 DWORD old_fpscr
, fpscr
;
5318 __asm__
__volatile__( "vmrs %0, fpscr" : "=r" (fpscr
) );
5321 cw_mask
&= _MCW_EM
| _MCW_RC
;
5327 if (fpscr
& 0x1) flags
|= _SW_INVALID
;
5328 if (fpscr
& 0x2) flags
|= _SW_ZERODIVIDE
;
5329 if (fpscr
& 0x4) flags
|= _SW_OVERFLOW
;
5330 if (fpscr
& 0x8) flags
|= _SW_UNDERFLOW
;
5331 if (fpscr
& 0x10) flags
|= _SW_INEXACT
;
5332 if (fpscr
& 0x80) flags
|= _SW_DENORMAL
;
5334 *sw
= (flags
& ~sw_mask
) | (*sw
& sw_mask
);
5335 TRACE("arm update sw %08x to %08x\n", flags
, *sw
);
5337 if (*sw
& _SW_INVALID
) fpscr
|= 0x1;
5338 if (*sw
& _SW_ZERODIVIDE
) fpscr
|= 0x2;
5339 if (*sw
& _SW_OVERFLOW
) fpscr
|= 0x4;
5340 if (*sw
& _SW_UNDERFLOW
) fpscr
|= 0x8;
5341 if (*sw
& _SW_INEXACT
) fpscr
|= 0x10;
5342 if (*sw
& _SW_DENORMAL
) fpscr
|= 0x80;
5349 if (!(fpscr
& 0x100)) flags
|= _EM_INVALID
;
5350 if (!(fpscr
& 0x200)) flags
|= _EM_ZERODIVIDE
;
5351 if (!(fpscr
& 0x400)) flags
|= _EM_OVERFLOW
;
5352 if (!(fpscr
& 0x800)) flags
|= _EM_UNDERFLOW
;
5353 if (!(fpscr
& 0x1000)) flags
|= _EM_INEXACT
;
5354 if (!(fpscr
& 0x8000)) flags
|= _EM_DENORMAL
;
5355 switch (fpscr
& 0xc00000)
5357 case 0x400000: flags
|= _RC_UP
; break;
5358 case 0x800000: flags
|= _RC_DOWN
; break;
5359 case 0xc00000: flags
|= _RC_CHOP
; break;
5362 *cw
= (flags
& ~cw_mask
) | (*cw
& cw_mask
);
5363 TRACE("arm update cw %08x to %08x\n", flags
, *cw
);
5364 fpscr
&= ~0xc09f00ul
;
5365 if (!(*cw
& _EM_INVALID
)) fpscr
|= 0x100;
5366 if (!(*cw
& _EM_ZERODIVIDE
)) fpscr
|= 0x200;
5367 if (!(*cw
& _EM_OVERFLOW
)) fpscr
|= 0x400;
5368 if (!(*cw
& _EM_UNDERFLOW
)) fpscr
|= 0x800;
5369 if (!(*cw
& _EM_INEXACT
)) fpscr
|= 0x1000;
5370 if (!(*cw
& _EM_DENORMAL
)) fpscr
|= 0x8000;
5371 switch (*cw
& _MCW_RC
)
5373 case _RC_CHOP
: fpscr
|= 0xc00000; break;
5374 case _RC_UP
: fpscr
|= 0x400000; break;
5375 case _RC_DOWN
: fpscr
|= 0x800000; break;
5379 if (old_fpscr
!= fpscr
)
5380 __asm__
__volatile__( "vmsr fpscr, %0" :: "r" (fpscr
) );
5382 FIXME("not implemented\n");
5388 /**********************************************************************
5389 * _statusfp2 (MSVCR80.@)
/* _statusfp2: report the x87 and SSE2 status words separately (i386 only,
 * see the #if below).
 *
 *   x86_sw   receives the x87 status flags, obtained through _setfp
 *   sse2_sw  receives the SSE2 status flags, obtained through _setfp_sse;
 *            when it is NULL the function returns after the x87 part
 *
 * Both helpers are called with no control word (NULL, 0) and a status mask of
 * 0, i.e. nothing is imposed on the hardware and the word is only read.
 * NOTE(review): lines missing from this listing may guard the two calls -
 * confirm against the full source.
 */
5391 #if defined(__i386__)
5392 void CDECL
_statusfp2( unsigned int *x86_sw
, unsigned int *sse2_sw
)
5395 _setfp(NULL
, 0, x86_sw
, 0);
5396 if (!sse2_sw
) return;
5398 _setfp_sse(NULL
, 0, sse2_sw
, 0);
5403 /**********************************************************************
5404 * _statusfp (MSVCRT.@)
/* _statusfp: return the current floating-point status flags.
 *
 * On i386 the x87 and SSE2 status words are fetched with _statusfp2 and
 * OR-ed together; the other visible path reads the status word through
 * _setfp with a zero mask.
 * NOTE(review): the return statement is not part of this listing; the
 * function presumably returns flags - confirm.
 */
5406 unsigned int CDECL
_statusfp(void)
5408 unsigned int flags
= 0;
5409 #if defined(__i386__)
5410 unsigned int x86_sw
, sse2_sw
;
5412 _statusfp2( &x86_sw
, &sse2_sw
);
5413 /* FIXME: there's no definition for ambiguous status, just return all status bits for now */
5414 flags
= x86_sw
| sse2_sw
;
/* non-i386 path: plain read of the status word */
5416 _setfp(NULL
, 0, &flags
, 0);
5421 /*********************************************************************
5422 * _clearfp (MSVCRT.@)
/* _clearfp: clear the floating-point exception status flags.
 *
 * Each helper is called with a status value of 0 and a mask of _MCW_EM, so
 * the status bits covered by that mask are forced to zero while the helper
 * stores the previous hardware flags' remaining bits back into the variable.
 * The control word is not touched (NULL, 0).
 * NOTE(review): the preprocessor conditionals that choose between the
 * _setfp / _setfp_sse calls, and the return statement, are not part of this
 * listing - confirm against the full source.
 */
5424 unsigned int CDECL
_clearfp(void)
5426 unsigned int flags
= 0;
5428 _setfp(NULL
, 0, &flags
, _MCW_EM
);
/* separate zeroed status word for the SSE unit */
5431 unsigned int sse_sw
= 0;
5433 _setfp_sse(NULL
, 0, &sse_sw
, _MCW_EM
);
5437 _setfp(NULL
, 0, &flags
, _MCW_EM
);
5442 /*********************************************************************
5443 * __fpecode (MSVCRT.@)
/* __fpecode: return the address of the fpecode field stored in the calling
 * thread's data block (as returned by msvcrt_get_thread_data()). */
5445 int * CDECL
__fpecode(void)
5447 return &msvcrt_get_thread_data()->fpecode
;
5450 /*********************************************************************
/* ldexp: scale num by a power of two given by exp, via __scalbn, and report
 * range errors through math_error.
 *
 *   - finite input, non-finite result -> _OVERFLOW
 *   - non-zero finite input, zero result -> _UNDERFLOW
 *
 * NOTE(review): the final return for the non-error case is not part of this
 * listing; presumably z is returned - confirm.
 */
5453 double CDECL
ldexp(double num
, int exp
)
5455 double z
= __scalbn(num
, exp
);
5457 if (isfinite(num
) && !isfinite(z
))
5458 return math_error(_OVERFLOW
, "ldexp", num
, exp
, z
);
5459 if (num
&& isfinite(num
) && !z
)
5460 return math_error(_UNDERFLOW
, "ldexp", num
, exp
, z
);
5464 /*********************************************************************
/* _cabs: magnitude of the complex number num, computed as
 * sqrt(x*x + y*y) from its x and y members.
 *
 * NOTE(review): the squares are formed directly, so they can overflow or
 * underflow for components of extreme magnitude even when the true result is
 * representable - confirm this is the intended behavior.
 */
5467 double CDECL
_cabs(struct _complex num
)
5469 return sqrt(num
.x
* num
.x
+ num
.y
* num
.y
);
5472 /*********************************************************************
5473 * _chgsign (MSVCRT.@)
/* _chgsign: operates on the bit pattern of num, which is made accessible as a
 * 64-bit integer through a union.
 * NOTE(review): the statements that modify the bits and return the result are
 * not part of this listing; by its name the function presumably flips the
 * sign bit - confirm.
 */
5475 double CDECL
_chgsign(double num
)
5477 union { double f
; UINT64 i
; } u
= { num
};
5482 /*********************************************************************
5483 * __control87_2 (MSVCR80.@)
5485 * Not exported by native msvcrt, added in msvcr80.
/* __control87_2: access the x87 and SSE2 control words independently.
 *
 *   newval, mask  new control bits and the selection of bits to change
 *   x86_cw        x87 control word, processed through _setfp
 *   sse2_cw       SSE2 control word, processed through _setfp_sse; when it is
 *                 NULL the function returns 1 after the x87 part
 *
 * The status word is not touched by either call (NULL, 0).
 * NOTE(review): the statements that store newval into the control words, any
 * guards around the calls and the final return are not part of this listing -
 * confirm against the full source.
 */
5488 int CDECL
__control87_2( unsigned int newval
, unsigned int mask
,
5489 unsigned int *x86_cw
, unsigned int *sse2_cw
)
5494 _setfp(x86_cw
, mask
, NULL
, 0);
5497 if (!sse2_cw
) return 1;
5502 _setfp_sse(sse2_cw
, mask
, NULL
, 0);
5510 /*********************************************************************
5511 * _control87 (MSVCRT.@)
/* _control87: get/set the floating-point control word.
 *
 *   newval  new control bits
 *   mask    selects which bits of newval are applied
 *
 * One visible path goes through __control87_2 for both units and adds
 * _EM_AMBIGUOUS when the x87 and SSE2 words disagree in the exception-mask or
 * rounding-control bits; the other path calls _setfp directly.
 * NOTE(review): the preprocessor conditionals selecting between the two paths
 * and the return statement are not part of this listing - confirm.
 */
5513 unsigned int CDECL
_control87(unsigned int newval
, unsigned int mask
)
5515 unsigned int flags
= 0;
5517 unsigned int sse2_cw
;
5519 __control87_2( newval
, mask
, &flags
, &sse2_cw
);
/* differing exception masks or rounding mode between the two units */
5523 if ((flags
^ sse2_cw
) & (_MCW_EM
| _MCW_RC
)) flags
|= _EM_AMBIGUOUS
;
5528 _setfp(&flags
, mask
, NULL
, 0);
5533 /*********************************************************************
5534 * _controlfp (MSVCRT.@)
/* _controlfp: same as _control87, except that _EM_DENORMAL is removed from
 * mask first, so the denormal exception mask can never be changed through
 * this entry point.  Returns whatever _control87 returns. */
5536 unsigned int CDECL
_controlfp(unsigned int newval
, unsigned int mask
)
5538 return _control87( newval
, mask
& ~_EM_DENORMAL
);
5541 /*********************************************************************
5542 * _set_controlfp (MSVCRT.@)
/* _set_controlfp: apply newval/mask through _controlfp and discard the
 * resulting control word. */
5544 void CDECL
_set_controlfp( unsigned int newval
, unsigned int mask
)
5546 _controlfp( newval
, mask
);
5549 /*********************************************************************
5550 * _controlfp_s (MSVCRT.@)
/* _controlfp_s: parameter-checked variant of _controlfp.
 *
 *   cur     optional; receives the resulting control word
 *   newval  new control bits
 *   mask    selects which bits of newval are applied
 *
 * The request is rejected (through MSVCRT_CHECK_PMT) when newval & mask
 * contains bits outside all_flags; in that case *cur is still filled with the
 * current control word.  Otherwise the change is applied with _controlfp.
 * NOTE(review): the rest of the all_flags initializer, the declaration of val
 * and the return statements are not part of this listing - confirm the
 * returned error codes against the full source.
 */
5552 int CDECL
_controlfp_s(unsigned int *cur
, unsigned int newval
, unsigned int mask
)
5554 static const unsigned int all_flags
= (_MCW_EM
| _MCW_IC
| _MCW_RC
|
/* reject requests that try to change bits outside the supported set */
5558 if (!MSVCRT_CHECK_PMT( !(newval
& mask
& ~all_flags
) ))
5560 if (cur
) *cur
= _controlfp( 0, 0 ); /* retrieve it anyway */
5563 val
= _controlfp( newval
, mask
);
5564 if (cur
) *cur
= val
;
/* Bit patterns used by fenv_encode/fenv_decode on x86 and x86-64 when
 * _MSVCR_VER >= 140.  FENV_X_* values are produced from the first (x87)
 * word and FENV_Y_* values from the second (SSE) word, so that both can be
 * packed into one unsigned long.  Most constants set more than one bit;
 * fenv_decode requires all bits of a constant to be present.
 */
5568 #if _MSVCR_VER >= 140 && (defined(__i386__) || defined(__x86_64__))
/* first-word (x87) exception bits */
5571 FENV_X_INVALID
= 0x00100010,
5572 FENV_X_DENORMAL
= 0x00200020,
5573 FENV_X_ZERODIVIDE
= 0x00080008,
5574 FENV_X_OVERFLOW
= 0x00040004,
5575 FENV_X_UNDERFLOW
= 0x00020002,
5576 FENV_X_INEXACT
= 0x00010001,
/* first-word infinity, rounding and precision control */
5577 FENV_X_AFFINE
= 0x00004000,
5578 FENV_X_UP
= 0x00800200,
5579 FENV_X_DOWN
= 0x00400100,
5580 FENV_X_24
= 0x00002000,
5581 FENV_X_53
= 0x00001000,
/* second-word (SSE) exception bits */
5582 FENV_Y_INVALID
= 0x10000010,
5583 FENV_Y_DENORMAL
= 0x20000020,
5584 FENV_Y_ZERODIVIDE
= 0x08000008,
5585 FENV_Y_OVERFLOW
= 0x04000004,
5586 FENV_Y_UNDERFLOW
= 0x02000002,
5587 FENV_Y_INEXACT
= 0x01000001,
/* second-word rounding and denormal control */
5588 FENV_Y_UP
= 0x80000200,
5589 FENV_Y_DOWN
= 0x40000100,
5590 FENV_Y_FLUSH
= 0x00000400,
5591 FENV_Y_FLUSH_SAVE
= 0x00000800
/* fenv_encode: pack an x87 word (x) and an SSE word (y), both given as
 * MSVC-style _EM_* / _RC_* / _PC_* / _IC_* / _DN_* flags, into a single
 * value built from the FENV_X_* and FENV_Y_* constants.
 *
 * After translation the known bits are cleared from x and y; anything left
 * over is reported with a FIXME.
 * NOTE(review): the return statement is not part of this listing; the
 * function presumably returns ret - confirm.
 */
5594 /* encodes x87/sse control/status word in ulong */
5595 static __msvcrt_ulong
fenv_encode(unsigned int x
, unsigned int y
)
5597 __msvcrt_ulong ret
= 0;
/* first word: exception, infinity, rounding and precision bits */
5600 if (x
& _EM_INVALID
) ret
|= FENV_X_INVALID
;
5601 if (x
& _EM_DENORMAL
) ret
|= FENV_X_DENORMAL
;
5602 if (x
& _EM_ZERODIVIDE
) ret
|= FENV_X_ZERODIVIDE
;
5603 if (x
& _EM_OVERFLOW
) ret
|= FENV_X_OVERFLOW
;
5604 if (x
& _EM_UNDERFLOW
) ret
|= FENV_X_UNDERFLOW
;
5605 if (x
& _EM_INEXACT
) ret
|= FENV_X_INEXACT
;
5606 if (x
& _IC_AFFINE
) ret
|= FENV_X_AFFINE
;
5607 if (x
& _RC_UP
) ret
|= FENV_X_UP
;
5608 if (x
& _RC_DOWN
) ret
|= FENV_X_DOWN
;
5609 if (x
& _PC_24
) ret
|= FENV_X_24
;
5610 if (x
& _PC_53
) ret
|= FENV_X_53
;
/* drop everything that was just translated; leftovers are unsupported */
5612 x
&= ~(_MCW_EM
| _MCW_IC
| _MCW_RC
| _MCW_PC
);
/* second word: exception, rounding and denormal-control bits */
5614 if (y
& _EM_INVALID
) ret
|= FENV_Y_INVALID
;
5615 if (y
& _EM_DENORMAL
) ret
|= FENV_Y_DENORMAL
;
5616 if (y
& _EM_ZERODIVIDE
) ret
|= FENV_Y_ZERODIVIDE
;
5617 if (y
& _EM_OVERFLOW
) ret
|= FENV_Y_OVERFLOW
;
5618 if (y
& _EM_UNDERFLOW
) ret
|= FENV_Y_UNDERFLOW
;
5619 if (y
& _EM_INEXACT
) ret
|= FENV_Y_INEXACT
;
5620 if (y
& _RC_UP
) ret
|= FENV_Y_UP
;
5621 if (y
& _RC_DOWN
) ret
|= FENV_Y_DOWN
;
5622 if (y
& _DN_FLUSH
) ret
|= FENV_Y_FLUSH
;
5623 if (y
& _DN_FLUSH_OPERANDS_SAVE_RESULTS
) ret
|= FENV_Y_FLUSH_SAVE
;
5624 y
&= ~(_MCW_EM
| _MCW_IC
| _MCW_RC
| _MCW_DN
);
5626 if(x
|| y
) FIXME("unsupported flags: %x, %x\n", x
, y
);
/* fenv_decode: inverse of fenv_encode - split an encoded value into an x87
 * word (*x) and an SSE word (*y) of MSVC-style flags.
 *
 * A flag is only recognised when every bit of the matching FENV_* constant
 * is set in enc.  The result is validated by re-encoding *x and *y and
 * comparing with enc; a mismatch is logged with WARN.
 * NOTE(review): the initialisation of *x and *y and the return statements
 * are not part of this listing - confirm against the full source.
 */
5630 /* decodes x87/sse control/status word, returns FALSE on error */
5631 static BOOL
fenv_decode(__msvcrt_ulong enc
, unsigned int *x
, unsigned int *y
)
/* first-word flags */
5634 if ((enc
& FENV_X_INVALID
) == FENV_X_INVALID
) *x
|= _EM_INVALID
;
5635 if ((enc
& FENV_X_DENORMAL
) == FENV_X_DENORMAL
) *x
|= _EM_DENORMAL
;
5636 if ((enc
& FENV_X_ZERODIVIDE
) == FENV_X_ZERODIVIDE
) *x
|= _EM_ZERODIVIDE
;
5637 if ((enc
& FENV_X_OVERFLOW
) == FENV_X_OVERFLOW
) *x
|= _EM_OVERFLOW
;
5638 if ((enc
& FENV_X_UNDERFLOW
) == FENV_X_UNDERFLOW
) *x
|= _EM_UNDERFLOW
;
5639 if ((enc
& FENV_X_INEXACT
) == FENV_X_INEXACT
) *x
|= _EM_INEXACT
;
5640 if ((enc
& FENV_X_AFFINE
) == FENV_X_AFFINE
) *x
|= _IC_AFFINE
;
5641 if ((enc
& FENV_X_UP
) == FENV_X_UP
) *x
|= _RC_UP
;
5642 if ((enc
& FENV_X_DOWN
) == FENV_X_DOWN
) *x
|= _RC_DOWN
;
5643 if ((enc
& FENV_X_24
) == FENV_X_24
) *x
|= _PC_24
;
5644 if ((enc
& FENV_X_53
) == FENV_X_53
) *x
|= _PC_53
;
/* second-word flags */
5646 if ((enc
& FENV_Y_INVALID
) == FENV_Y_INVALID
) *y
|= _EM_INVALID
;
5647 if ((enc
& FENV_Y_DENORMAL
) == FENV_Y_DENORMAL
) *y
|= _EM_DENORMAL
;
5648 if ((enc
& FENV_Y_ZERODIVIDE
) == FENV_Y_ZERODIVIDE
) *y
|= _EM_ZERODIVIDE
;
5649 if ((enc
& FENV_Y_OVERFLOW
) == FENV_Y_OVERFLOW
) *y
|= _EM_OVERFLOW
;
5650 if ((enc
& FENV_Y_UNDERFLOW
) == FENV_Y_UNDERFLOW
) *y
|= _EM_UNDERFLOW
;
5651 if ((enc
& FENV_Y_INEXACT
) == FENV_Y_INEXACT
) *y
|= _EM_INEXACT
;
5652 if ((enc
& FENV_Y_UP
) == FENV_Y_UP
) *y
|= _RC_UP
;
5653 if ((enc
& FENV_Y_DOWN
) == FENV_Y_DOWN
) *y
|= _RC_DOWN
;
5654 if ((enc
& FENV_Y_FLUSH
) == FENV_Y_FLUSH
) *y
|= _DN_FLUSH
;
5655 if ((enc
& FENV_Y_FLUSH_SAVE
) == FENV_Y_FLUSH_SAVE
) *y
|= _DN_FLUSH_OPERANDS_SAVE_RESULTS
;
/* round-trip check: the decoded words must encode back to the input */
5657 if (fenv_encode(*x
, *y
) != enc
)
5659 WARN("can't decode: %lx\n", enc
);
/* fenv_encode (variant used when the x86 block above is not selected and
 * _MSVCR_VER >= 120): when y carries _EM_DENORMAL, that flag is replaced by
 * the value 0x20.
 * NOTE(review): the use of x and the return statement are not part of this
 * listing - confirm against the full source.
 */
5664 #elif _MSVCR_VER >= 120
5665 static __msvcrt_ulong
fenv_encode(unsigned int x
, unsigned int y
)
5667 if (y
& _EM_DENORMAL
)
5668 y
= (y
& ~_EM_DENORMAL
) | 0x20;
/* fenv_decode (counterpart of the non-x86 fenv_encode variant): the visible
 * statement replaces bit 0x20 of enc by _EM_DENORMAL.
 * NOTE(review): the condition guarding that statement, the stores to *x and
 * *y and the return statement are not part of this listing - confirm.
 */
5673 static BOOL
fenv_decode(__msvcrt_ulong enc
, unsigned int *x
, unsigned int *y
)
5676 enc
= (enc
& ~0x20) | _EM_DENORMAL
;
5684 /*********************************************************************
5685 * fegetenv (MSVCR120.@)
/* fegetenv: store the current floating-point environment in *env.
 *
 *   _MSVCR_VER >= 140, i386:  control and status words of the x87 and SSE
 *                             units are read separately and packed with
 *                             fenv_encode
 *   _MSVCR_VER >= 140, other: _control87 / _statusfp results are packed as
 *                             the second argument of fenv_encode
 *   older versions:           _Fe_ctl holds the exception-mask (without
 *                             _EM_DENORMAL) and rounding bits of _controlfp,
 *                             _Fe_stat the raw _statusfp value
 *
 * NOTE(review): the return statement is not part of this listing - confirm.
 */
5687 int CDECL
fegetenv(fenv_t
*env
)
5689 #if _MSVCR_VER>=140 && defined(__i386__)
5690 unsigned int x87
, sse
;
/* (0, 0) changes nothing and only reads both control words */
5691 __control87_2(0, 0, &x87
, &sse
);
5692 env
->_Fe_ctl
= fenv_encode(x87
, sse
);
5693 _statusfp2(&x87
, &sse
);
5694 env
->_Fe_stat
= fenv_encode(x87
, sse
);
5695 #elif _MSVCR_VER>=140
5696 env
->_Fe_ctl
= fenv_encode(0, _control87(0, 0));
5697 env
->_Fe_stat
= fenv_encode(0, _statusfp());
5699 env
->_Fe_ctl
= _controlfp(0, 0) & (_EM_INEXACT
| _EM_UNDERFLOW
|
5700 _EM_OVERFLOW
| _EM_ZERODIVIDE
| _EM_INVALID
| _MCW_RC
);
5701 env
->_Fe_stat
= _statusfp();
5706 /*********************************************************************
5707 * feupdateenv (MSVCR120.@)
/* feupdateenv: install the control word from *env while keeping the
 * currently raised exceptions: env's status bits are OR-ed into the status
 * already held in the local environment `set`, which is then applied with
 * fesetenv.  Returns the result of fesetenv.
 * NOTE(review): the declaration and initial fill of `set` are not part of
 * this listing; presumably it is obtained with fegetenv - confirm.
 */
5709 int CDECL
feupdateenv(const fenv_t
*env
)
5713 set
._Fe_ctl
= env
->_Fe_ctl
;
5714 set
._Fe_stat
|= env
->_Fe_stat
;
5715 return fesetenv(&set
);
5718 /*********************************************************************
5719 * fetestexcept (MSVCR120.@)
/* fetestexcept: return the subset of the requested exception flags that is
 * currently set in the status word reported by _statusfp(). */
5721 int CDECL
fetestexcept(int flags
)
5723 return _statusfp() & flags
;
5726 /*********************************************************************
5727 * fesetexceptflag (MSVCR120.@)
/* fesetexceptflag: set the exception flags selected by excepts to the state
 * stored in *status.
 *
 * excepts is limited to FE_ALL_EXCEPT; the encoded form of those flags is
 * first cleared in the local environment's status and then replaced by the
 * matching bits of *status, after which the environment is applied with
 * fesetenv.  Returns the result of fesetenv.
 * NOTE(review): the declaration and initial fill of `env`, and any early
 * return, are not part of this listing - confirm.
 */
5729 int CDECL
fesetexceptflag(const fexcept_t
*status
, int excepts
)
5733 excepts
&= FE_ALL_EXCEPT
;
/* the same flags are encoded for both units */
5738 env
._Fe_stat
&= ~fenv_encode(excepts
, excepts
);
5739 env
._Fe_stat
|= *status
& fenv_encode(excepts
, excepts
);
5740 return fesetenv(&env
);
5743 /*********************************************************************
5744 * feraiseexcept (MSVCR120.@)
/* feraiseexcept: mark the exceptions in flags (limited to FE_ALL_EXCEPT) as
 * raised by OR-ing their encoded form into the local environment's status
 * and applying it with fesetenv.  Returns the result of fesetenv.
 * NOTE(review): the declaration and initial fill of `env` are not part of
 * this listing - confirm.
 */
5746 int CDECL
feraiseexcept(int flags
)
5750 flags
&= FE_ALL_EXCEPT
;
5752 env
._Fe_stat
|= fenv_encode(flags
, flags
);
5753 return fesetenv(&env
);
5756 /*********************************************************************
5757 * feclearexcept (MSVCR120.@)
/* feclearexcept: clear the exceptions in flags (limited to FE_ALL_EXCEPT)
 * by removing their encoded form from the local environment's status and
 * applying it with fesetenv.  Returns the result of fesetenv.
 * NOTE(review): the declaration and initial fill of `env` are not part of
 * this listing - confirm.
 */
5759 int CDECL
feclearexcept(int flags
)
5764 flags
&= FE_ALL_EXCEPT
;
5765 env
._Fe_stat
&= ~fenv_encode(flags
, flags
);
5766 return fesetenv(&env
);
5769 /*********************************************************************
5770 * fegetexceptflag (MSVCR120.@)
/* fegetexceptflag: store in *status the encoded state of the exception flags
 * selected by excepts.
 *
 * With _MSVCR_VER >= 140 on i386 the x87 and SSE status words are read
 * separately and both are filtered by excepts; the other visible path encodes
 * the filtered _statusfp() value as the second word only.
 * NOTE(review): the preprocessor branch lines between the two paths and the
 * return statement are not part of this listing - confirm.
 */
5772 int CDECL
fegetexceptflag(fexcept_t
*status
, int excepts
)
5774 #if _MSVCR_VER>=140 && defined(__i386__)
5775 unsigned int x87
, sse
;
5776 _statusfp2(&x87
, &sse
);
5777 *status
= fenv_encode(x87
& excepts
, sse
& excepts
);
5779 *status
= fenv_encode(0, _statusfp() & excepts
);
5786 /*********************************************************************
5787 * __fpe_flt_rounds (UCRTBASE.@)
/* __fpe_flt_rounds: map the current rounding-control bits of the control
 * word to a small integer: _RC_CHOP -> 0, _RC_NEAR -> 1, _RC_UP -> 2.
 * NOTE(review): the switch header and the remaining case(s) are not part of
 * this listing - confirm the value returned for the other rounding mode.
 */
5789 int CDECL
__fpe_flt_rounds(void)
/* (0, 0) only reads the control word; the mask keeps the rounding bits */
5791 unsigned int fpc
= _controlfp(0, 0) & _RC_CHOP
;
5796 case _RC_CHOP
: return 0;
5797 case _RC_NEAR
: return 1;
5798 case _RC_UP
: return 2;
5806 /*********************************************************************
5807 * fegetround (MSVCR120.@)
/* fegetround: return the rounding-control bits (_MCW_RC) of the current
 * control word; _controlfp(0, 0) reads it without changing anything. */
5809 int CDECL
fegetround(void)
5811 return _controlfp(0, 0) & _MCW_RC
;
5814 /*********************************************************************
5815 * fesetround (MSVCR120.@)
/* fesetround: set the rounding mode to round_mode.
 *
 * A value with bits outside _MCW_RC is treated specially by the first test;
 * otherwise the rounding-control field is replaced through _controlfp.
 * NOTE(review): the return statements are not part of this listing;
 * presumably non-zero is returned for an invalid mode and 0 on success -
 * confirm.
 */
5817 int CDECL
fesetround(int round_mode
)
5819 if (round_mode
& (~_MCW_RC
))
5821 _controlfp(round_mode
, _MCW_RC
);
5825 #endif /* _MSVCR_VER>=120 */
5827 /*********************************************************************
5828 * _copysign (MSVCRT.@)
5830 * Copied from musl: src/math/copysign.c
/* _copysign: combine x with the sign of y by working on the 64-bit patterns
 * of both values through unions.  The visible statement ORs the top bit
 * (bit 63) of y into x; `&` binds tighter than `<<` does not apply here -
 * the expression is uy.i & (1ull << 63) because << binds tighter than &.
 * NOTE(review): the statement that first clears the sign of x and the return
 * statement are not part of this listing - confirm.
 */
5832 double CDECL
_copysign( double x
, double y
)
5834 union { double f
; UINT64 i
; } ux
= { x
}, uy
= { y
};
5836 ux
.i
|= uy
.i
& 1ull << 63;
5840 /*********************************************************************
5841 * _finite (MSVCRT.@)
/* _finite: return non-zero when num is neither infinite nor NaN.
 *
 * The sign bit is masked off (~0ull >> 1) and the remaining bits are
 * compared with the pattern whose exponent field is all ones (0x7ff << 52):
 * anything strictly below that is a finite value.
 */
5843 int CDECL
_finite(double num
)
5845 union { double f
; UINT64 i
; } u
= { num
};
5846 return (u
.i
& ~0ull >> 1) < 0x7ffull
<< 52;
5849 /*********************************************************************
5850 * _fpreset (MSVCRT.@)
/* _fpreset: reset the floating-point unit to its default state.
 *
 * On i386 (GCC/clang) the x87 unit is re-initialised with fninit and the
 * control word 0x27f is loaded; the SSE state is set through _setfp_sse.
 * The other visible path uses _setfp.  In both helper calls every bit is
 * imposed (masks of ~0) with a control word of _MCW_EM and a status word
 * of 0.
 * NOTE(review): the preprocessor branch lines and any guard around the SSE
 * call are not part of this listing - confirm.
 */
5852 void CDECL
_fpreset(void)
5854 #if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
5855 const unsigned int x86_cw
= 0x27f;
5856 __asm__
__volatile__( "fninit; fldcw %0" : : "m" (x86_cw
) );
5859 unsigned int cw
= _MCW_EM
, sw
= 0;
5860 _setfp_sse(&cw
, ~0, &sw
, ~0);
5863 unsigned int cw
= _MCW_EM
, sw
= 0;
5864 _setfp(&cw
, ~0, &sw
, ~0);
5869 /*********************************************************************
5870 * fesetenv (MSVCR120.@)
/* fesetenv: install the floating-point environment described by *env.
 *
 * An environment whose control and status fields are both zero is handled
 * as a special case.  Otherwise both fields are decoded with fenv_decode
 * into separate x87 and SSE words, and the decoded words are applied with
 * _setfp / _setfp_sse; the status words are imposed completely (mask ~0),
 * the control words according to `mask`.
 * NOTE(review): the body of the special case, the error returns after the
 * fenv_decode calls, the declaration of `mask`, the preprocessor branches
 * and the final return are not part of this listing - confirm against the
 * full source.
 */
5872 int CDECL
fesetenv(const fenv_t
*env
)
5874 unsigned int x87_cw
, cw
, x87_stat
, stat
;
5877 TRACE( "(%p)\n", env
);
/* all-zero environment */
5879 if (!env
->_Fe_ctl
&& !env
->_Fe_stat
) {
5884 if (!fenv_decode(env
->_Fe_ctl
, &x87_cw
, &cw
))
5886 if (!fenv_decode(env
->_Fe_stat
, &x87_stat
, &stat
))
5889 #if _MSVCR_VER >= 140
/* control bits that may be changed: exception masks (except denormal) and rounding */
5892 mask
= _EM_INEXACT
| _EM_UNDERFLOW
| _EM_OVERFLOW
5893 | _EM_ZERODIVIDE
| _EM_INVALID
| _MCW_RC
;
5897 _setfp(&x87_cw
, mask
, &x87_stat
, ~0);
5899 _setfp_sse(&cw
, mask
, &stat
, ~0);
5902 _setfp(&cw
, mask
, &stat
, ~0);
5908 /*********************************************************************
/* _isnan: return non-zero when num is a NaN.
 *
 * With the sign bit masked off, a NaN is any pattern strictly greater than
 * the one whose exponent field is all ones and whose mantissa is zero
 * (0x7ff << 52, i.e. infinity).
 */
5911 int CDECL
_isnan(double num
)
5913 union { double f
; UINT64 i
; } u
= { num
};
5914 return (u
.i
& ~0ull >> 1) > 0x7ffull
<< 52;
/* pzero: helper for the large-argument j0/y0 approximation (used by
 * j0_y0_approx).  Evaluates a rational function r/s in z whose coefficient
 * tables p and q are chosen from the magnitude of x, as read from the high
 * 32 bits of its representation.  The comments on the tables give the x
 * interval each set is meant for.
 *
 * NOTE(review): the headers of the denominator tables, the assignments of
 * p, q and z and the return statement are not part of this listing -
 * confirm against the full source.
 * NOTE(review): the bits of x are read through a ULONGLONG pointer cast,
 * whereas other functions in this file use a union - confirm the build
 * disables strict-aliasing optimizations.
 */
5917 static double pzero(double x
)
5919 static const double pR8
[6] = { /* for x in [inf, 8]=1/[0,0.125] */
5920 0.00000000000000000000e+00,
5921 -7.03124999999900357484e-02,
5922 -8.08167041275349795626e+00,
5923 -2.57063105679704847262e+02,
5924 -2.48521641009428822144e+03,
5925 -5.25304380490729545272e+03,
5927 1.16534364619668181717e+02,
5928 3.83374475364121826715e+03,
5929 4.05978572648472545552e+04,
5930 1.16752972564375915681e+05,
5931 4.76277284146730962675e+04,
5932 }, pR5
[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
5933 -1.14125464691894502584e-11,
5934 -7.03124940873599280078e-02,
5935 -4.15961064470587782438e+00,
5936 -6.76747652265167261021e+01,
5937 -3.31231299649172967747e+02,
5938 -3.46433388365604912451e+02,
5940 6.07539382692300335975e+01,
5941 1.05125230595704579173e+03,
5942 5.97897094333855784498e+03,
5943 9.62544514357774460223e+03,
5944 2.40605815922939109441e+03,
5945 }, pR3
[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
5946 -2.54704601771951915620e-09,
5947 -7.03119616381481654654e-02,
5948 -2.40903221549529611423e+00,
5949 -2.19659774734883086467e+01,
5950 -5.80791704701737572236e+01,
5951 -3.14479470594888503854e+01,
5953 3.58560338055209726349e+01,
5954 3.61513983050303863820e+02,
5955 1.19360783792111533330e+03,
5956 1.12799679856907414432e+03,
5957 1.73580930813335754692e+02,
5958 }, pR2
[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
5959 -8.87534333032526411254e-08,
5960 -7.03030995483624743247e-02,
5961 -1.45073846780952986357e+00,
5962 -7.63569613823527770791e+00,
5963 -1.11931668860356747786e+01,
5964 -3.23364579351335335033e+00,
5966 2.22202997532088808441e+01,
5967 1.36206794218215208048e+02,
5968 2.70470278658083486789e+02,
5969 1.53875394208320329881e+02,
5970 1.46576176948256193810e+01,
5973 const double *p
, *q
;
/* high word of x */
5977 ix
= *(ULONGLONG
*)&x
>> 32;
/* pick the coefficient set for the interval containing x */
5979 if (ix
>= 0x40200000) {
5982 } else if (ix
>= 0x40122E8B) {
5985 } else if (ix
>= 0x4006DB6D) {
5988 } else /*ix >= 0x40000000*/ {
/* numerator (degree 5) and denominator (1 + degree-5 tail) in Horner form */
5994 r
= p
[0] + z
* (p
[1] + z
* (p
[2] + z
* (p
[3] + z
* (p
[4] + z
* p
[5]))));
5995 s
= 1.0 + z
* (q
[0] + z
* (q
[1] + z
* (q
[2] + z
* (q
[3] + z
* q
[4]))));
/* qzero: helper for the large-argument j0/y0 approximation (used by
 * j0_y0_approx).  Evaluates a rational function r/s in z whose coefficient
 * tables p and q are chosen from the magnitude of x (high 32 bits of its
 * representation) and returns (-0.125 + r/s) / x.
 *
 * NOTE(review): the headers of the denominator tables and the assignments
 * of p, q and z are not part of this listing - confirm against the full
 * source.
 * NOTE(review): the bits of x are read through a ULONGLONG pointer cast,
 * whereas other functions in this file use a union - confirm the build
 * disables strict-aliasing optimizations.
 */
5999 static double qzero(double x
)
6001 static const double qR8
[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6002 0.00000000000000000000e+00,
6003 7.32421874999935051953e-02,
6004 1.17682064682252693899e+01,
6005 5.57673380256401856059e+02,
6006 8.85919720756468632317e+03,
6007 3.70146267776887834771e+04,
6009 1.63776026895689824414e+02,
6010 8.09834494656449805916e+03,
6011 1.42538291419120476348e+05,
6012 8.03309257119514397345e+05,
6013 8.40501579819060512818e+05,
6014 -3.43899293537866615225e+05,
6015 }, qR5
[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6016 1.84085963594515531381e-11,
6017 7.32421766612684765896e-02,
6018 5.83563508962056953777e+00,
6019 1.35111577286449829671e+02,
6020 1.02724376596164097464e+03,
6021 1.98997785864605384631e+03,
6023 8.27766102236537761883e+01,
6024 2.07781416421392987104e+03,
6025 1.88472887785718085070e+04,
6026 5.67511122894947329769e+04,
6027 3.59767538425114471465e+04,
6028 -5.35434275601944773371e+03,
6029 }, qR3
[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
6030 4.37741014089738620906e-09,
6031 7.32411180042911447163e-02,
6032 3.34423137516170720929e+00,
6033 4.26218440745412650017e+01,
6034 1.70808091340565596283e+02,
6035 1.66733948696651168575e+02,
6037 4.87588729724587182091e+01,
6038 7.09689221056606015736e+02,
6039 3.70414822620111362994e+03,
6040 6.46042516752568917582e+03,
6041 2.51633368920368957333e+03,
6042 -1.49247451836156386662e+02,
6043 }, qR2
[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
6044 1.50444444886983272379e-07,
6045 7.32234265963079278272e-02,
6046 1.99819174093815998816e+00,
6047 1.44956029347885735348e+01,
6048 3.16662317504781540833e+01,
6049 1.62527075710929267416e+01,
6051 3.03655848355219184498e+01,
6052 2.69348118608049844624e+02,
6053 8.44783757595320139444e+02,
6054 8.82935845112488550512e+02,
6055 2.12666388511798828631e+02,
6056 -5.31095493882666946917e+00,
6059 const double *p
, *q
;
/* high word of x */
6063 ix
= *(ULONGLONG
*)&x
>> 32;
/* pick the coefficient set for the interval containing x */
6065 if (ix
>= 0x40200000) {
6068 } else if (ix
>= 0x40122E8B) {
6071 } else if (ix
>= 0x4006DB6D) {
6074 } else /*ix >= 0x40000000*/ {
/* numerator (degree 5) and denominator (1 + degree-6 tail) in Horner form */
6080 r
= p
[0] + z
* (p
[1] + z
* (p
[2] + z
* (p
[3] + z
* (p
[4] + z
* p
[5]))));
6081 s
= 1.0 + z
* (q
[0] + z
* (q
[1] + z
* (q
[2] + z
* (q
[3] + z
* (q
[4] + z
* q
[5])))));
6082 return (-0.125 + r
/ s
) / x
;
/* j0_y0_approx: shared large-argument approximation for _j0 and _y0.
 *
 *   ix  high word of x as computed by the caller
 *   x   argument (callers in this file use it for |x| >= 2)
 *   y0  TRUE when called from _y0, FALSE when called from _j0
 *
 * The result is invsqrtpi * cc / sqrt(x), where cc is built from sine/cosine
 * terms and, for ix < 0x48000000, corrected with pzero(x) and qzero(x).
 * NOTE(review): the computation of s, c, ss, cc and z and the use of the y0
 * flag are not part of this listing - confirm against the full source.
 */
6085 /* j0 and y0 approximation for |x|>=2 */
6086 static double j0_y0_approx(unsigned int ix
, double x
, BOOL y0
)
6088 static const double invsqrtpi
= 5.64189583547756279280e-01;
6090 double s
, c
, ss
, cc
, z
;
6096 /* avoid overflow in 2*x, big ulp error when x>=0x1p1023 */
6097 if (ix
< 0x7fe00000) {
6100 if (s
* c
< 0) cc
= z
/ ss
;
/* below this magnitude the pzero/qzero correction terms are applied */
6102 if (ix
< 0x48000000) {
6104 cc
= pzero(x
) * cc
- qzero(x
) * ss
;
6107 return invsqrtpi
* cc
/ sqrt(x
);
6110 /*********************************************************************
6113 * Copied from musl: src/math/j0.c
/* _j0: Bessel function of the first kind, order 0 (per the file, copied
 * from musl's src/math/j0.c).
 *
 * The high word of x (ix) selects the evaluation path:
 *   - infinity or NaN        -> math_error(_DOMAIN, ...) with 1 / (x * x)
 *   - |x| >= 2               -> j0_y0_approx
 *   - |x| >= 2**-13          -> rational approximation with R02..R05 and
 *                               S01..S04
 *   - smaller arguments      -> separate small-argument handling
 * NOTE(review): the masking of the sign bit in ix, the assignment of z and
 * the small-argument return are not part of this listing - confirm.
 * NOTE(review): the bits of x are read through a ULONGLONG pointer cast,
 * whereas other functions in this file use a union - confirm the build
 * disables strict-aliasing optimizations.
 */
6115 double CDECL
_j0(double x
)
6117 static const double R02
= 1.56249999999999947958e-02,
6118 R03
= -1.89979294238854721751e-04,
6119 R04
= 1.82954049532700665670e-06,
6120 R05
= -4.61832688532103189199e-09,
6121 S01
= 1.56191029464890010492e-02,
6122 S02
= 1.16926784663337450260e-04,
6123 S03
= 5.13546550207318111446e-07,
6124 S04
= 1.16614003333790000205e-09;
/* high word of x */
6129 ix
= *(ULONGLONG
*)&x
>> 32;
6132 /* j0(+-inf)=0, j0(nan)=nan */
6133 if (ix
>= 0x7ff00000)
6134 return math_error(_DOMAIN
, "_j0", x
, 0, 1 / (x
* x
));
6137 if (ix
>= 0x40000000) { /* |x| >= 2 */
6138 /* large ulp error near zeros: 2.4, 5.52, 8.6537,.. */
6139 return j0_y0_approx(ix
, x
, FALSE
);
6142 if (ix
>= 0x3f200000) { /* |x| >= 2**-13 */
6143 /* up to 4ulp error close to 2 */
6145 r
= z
* (R02
+ z
* (R03
+ z
* (R04
+ z
* R05
)));
6146 s
= 1 + z
* (S01
+ z
* (S02
+ z
* (S03
+ z
* S04
)));
6147 return (1 + x
/ 2) * (1 - x
/ 2) + z
* (r
/ s
);
6151 /* prevent underflow */
6152 /* inexact should be raised when x!=0, this is not done correctly */
6153 if (ix
>= 0x38000000) /* |x| >= 2**-127 */
/* pone: helper for the large-argument j1/y1 approximation (used by
 * j1_y1_approx).  Evaluates a rational function r/s in z whose coefficient
 * tables p and q are chosen from the magnitude of x, as read from the high
 * 32 bits of its representation.
 *
 * NOTE(review): the headers of the denominator tables and of the third
 * numerator table, the assignments of p, q and z and the return statement
 * are not part of this listing - confirm against the full source.
 * NOTE(review): the bits of x are read through a ULONGLONG pointer cast,
 * whereas other functions in this file use a union - confirm the build
 * disables strict-aliasing optimizations.
 */
6158 static double pone(double x
)
6160 static const double pr8
[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6161 0.00000000000000000000e+00,
6162 1.17187499999988647970e-01,
6163 1.32394806593073575129e+01,
6164 4.12051854307378562225e+02,
6165 3.87474538913960532227e+03,
6166 7.91447954031891731574e+03,
6168 1.14207370375678408436e+02,
6169 3.65093083420853463394e+03,
6170 3.69562060269033463555e+04,
6171 9.76027935934950801311e+04,
6172 3.08042720627888811578e+04,
6173 }, pr5
[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6174 1.31990519556243522749e-11,
6175 1.17187493190614097638e-01,
6176 6.80275127868432871736e+00,
6177 1.08308182990189109773e+02,
6178 5.17636139533199752805e+02,
6179 5.28715201363337541807e+02,
6181 5.92805987221131331921e+01,
6182 9.91401418733614377743e+02,
6183 5.35326695291487976647e+03,
6184 7.84469031749551231769e+03,
6185 1.50404688810361062679e+03,
6187 3.02503916137373618024e-09,
6188 1.17186865567253592491e-01,
6189 3.93297750033315640650e+00,
6190 3.51194035591636932736e+01,
6191 9.10550110750781271918e+01,
6192 4.85590685197364919645e+01,
6194 3.47913095001251519989e+01,
6195 3.36762458747825746741e+02,
6196 1.04687139975775130551e+03,
6197 8.90811346398256432622e+02,
6198 1.03787932439639277504e+02,
6199 }, pr2
[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
6200 1.07710830106873743082e-07,
6201 1.17176219462683348094e-01,
6202 2.36851496667608785174e+00,
6203 1.22426109148261232917e+01,
6204 1.76939711271687727390e+01,
6205 5.07352312588818499250e+00,
6207 2.14364859363821409488e+01,
6208 1.25290227168402751090e+02,
6209 2.32276469057162813669e+02,
6210 1.17679373287147100768e+02,
6211 8.36463893371618283368e+00,
6214 const double *p
, *q
;
/* high word of x */
6218 ix
= *(ULONGLONG
*)&x
>> 32;
/* pick the coefficient set for the interval containing x */
6220 if (ix
>= 0x40200000) {
6223 } else if (ix
>= 0x40122E8B) {
6226 } else if (ix
>= 0x4006DB6D) {
6229 } else /*ix >= 0x40000000*/ {
/* numerator (degree 5) and denominator (1 + degree-5 tail) in Horner form */
6234 r
= p
[0] + z
* (p
[1] + z
* (p
[2] + z
* (p
[3] + z
* (p
[4] + z
* p
[5]))));
6235 s
= 1.0 + z
* (q
[0] + z
* (q
[1] + z
* (q
[2] + z
* (q
[3] + z
* q
[4]))));
/* qone: helper for the large-argument j1/y1 approximation (used by
 * j1_y1_approx).  Evaluates a rational function r/s in z whose coefficient
 * tables p and q are chosen from the magnitude of x (high 32 bits of its
 * representation) and returns (0.375 + r/s) / x.
 *
 * NOTE(review): the headers of the denominator tables and of the third
 * numerator table and the assignments of p, q and z are not part of this
 * listing - confirm against the full source.
 * NOTE(review): the bits of x are read through a ULONGLONG pointer cast,
 * whereas other functions in this file use a union - confirm the build
 * disables strict-aliasing optimizations.
 */
6239 static double qone(double x
)
6241 static const double qr8
[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6242 0.00000000000000000000e+00,
6243 -1.02539062499992714161e-01,
6244 -1.62717534544589987888e+01,
6245 -7.59601722513950107896e+02,
6246 -1.18498066702429587167e+04,
6247 -4.84385124285750353010e+04,
6249 1.61395369700722909556e+02,
6250 7.82538599923348465381e+03,
6251 1.33875336287249578163e+05,
6252 7.19657723683240939863e+05,
6253 6.66601232617776375264e+05,
6254 -2.94490264303834643215e+05,
6255 }, qr5
[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6256 -2.08979931141764104297e-11,
6257 -1.02539050241375426231e-01,
6258 -8.05644828123936029840e+00,
6259 -1.83669607474888380239e+02,
6260 -1.37319376065508163265e+03,
6261 -2.61244440453215656817e+03,
6263 8.12765501384335777857e+01,
6264 1.99179873460485964642e+03,
6265 1.74684851924908907677e+04,
6266 4.98514270910352279316e+04,
6267 2.79480751638918118260e+04,
6268 -4.71918354795128470869e+03,
6270 -5.07831226461766561369e-09,
6271 -1.02537829820837089745e-01,
6272 -4.61011581139473403113e+00,
6273 -5.78472216562783643212e+01,
6274 -2.28244540737631695038e+02,
6275 -2.19210128478909325622e+02,
6277 4.76651550323729509273e+01,
6278 6.73865112676699709482e+02,
6279 3.38015286679526343505e+03,
6280 5.54772909720722782367e+03,
6281 1.90311919338810798763e+03,
6282 -1.35201191444307340817e+02,
6283 }, qr2
[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
6284 -1.78381727510958865572e-07,
6285 -1.02517042607985553460e-01,
6286 -2.75220568278187460720e+00,
6287 -1.96636162643703720221e+01,
6288 -4.23253133372830490089e+01,
6289 -2.13719211703704061733e+01,
6291 2.95333629060523854548e+01,
6292 2.52981549982190529136e+02,
6293 7.57502834868645436472e+02,
6294 7.39393205320467245656e+02,
6295 1.55949003336666123687e+02,
6296 -4.95949898822628210127e+00,
6299 const double *p
, *q
;
/* high word of x */
6303 ix
= *(ULONGLONG
*)&x
>> 32;
/* pick the coefficient set for the interval containing x */
6305 if (ix
>= 0x40200000) {
6308 } else if (ix
>= 0x40122E8B) {
6311 } else if (ix
>= 0x4006DB6D) {
6314 } else /*ix >= 0x40000000*/ {
/* numerator (degree 5) and denominator (1 + degree-6 tail) in Horner form */
6319 r
= p
[0] + z
* (p
[1] + z
* (p
[2] + z
* (p
[3] + z
* (p
[4] + z
* p
[5]))));
6320 s
= 1.0 + z
* (q
[0] + z
* (q
[1] + z
* (q
[2] + z
* (q
[3] + z
* (q
[4] + z
* q
[5])))));
6321 return (0.375 + r
/ s
) / x
;
/* j1_y1_approx: shared large-argument approximation for _j1 and _y1.
 *
 *   ix    high word of x as computed by the caller
 *   x     argument (_j1 passes fabs(x))
 *   y1    TRUE when called for y1, FALSE when called from _j1
 *   sign  sign information supplied by the caller
 *
 * The result is invsqrtpi * cc / sqrt(x), where cc is built from sine/cosine
 * terms and, for ix < 0x48000000, corrected with pone(x) and qone(x).
 * NOTE(review): the computation of z, s, c, ss and cc and the use of the y1
 * and sign parameters are not part of this listing - confirm against the
 * full source.
 */
6324 static double j1_y1_approx(unsigned int ix
, double x
, BOOL y1
, int sign
)
6326 static const double invsqrtpi
= 5.64189583547756279280e-01;
6328 double z
, s
, c
, ss
, cc
;
6334 if (ix
< 0x7fe00000) {
6337 if (s
* c
> 0) cc
= z
/ ss
;
/* below this magnitude the pone/qone correction terms are applied */
6339 if (ix
< 0x48000000) {
6342 cc
= pone(x
) * cc
- qone(x
) * ss
;
6347 return invsqrtpi
* cc
/ sqrt(x
);
6350 /*********************************************************************
6353 * Copied from musl: src/math/j1.c
/* _j1: Bessel function of the first kind, order 1 (per the file, copied
 * from musl's src/math/j1.c).
 *
 * The high word of x (ix) selects the evaluation path:
 *   - infinity or NaN   -> math_error with 1 / (x * x); the error type is
 *                          _DOMAIN unless x is a NaN, in which case 0 is
 *                          passed
 *   - |x| >= 2          -> j1_y1_approx on fabs(x)
 *   - |x| >= 2**-127    -> rational approximation with r00..r03 and
 *                          s01..s05
 *   - result            -> (0.5 + z) * x
 * NOTE(review): the extraction of `sign`, the masking of ix and the
 * assignments of z are not part of this listing - confirm.
 * NOTE(review): the bits of x are read through a ULONGLONG pointer cast,
 * whereas other functions in this file use a union - confirm the build
 * disables strict-aliasing optimizations.
 */
6355 double CDECL
_j1(double x
)
6357 static const double r00
= -6.25000000000000000000e-02,
6358 r01
= 1.40705666955189706048e-03,
6359 r02
= -1.59955631084035597520e-05,
6360 r03
= 4.96727999609584448412e-08,
6361 s01
= 1.91537599538363460805e-02,
6362 s02
= 1.85946785588630915560e-04,
6363 s03
= 1.17718464042623683263e-06,
6364 s04
= 5.04636257076217042715e-09,
6365 s05
= 1.23542274426137913908e-11;
/* high word of x */
6371 ix
= *(ULONGLONG
*)&x
>> 32;
6374 if (ix
>= 0x7ff00000)
6375 return math_error(isnan(x
) ? 0 : _DOMAIN
, "_j1", x
, 0, 1 / (x
* x
));
6376 if (ix
>= 0x40000000) /* |x| >= 2 */
6377 return j1_y1_approx(ix
, fabs(x
), FALSE
, sign
);
6378 if (ix
>= 0x38000000) { /* |x| >= 2**-127 */
6380 r
= z
* (r00
+ z
* (r01
+ z
* (r02
+ z
* r03
)));
6381 s
= 1 + z
* (s01
+ z
* (s02
+ z
* (s03
+ z
* (s04
+ z
* s05
))));
6384 /* avoid underflow, raise inexact if x!=0 */
6387 return (0.5 + z
) * x
;
6390 /*********************************************************************
6393 * Copied from musl: src/math/jn.c
/* _jn: Bessel function of the first kind of integer order n (per the file,
 * copied from musl's src/math/jn.c).
 *
 * ix and lx hold the high and low 32 bits of x.  The visible structure is:
 *   - NaN test on (ix, lx)
 *   - sign handling: sign &= n keeps the sign of x only for odd n
 *   - x equal to 0 or infinity is handled separately
 *   - very large x (x > 2**302): closed form built from cos(x) and sin(x),
 *     scaled by invsqrtpi / sqrt(x)
 *   - forward recurrence b = b * (2.0 * i / x) - a
 *   - very small x (x < 2**-29): leading term (x/2)^n / n!
 *   - otherwise: continued-fraction start value followed by backward
 *     recurrence, with a separate loop when n * log|w| would overflow exp
 *   - the final result is negated when sign is set
 * NOTE(review): many statements (declarations, loop bodies, the branch
 * conditions selecting among these paths) are not part of this listing -
 * treat the outline above as a reading aid and confirm against the full
 * source.
 * NOTE(review): the bits of x are read through a ULONGLONG pointer cast,
 * whereas other functions in this file use a union - confirm the build
 * disables strict-aliasing optimizations.
 */
6395 double CDECL
_jn(int n
, double x
)
6397 static const double invsqrtpi
= 5.64189583547756279280e-01;
6399 unsigned int ix
, lx
;
/* high and low words of x */
6403 ix
= *(ULONGLONG
*)&x
>> 32;
6404 lx
= *(ULONGLONG
*)&x
;
/* (lx | -lx) >> 31 is 1 exactly when lx is non-zero */
6408 if ((ix
| (lx
| -lx
) >> 31) > 0x7ff00000) /* nan */
6423 sign
&= n
; /* even n: 0, odd n: signbit(x) */
6425 if ((ix
| lx
) == 0 || ix
== 0x7ff00000) /* if x is 0 or inf */
6428 if (ix
>= 0x52d00000) { /* x > 2**302 */
/* four sign combinations of cos(x) and sin(x) */
6431 temp
= -cos(x
) + sin(x
);
6434 temp
= -cos(x
) - sin(x
);
6437 temp
= cos(x
) - sin(x
);
6440 temp
= cos(x
) + sin(x
);
6443 b
= invsqrtpi
* temp
/ sqrt(x
);
/* forward recurrence */
6447 for (i
= 0; i
< nm1
; ) {
6450 b
= b
* (2.0 * i
/ x
) - a
; /* avoid underflow */
6455 if (ix
< 0x3e100000) { /* x < 2**-29 */
6456 if (nm1
> 32) /* underflow */
6462 for (i
= 2; i
<= nm1
+ 1; i
++) {
6463 a
*= (double)i
; /* a = n! */
6464 b
*= temp
; /* b = (x/2)^n */
6469 double t
, q0
, q1
, w
, h
, z
, tmp
, nf
;
/* iterate until the continued-fraction denominator is large enough */
6479 while (q1
< 1.0e9
) {
6486 for (t
= 0.0, i
= k
; i
>= 0; i
--)
6487 t
= 1 / (2 * (i
+ nf
) / x
- t
);
/* 7.09782712893383973096e+02 is the bound tested against n * log|w| */
6490 tmp
= nf
* log(fabs(w
));
6491 if (tmp
< 7.09782712893383973096e+02) {
6492 for (i
= nm1
; i
> 0; i
--) {
6494 b
= b
* (2.0 * i
) / x
- a
;
6498 for (i
= nm1
; i
> 0; i
--) {
6500 b
= b
* (2.0 * i
) / x
- a
;
6502 /* scale b to avoid spurious overflow */
6512 if (fabs(z
) >= fabs(w
))
6518 return sign
? -b
: b
;
6521 /*********************************************************************
/* _y0: Bessel function of the second kind, order 0.
 *
 * ix and lx hold the high and low 32 bits of x.  The visible paths are:
 *   - x == +-0         -> math_error(_OVERFLOW, ...) with -INFINITY
 *   - a domain error   -> math_error(_DOMAIN, ...) with 0 / (x - x)
 *   - x >= 2           -> j0_y0_approx
 *   - x >= 2**-27      -> rational approximation u/v in z plus
 *                         tpi * j0(x) * log(x)
 *   - smaller x        -> u00 + tpi * log(x)
 * NOTE(review): the condition guarding the domain error, the handling after
 * the ix >= 0x7ff00000 test and the assignment of z are not part of this
 * listing - confirm against the full source.
 * NOTE(review): the bits of x are read through a ULONGLONG pointer cast,
 * whereas other functions in this file use a union - confirm the build
 * disables strict-aliasing optimizations.
 */
6524 double CDECL
_y0(double x
)
6526 static const double tpi
= 6.36619772367581382433e-01,
6527 u00
= -7.38042951086872317523e-02,
6528 u01
= 1.76666452509181115538e-01,
6529 u02
= -1.38185671945596898896e-02,
6530 u03
= 3.47453432093683650238e-04,
6531 u04
= -3.81407053724364161125e-06,
6532 u05
= 1.95590137035022920206e-08,
6533 u06
= -3.98205194132103398453e-11,
6534 v01
= 1.27304834834123699328e-02,
6535 v02
= 7.60068627350353253702e-05,
6536 v03
= 2.59150851840457805467e-07,
6537 v04
= 4.41110311332675467403e-10;
6540 unsigned int ix
, lx
;
/* high and low words of x */
6542 ix
= *(ULONGLONG
*)&x
>> 32;
6543 lx
= *(ULONGLONG
*)&x
;
6545 /* y0(nan)=nan, y0(<0)=nan, y0(0)=-inf, y0(inf)=0 */
/* ix << 1 discards the sign bit, so this matches both +0 and -0 */
6546 if ((ix
<< 1 | lx
) == 0)
6547 return math_error(_OVERFLOW
, "_y0", x
, 0, -INFINITY
);
6551 return math_error(_DOMAIN
, "_y0", x
, 0, 0 / (x
- x
));
6552 if (ix
>= 0x7ff00000)
6555 if (ix
>= 0x40000000) { /* x >= 2 */
6556 /* large ulp errors near zeros: 3.958, 7.086,.. */
6557 return j0_y0_approx(ix
, x
, TRUE
);
6560 if (ix
>= 0x3e400000) { /* x >= 2**-27 */
6561 /* large ulp error near the first zero, x ~= 0.89 */
6563 u
= u00
+ z
* (u01
+ z
* (u02
+ z
* (u03
+ z
* (u04
+ z
* (u05
+ z
* u06
)))));
6564 v
= 1.0 + z
* (v01
+ z
* (v02
+ z
* (v03
+ z
* v04
)));
6565 return u
/ v
+ tpi
* (j0(x
) * log(x
));
6567 return u00
+ tpi
* log(x
);
6570 /*********************************************************************
6573 double CDECL
_y1(double x
)
6575 static const double tpi
= 6.36619772367581382433e-01,
6576 u00
= -1.96057090646238940668e-01,
6577 u01
= 5.04438716639811282616e-02,
6578 u02
= -1.91256895875763547298e-03,
6579 u03
= 2.35252600561610495928e-05,
6580 u04
= -9.19099158039878874504e-08,
6581 v00
= 1.99167318236649903973e-02,
6582 v01
= 2.02552581025135171496e-04,
6583 v02
= 1.35608801097516229404e-06,
6584 v03
= 6.22741452364621501295e-09,
6585 v04
= 1.66559246207992079114e-11;
6588 unsigned int ix
, lx
;
6590 ix
= *(ULONGLONG
*)&x
>> 32;
6591 lx
= *(ULONGLONG
*)&x
;
6593 /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */
6594 if ((ix
<< 1 | lx
) == 0)
6595 return math_error(_OVERFLOW
, "_y1", x
, 0, -INFINITY
);
6599 return math_error(_DOMAIN
, "_y1", x
, 0, 0 / (x
- x
));
6600 if (ix
>= 0x7ff00000)
6603 if (ix
>= 0x40000000) /* x >= 2 */
6604 return j1_y1_approx(ix
, x
, TRUE
, 0);
6605 if (ix
< 0x3c900000) /* x < 2**-54 */
6608 u
= u00
+ z
* (u01
+ z
* (u02
+ z
* (u03
+ z
* u04
)));
6609 v
= 1 + z
* (v00
+ z
* (v01
+ z
* (v02
+ z
* (v03
+ z
* v04
))));
6610 return x
* (u
/ v
) + tpi
* (j1(x
) * log(x
) - 1 / x
);
6613 /*********************************************************************
6616 * Copied from musl: src/math/jn.c
6618 double CDECL
_yn(int n
, double x
)
6620 static const double invsqrtpi
= 5.64189583547756279280e-01;
6622 unsigned int ix
, lx
, ib
;
6626 ix
= *(ULONGLONG
*)&x
>> 32;
6627 lx
= *(ULONGLONG
*)&x
;
6631 if ((ix
| (lx
| -lx
) >> 31) > 0x7ff00000) /* nan */
6633 if (sign
&& (ix
| lx
) != 0) /* x < 0 */
6634 return math_error(_DOMAIN
, "_y1", x
, 0, 0 / (x
- x
));
6635 if (ix
== 0x7ff00000)
6648 return sign
? -y1(x
) : y1(x
);
6650 if (ix
>= 0x52d00000) { /* x > 2**302 */
6653 temp
= -sin(x
) - cos(x
);
6656 temp
= -sin(x
) + cos(x
);
6659 temp
= sin(x
) + cos(x
);
6662 temp
= sin(x
) - cos(x
);
6665 b
= invsqrtpi
* temp
/ sqrt(x
);
6669 /* quit if b is -inf */
6670 ib
= *(ULONGLONG
*)&b
>> 32;
6671 for (i
= 0; i
< nm1
&& ib
!= 0xfff00000;) {
6674 b
= (2.0 * i
/ x
) * b
- a
;
6675 ib
= *(ULONGLONG
*)&b
>> 32;
6679 return sign
? -b
: b
;
6684 /*********************************************************************
6685 * _nearbyint (MSVCR120.@)
6687 * Based on musl: src/math/nearbyint.c
6689 double CDECL
nearbyint(double x
)
6691 BOOL update_cw
, update_sw
;
6692 unsigned int cw
, sw
;
6694 _setfp(&cw
, 0, &sw
, 0);
6695 update_cw
= !(cw
& _EM_INEXACT
);
6696 update_sw
= !(sw
& _SW_INEXACT
);
6700 _setfp(&cw
, _EM_INEXACT
, NULL
, 0);
6703 if (update_cw
|| update_sw
)
6707 _setfp(update_cw
? &cw
: NULL
, _EM_INEXACT
,
6708 update_sw
? &sw
: NULL
, _SW_INEXACT
);
6713 /*********************************************************************
6714 * _nearbyintf (MSVCR120.@)
6716 * Based on musl: src/math/nearbyintf.c
6718 float CDECL
nearbyintf(float x
)
6720 BOOL update_cw
, update_sw
;
6721 unsigned int cw
, sw
;
6723 _setfp(&cw
, 0, &sw
, 0);
6724 update_cw
= !(cw
& _EM_INEXACT
);
6725 update_sw
= !(sw
& _SW_INEXACT
);
6729 _setfp(&cw
, _EM_INEXACT
, NULL
, 0);
6732 if (update_cw
|| update_sw
)
6736 _setfp(update_cw
? &cw
: NULL
, _EM_INEXACT
,
6737 update_sw
? &sw
: NULL
, _SW_INEXACT
);
6742 /*********************************************************************
6743 * nexttoward (MSVCR120.@)
6745 double CDECL
MSVCRT_nexttoward(double num
, double next
)
6747 return _nextafter(num
, next
);
6750 /*********************************************************************
6751 * nexttowardf (MSVCR120.@)
6753 * Copied from musl: src/math/nexttowardf.c
6755 float CDECL
MSVCRT_nexttowardf(float x
, double y
)
6757 unsigned int ix
= *(unsigned int*)&x
;
6761 if (isnan(x
) || isnan(y
))
6780 e
= ix
& 0x7f800000;
6781 /* raise overflow if ix is infinite and x is finite */
6782 if (e
== 0x7f800000) {
6787 /* raise underflow if ret is subnormal or zero */
6789 fp_barrierf(x
* x
+ ret
* ret
);
6795 #endif /* _MSVCR_VER>=120 */
6797 /*********************************************************************
6798 * _nextafter (MSVCRT.@)
6800 * Copied from musl: src/math/nextafter.c
6802 double CDECL
_nextafter(double x
, double y
)
6804 ULONGLONG llx
= *(ULONGLONG
*)&x
;
6805 ULONGLONG lly
= *(ULONGLONG
*)&y
;
6809 if (isnan(x
) || isnan(y
))
6812 if (_fpclass(y
) & (_FPCLASS_ND
| _FPCLASS_PD
| _FPCLASS_NZ
| _FPCLASS_PZ
))
6816 ax
= llx
& -1ULL / 2;
6817 ay
= lly
& -1ULL / 2;
6821 llx
= (lly
& 1ULL << 63) | 1;
6822 } else if (ax
> ay
|| ((llx
^ lly
) & 1ULL << 63))
6826 e
= llx
>> 52 & 0x7ff;
6827 /* raise overflow if llx is infinite and x is finite */
6832 /* raise underflow if llx is subnormal or zero */
6835 fp_barrier(x
* x
+ y
* y
);
6841 /*********************************************************************
6844 char * CDECL
_ecvt( double number
, int ndigits
, int *decpt
, int *sign
)
6847 thread_data_t
*data
= msvcrt_get_thread_data();
6848 /* FIXME: check better for overflow (native supports over 300 chars) */
6849 ndigits
= min( ndigits
, 80 - 8); /* 8 : space for sign, dec point, "e",
6850 * 4 for exponent and one for
6851 * terminating '\0' */
6852 if (!data
->efcvt_buffer
)
6853 data
->efcvt_buffer
= malloc( 80 ); /* ought to be enough */
6855 /* handle cases with zero ndigits or less */
6857 if( prec
< 1) prec
= 2;
6858 len
= _snprintf(data
->efcvt_buffer
, 80, "%.*le", prec
- 1, number
);
6860 if (data
->efcvt_buffer
[0] == '-') {
6861 memmove( data
->efcvt_buffer
, data
->efcvt_buffer
+ 1, len
-- );
6865 /* take the decimal point away */
6867 memmove( data
->efcvt_buffer
+ 1, data
->efcvt_buffer
+ 2, len
- 1 );
6868 /* take the exponential "e" out */
6869 data
->efcvt_buffer
[ prec
] = '\0';
6870 /* read the exponent */
6871 sscanf( data
->efcvt_buffer
+ prec
+ 1, "%d", decpt
);
6873 /* adjust for some border cases */
6874 if( data
->efcvt_buffer
[0] == '0')/* value is zero */
6876 /* handle cases with zero ndigits or less */
6878 if( data
->efcvt_buffer
[ 0] >= '5')
6880 data
->efcvt_buffer
[ 0] = '\0';
6882 TRACE("out=\"%s\"\n",data
->efcvt_buffer
);
6883 return data
->efcvt_buffer
;
6886 /*********************************************************************
6887 * _ecvt_s (MSVCRT.@)
6889 int CDECL
_ecvt_s( char *buffer
, size_t length
, double number
, int ndigits
, int *decpt
, int *sign
)
6894 if (!MSVCRT_CHECK_PMT(buffer
!= NULL
)) return EINVAL
;
6895 if (!MSVCRT_CHECK_PMT(decpt
!= NULL
)) return EINVAL
;
6896 if (!MSVCRT_CHECK_PMT(sign
!= NULL
)) return EINVAL
;
6897 if (!MSVCRT_CHECK_PMT_ERR( length
> 2, ERANGE
)) return ERANGE
;
6898 if (!MSVCRT_CHECK_PMT_ERR(ndigits
< (int)length
- 1, ERANGE
)) return ERANGE
;
6900 /* handle cases with zero ndigits or less */
6902 if( prec
< 1) prec
= 2;
6903 result
= malloc(prec
+ 8);
6905 len
= _snprintf(result
, prec
+ 8, "%.*le", prec
- 1, number
);
6906 if (result
[0] == '-') {
6907 memmove( result
, result
+ 1, len
-- );
6911 /* take the decimal point away */
6913 memmove( result
+ 1, result
+ 2, len
- 1 );
6914 /* take the exponential "e" out */
6915 result
[ prec
] = '\0';
6916 /* read the exponent */
6917 sscanf( result
+ prec
+ 1, "%d", decpt
);
6919 /* adjust for some border cases */
6920 if( result
[0] == '0')/* value is zero */
6922 /* handle cases with zero ndigits or less */
6924 if( result
[ 0] >= '5')
6928 memcpy( buffer
, result
, max(ndigits
+ 1, 1) );
6933 /***********************************************************************
6936 char * CDECL
_fcvt( double number
, int ndigits
, int *decpt
, int *sign
)
6938 thread_data_t
*data
= msvcrt_get_thread_data();
6939 int stop
, dec1
, dec2
;
6940 char *ptr1
, *ptr2
, *first
;
6941 char buf
[80]; /* ought to be enough */
6942 char decimal_separator
= get_locinfo()->lconv
->decimal_point
[0];
6944 if (!data
->efcvt_buffer
)
6945 data
->efcvt_buffer
= malloc( 80 ); /* ought to be enough */
6947 stop
= _snprintf(buf
, 80, "%.*f", ndigits
< 0 ? 0 : ndigits
, number
);
6949 ptr2
= data
->efcvt_buffer
;
6959 /* For numbers below the requested resolution, work out where
6960 the decimal point will be rather than finding it in the string */
6961 if (number
< 1.0 && number
> 0.0) {
6962 dec2
= log10(number
+ 1e-10);
6963 if (-dec2
<= ndigits
) dec2
= 0;
6966 /* If requested digits is zero or less, we will need to truncate
6967 * the returned string */
6972 while (*ptr1
== '0') ptr1
++; /* Skip leading zeroes */
6973 while (*ptr1
!= '\0' && *ptr1
!= decimal_separator
) {
6974 if (!first
) first
= ptr2
;
6975 if ((ptr1
- buf
) < stop
) {
6986 while (*ptr1
== '0') { /* Process leading zeroes */
6991 while (*ptr1
!= '\0') {
6992 if (!first
) first
= ptr2
;
6999 /* We never found a non-zero digit, then our number is either
7000 * smaller than the requested precision, or 0.0 */
7005 first
= data
->efcvt_buffer
;
7010 *decpt
= dec2
? dec2
: dec1
;
7014 /***********************************************************************
7015 * _fcvt_s (MSVCRT.@)
7017 int CDECL
_fcvt_s(char* outbuffer
, size_t size
, double number
, int ndigits
, int *decpt
, int *sign
)
7019 int stop
, dec1
, dec2
;
7020 char *ptr1
, *ptr2
, *first
;
7021 char buf
[80]; /* ought to be enough */
7022 char decimal_separator
= get_locinfo()->lconv
->decimal_point
[0];
7024 if (!outbuffer
|| !decpt
|| !sign
|| size
== 0)
7030 stop
= _snprintf(buf
, 80, "%.*f", ndigits
< 0 ? 0 : ndigits
, number
);
7042 /* For numbers below the requested resolution, work out where
7043 the decimal point will be rather than finding it in the string */
7044 if (number
< 1.0 && number
> 0.0) {
7045 dec2
= log10(number
+ 1e-10);
7046 if (-dec2
<= ndigits
) dec2
= 0;
7049 /* If requested digits is zero or less, we will need to truncate
7050 * the returned string */
7055 while (*ptr1
== '0') ptr1
++; /* Skip leading zeroes */
7056 while (*ptr1
!= '\0' && *ptr1
!= decimal_separator
) {
7057 if (!first
) first
= ptr2
;
7058 if ((ptr1
- buf
) < stop
) {
7072 while (*ptr1
== '0') { /* Process leading zeroes */
7073 if (number
== 0.0 && size
> 1) {
7081 while (*ptr1
!= '\0') {
7082 if (!first
) first
= ptr2
;
7092 /* We never found a non-zero digit, then our number is either
7093 * smaller than the requested precision, or 0.0 */
7094 if (!first
&& (number
<= 0.0))
7097 *decpt
= dec2
? dec2
: dec1
;
7101 /***********************************************************************
7104 char * CDECL
_gcvt( double number
, int ndigit
, char *buff
)
7116 sprintf(buff
, "%.*g", ndigit
, number
);
7120 /***********************************************************************
7121 * _gcvt_s (MSVCRT.@)
7123 int CDECL
_gcvt_s(char *buff
, size_t size
, double number
, int digits
)
7132 if( digits
<0 || digits
>=size
) {
7140 len
= _scprintf("%.*g", digits
, number
);
7147 sprintf(buff
, "%.*g", digits
, number
);
7151 #include <stdlib.h> /* div_t, ldiv_t */
7153 /*********************************************************************
7156 * [i386] Windows binary compatible - returns the struct in eax/edx.
7159 unsigned __int64 CDECL
div(int num
, int denom
)
7163 unsigned __int64 uint64
;
7166 ret
.div
.quot
= num
/ denom
;
7167 ret
.div
.rem
= num
% denom
;
7171 /*********************************************************************
7174 * [!i386] Non-x86 can't run win32 apps so we don't need binary compatibility
7176 div_t CDECL
div(int num
, int denom
)
7180 ret
.quot
= num
/ denom
;
7181 ret
.rem
= num
% denom
;
7184 #endif /* ifdef __i386__ */
7187 /*********************************************************************
7190 * [i386] Windows binary compatible - returns the struct in eax/edx.
7193 unsigned __int64 CDECL
ldiv(__msvcrt_long num
, __msvcrt_long denom
)
7197 unsigned __int64 uint64
;
7200 ret
.ldiv
.quot
= num
/ denom
;
7201 ret
.ldiv
.rem
= num
% denom
;
7205 /*********************************************************************
7208 * [!i386] Non-x86 can't run win32 apps so we don't need binary compatibility
7210 ldiv_t CDECL
ldiv(__msvcrt_long num
, __msvcrt_long denom
)
7214 ret
.quot
= num
/ denom
;
7215 ret
.rem
= num
% denom
;
7218 #endif /* ifdef __i386__ */
7221 /*********************************************************************
7222 * lldiv (MSVCR100.@)
7224 lldiv_t CDECL
lldiv(__int64 num
, __int64 denom
)
7228 ret
.quot
= num
/ denom
;
7229 ret
.rem
= num
% denom
;
7237 /*********************************************************************
7238 * _adjust_fdiv (MSVCRT.@)
7239 * Used by the MSVC compiler to work around the Pentium FDIV bug.
7241 int MSVCRT__adjust_fdiv
= 0;
7243 /***********************************************************************
7244 * _adj_fdiv_m16i (MSVCRT.@)
7247 * I _think_ this function is intended to work around the Pentium
7250 void __stdcall
_adj_fdiv_m16i( short arg
)
7252 TRACE("(): stub\n");
7255 /***********************************************************************
7256 * _adj_fdiv_m32 (MSVCRT.@)
7259 * I _think_ this function is intended to work around the Pentium
7262 void __stdcall
_adj_fdiv_m32( unsigned int arg
)
7264 TRACE("(): stub\n");
7267 /***********************************************************************
7268 * _adj_fdiv_m32i (MSVCRT.@)
7271 * I _think_ this function is intended to work around the Pentium
7274 void __stdcall
_adj_fdiv_m32i( int arg
)
7276 TRACE("(): stub\n");
7279 /***********************************************************************
7280 * _adj_fdiv_m64 (MSVCRT.@)
7283 * I _think_ this function is intended to work around the Pentium
7286 void __stdcall
_adj_fdiv_m64( unsigned __int64 arg
)
7288 TRACE("(): stub\n");
7291 /***********************************************************************
7292 * _adj_fdiv_r (MSVCRT.@)
7294 * This function is likely to have the wrong number of arguments.
7297 * I _think_ this function is intended to work around the Pentium
7300 void _adj_fdiv_r(void)
7302 TRACE("(): stub\n");
7305 /***********************************************************************
7306 * _adj_fdivr_m16i (MSVCRT.@)
7309 * I _think_ this function is intended to work around the Pentium
7312 void __stdcall
_adj_fdivr_m16i( short arg
)
7314 TRACE("(): stub\n");
7317 /***********************************************************************
7318 * _adj_fdivr_m32 (MSVCRT.@)
7321 * I _think_ this function is intended to work around the Pentium
7324 void __stdcall
_adj_fdivr_m32( unsigned int arg
)
7326 TRACE("(): stub\n");
7329 /***********************************************************************
7330 * _adj_fdivr_m32i (MSVCRT.@)
7333 * I _think_ this function is intended to work around the Pentium
7336 void __stdcall
_adj_fdivr_m32i( int arg
)
7338 TRACE("(): stub\n");
7341 /***********************************************************************
7342 * _adj_fdivr_m64 (MSVCRT.@)
7345 * I _think_ this function is intended to work around the Pentium
7348 void __stdcall
_adj_fdivr_m64( unsigned __int64 arg
)
7350 TRACE("(): stub\n");
7353 /***********************************************************************
7354 * _adj_fpatan (MSVCRT.@)
7356 * This function is likely to have the wrong number of arguments.
7359 * I _think_ this function is intended to work around the Pentium
7362 void _adj_fpatan(void)
7364 TRACE("(): stub\n");
7367 /***********************************************************************
7368 * _adj_fprem (MSVCRT.@)
7370 * This function is likely to have the wrong number of arguments.
7373 * I _think_ this function is intended to work around the Pentium
7376 void _adj_fprem(void)
7378 TRACE("(): stub\n");
7381 /***********************************************************************
7382 * _adj_fprem1 (MSVCRT.@)
7384 * This function is likely to have the wrong number of arguments.
7387 * I _think_ this function is intended to work around the Pentium
7390 void _adj_fprem1(void)
7392 TRACE("(): stub\n");
7395 /***********************************************************************
7396 * _adj_fptan (MSVCRT.@)
7398 * This function is likely to have the wrong number of arguments.
7401 * I _think_ this function is intended to work around the Pentium
7404 void _adj_fptan(void)
7406 TRACE("(): stub\n");
7409 /***********************************************************************
7410 * _safe_fdiv (MSVCRT.@)
7412 * This function is likely to have the wrong number of arguments.
7415 * I _think_ this function is intended to work around the Pentium
7418 void _safe_fdiv(void)
7420 TRACE("(): stub\n");
7423 /***********************************************************************
7424 * _safe_fdivr (MSVCRT.@)
7426 * This function is likely to have the wrong number of arguments.
7429 * I _think_ this function is intended to work around the Pentium
7432 void _safe_fdivr(void)
7434 TRACE("(): stub\n");
7437 /***********************************************************************
7438 * _safe_fprem (MSVCRT.@)
7440 * This function is likely to have the wrong number of arguments.
7443 * I _think_ this function is intended to work around the Pentium
7446 void _safe_fprem(void)
7448 TRACE("(): stub\n");
7451 /***********************************************************************
7452 * _safe_fprem1 (MSVCRT.@)
7455 * This function is likely to have the wrong number of arguments.
7458 * I _think_ this function is intended to work around the Pentium
7461 void _safe_fprem1(void)
7463 TRACE("(): stub\n");
7466 /***********************************************************************
7467 * __libm_sse2_acos (MSVCRT.@)
7469 void __cdecl
__libm_sse2_acos(void)
7472 __asm__
__volatile__( "movq %%xmm0,%0" : "=m" (d
) );
7474 __asm__
__volatile__( "movq %0,%%xmm0" : : "m" (d
) );
7477 /***********************************************************************
7478 * __libm_sse2_acosf (MSVCRT.@)
7480 void __cdecl
__libm_sse2_acosf(void)
7483 __asm__
__volatile__( "movd %%xmm0,%0" : "=g" (f
) );
7485 __asm__
__volatile__( "movd %0,%%xmm0" : : "g" (f
) );
7488 /***********************************************************************
7489 * __libm_sse2_asin (MSVCRT.@)
7491 void __cdecl
__libm_sse2_asin(void)
7494 __asm__
__volatile__( "movq %%xmm0,%0" : "=m" (d
) );
7496 __asm__
__volatile__( "movq %0,%%xmm0" : : "m" (d
) );
7499 /***********************************************************************
7500 * __libm_sse2_asinf (MSVCRT.@)
7502 void __cdecl
__libm_sse2_asinf(void)
7505 __asm__
__volatile__( "movd %%xmm0,%0" : "=g" (f
) );
7507 __asm__
__volatile__( "movd %0,%%xmm0" : : "g" (f
) );
7510 /***********************************************************************
7511 * __libm_sse2_atan (MSVCRT.@)
7513 void __cdecl
__libm_sse2_atan(void)
7516 __asm__
__volatile__( "movq %%xmm0,%0" : "=m" (d
) );
7518 __asm__
__volatile__( "movq %0,%%xmm0" : : "m" (d
) );
7521 /***********************************************************************
7522 * __libm_sse2_atan2 (MSVCRT.@)
7524 void __cdecl
__libm_sse2_atan2(void)
7527 __asm__
__volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1
), "=m" (d2
) );
7528 d1
= atan2( d1
, d2
);
7529 __asm__
__volatile__( "movq %0,%%xmm0" : : "m" (d1
) );
7532 /***********************************************************************
7533 * __libm_sse2_atanf (MSVCRT.@)
7535 void __cdecl
__libm_sse2_atanf(void)
7538 __asm__
__volatile__( "movd %%xmm0,%0" : "=g" (f
) );
7540 __asm__
__volatile__( "movd %0,%%xmm0" : : "g" (f
) );
7543 /***********************************************************************
7544 * __libm_sse2_cos (MSVCRT.@)
7546 void __cdecl
__libm_sse2_cos(void)
7549 __asm__
__volatile__( "movq %%xmm0,%0" : "=m" (d
) );
7551 __asm__
__volatile__( "movq %0,%%xmm0" : : "m" (d
) );
7554 /***********************************************************************
7555 * __libm_sse2_cosf (MSVCRT.@)
7557 void __cdecl
__libm_sse2_cosf(void)
7560 __asm__
__volatile__( "movd %%xmm0,%0" : "=g" (f
) );
7562 __asm__
__volatile__( "movd %0,%%xmm0" : : "g" (f
) );
7565 /***********************************************************************
7566 * __libm_sse2_exp (MSVCRT.@)
7568 void __cdecl
__libm_sse2_exp(void)
7571 __asm__
__volatile__( "movq %%xmm0,%0" : "=m" (d
) );
7573 __asm__
__volatile__( "movq %0,%%xmm0" : : "m" (d
) );
7576 /***********************************************************************
7577 * __libm_sse2_expf (MSVCRT.@)
7579 void __cdecl
__libm_sse2_expf(void)
7582 __asm__
__volatile__( "movd %%xmm0,%0" : "=g" (f
) );
7584 __asm__
__volatile__( "movd %0,%%xmm0" : : "g" (f
) );
7587 /***********************************************************************
7588 * __libm_sse2_log (MSVCRT.@)
7590 void __cdecl
__libm_sse2_log(void)
7593 __asm__
__volatile__( "movq %%xmm0,%0" : "=m" (d
) );
7595 __asm__
__volatile__( "movq %0,%%xmm0" : : "m" (d
) );
7598 /***********************************************************************
7599 * __libm_sse2_log10 (MSVCRT.@)
7601 void __cdecl
__libm_sse2_log10(void)
7604 __asm__
__volatile__( "movq %%xmm0,%0" : "=m" (d
) );
7606 __asm__
__volatile__( "movq %0,%%xmm0" : : "m" (d
) );
7609 /***********************************************************************
7610 * __libm_sse2_log10f (MSVCRT.@)
7612 void __cdecl
__libm_sse2_log10f(void)
7615 __asm__
__volatile__( "movd %%xmm0,%0" : "=g" (f
) );
7617 __asm__
__volatile__( "movd %0,%%xmm0" : : "g" (f
) );
7620 /***********************************************************************
7621 * __libm_sse2_logf (MSVCRT.@)
7623 void __cdecl
__libm_sse2_logf(void)
7626 __asm__
__volatile__( "movd %%xmm0,%0" : "=g" (f
) );
7628 __asm__
__volatile__( "movd %0,%%xmm0" : : "g" (f
) );
7631 /***********************************************************************
7632 * __libm_sse2_pow (MSVCRT.@)
7634 void __cdecl
__libm_sse2_pow(void)
7637 __asm__
__volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1
), "=m" (d2
) );
7639 __asm__
__volatile__( "movq %0,%%xmm0" : : "m" (d1
) );
7642 /***********************************************************************
7643 * __libm_sse2_powf (MSVCRT.@)
7645 void __cdecl
__libm_sse2_powf(void)
7648 __asm__
__volatile__( "movd %%xmm0,%0; movd %%xmm1,%1" : "=g" (f1
), "=g" (f2
) );
7649 f1
= powf( f1
, f2
);
7650 __asm__
__volatile__( "movd %0,%%xmm0" : : "g" (f1
) );
7653 /***********************************************************************
7654 * __libm_sse2_sin (MSVCRT.@)
7656 void __cdecl
__libm_sse2_sin(void)
7659 __asm__
__volatile__( "movq %%xmm0,%0" : "=m" (d
) );
7661 __asm__
__volatile__( "movq %0,%%xmm0" : : "m" (d
) );
7664 /***********************************************************************
7665 * __libm_sse2_sinf (MSVCRT.@)
7667 void __cdecl
__libm_sse2_sinf(void)
7670 __asm__
__volatile__( "movd %%xmm0,%0" : "=g" (f
) );
7672 __asm__
__volatile__( "movd %0,%%xmm0" : : "g" (f
) );
7675 /***********************************************************************
7676 * __libm_sse2_tan (MSVCRT.@)
7678 void __cdecl
__libm_sse2_tan(void)
7681 __asm__
__volatile__( "movq %%xmm0,%0" : "=m" (d
) );
7683 __asm__
__volatile__( "movq %0,%%xmm0" : : "m" (d
) );
7686 /***********************************************************************
7687 * __libm_sse2_tanf (MSVCRT.@)
7689 void __cdecl
__libm_sse2_tanf(void)
7692 __asm__
__volatile__( "movd %%xmm0,%0" : "=g" (f
) );
7694 __asm__
__volatile__( "movd %0,%%xmm0" : : "g" (f
) );
7697 /***********************************************************************
7698 * __libm_sse2_sqrt_precise (MSVCR110.@)
7700 void __cdecl
__libm_sse2_sqrt_precise(void)
7705 __asm__
__volatile__( "movq %%xmm0,%0" : "=m" (d
) );
7706 __control87_2(0, 0, NULL
, &cw
);
7710 __asm__
__volatile__( "movq %0,%%xmm0" : : "m" (d
) );
7714 if (!sqrt_validate(&d
, FALSE
))
7716 __asm__
__volatile__( "movq %0,%%xmm0" : : "m" (d
) );
7719 __asm__
__volatile__( "call " __ASM_NAME( "sse2_sqrt" ) );
7721 #endif /* __i386__ */
7725 /*********************************************************************
7728 * Copied from musl: src/math/exp2.c
7730 double CDECL
exp2(double x
)
7732 static const double C
[] = {
7733 0x1.62e42fefa39efp
-1,
7734 0x1.ebfbdff82c424p
-3,
7735 0x1.c6b08d70cf4b5p
-5,
7736 0x1.3b2abd24650ccp
-7,
7737 0x1.5d7e09b4e3a84p
-10
7741 UINT64 ki
, idx
, top
, sbits
;
7742 double kd
, r
, r2
, scale
, tail
, tmp
;
7744 abstop
= (*(UINT64
*)&x
>> 52) & 0x7ff;
7745 if (abstop
- 0x3c9 >= 0x408 - 0x3c9) {
7746 if (abstop
- 0x3c9 >= 0x80000000) {
7747 /* Avoid spurious underflow for tiny x. */
7748 /* Note: 0 is common input. */
7751 if (abstop
>= 409) {
7752 if (*(UINT64
*)&x
== 0xfff0000000000000ull
)
7754 if (abstop
>= 0x7ff)
7756 if (!(*(UINT64
*)&x
>> 63)) {
7758 return fp_barrier(DBL_MAX
) * DBL_MAX
;
7760 else if (x
<= -2147483648.0) {
7761 fp_barrier(x
+ 0x1p
120f
);
7764 else if (*(UINT64
*)&x
>= 0xc090cc0000000000ull
) {
7766 fp_barrier(x
+ 0x1p
120f
);
7770 if (2 * *(UINT64
*)&x
> 2 * 0x408d000000000000ull
)
7771 /* Large x is special cased below. */
7775 /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
7776 /* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
7777 kd
= fp_barrier(x
+ 0x1.8p52
/ (1 << 7));
7778 ki
= *(UINT64
*)&kd
; /* k. */
7779 kd
-= 0x1.8p52
/ (1 << 7); /* k/N for int k. */
7781 /* 2^(k/N) ~= scale * (1 + tail). */
7782 idx
= 2 * (ki
% (1 << 7));
7783 top
= ki
<< (52 - 7);
7784 tail
= *(double*)&exp_T
[idx
];
7785 /* This is only a valid scale when -1023*N < k < 1024*N. */
7786 sbits
= exp_T
[idx
+ 1] + top
;
7787 /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
7788 /* Evaluation is optimized assuming superscalar pipelined execution. */
7790 /* Without fma the worst case error is 0.5/N ulp larger. */
7791 /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
7792 tmp
= tail
+ r
* C
[0] + r2
* (C
[1] + r
* C
[2]) + r2
* r2
* (C
[3] + r
* C
[4]);
7795 /* Handle cases that may overflow or underflow when computing the result that
7796 is scale*(1+TMP) without intermediate rounding. The bit representation of
7797 scale is in SBITS, however it has a computed exponent that may have
7798 overflown into the sign bit so that needs to be adjusted before using it as
7799 a double. (int32_t)KI is the k used in the argument reduction and exponent
7800 adjustment of scale, positive k here means the result may overflow and
7801 negative k means the result may underflow. */
7804 if ((ki
& 0x80000000) == 0) {
7805 /* k > 0, the exponent of scale might have overflowed by 1. */
7806 sbits
-= 1ull << 52;
7807 scale
= *(double*)&sbits
;
7808 y
= 2 * (scale
+ scale
* tmp
);
7811 /* k < 0, need special care in the subnormal range. */
7812 sbits
+= 1022ull << 52;
7813 scale
= *(double*)&sbits
;
7814 y
= scale
+ scale
* tmp
;
7816 /* Round y to the right precision before scaling it into the subnormal
7817 range to avoid double rounding that can cause 0.5+E/2 ulp error where
7818 E is the worst-case ulp error outside the subnormal range. So this
7819 is only useful if the goal is better than 1 ulp worst-case error. */
7821 lo
= scale
- y
+ scale
* tmp
;
7823 lo
= 1.0 - hi
+ y
+ lo
;
7825 /* Avoid -0.0 with downward rounding. */
7828 /* The underflow exception needs to be signaled explicitly. */
7829 fp_barrier(fp_barrier(0x1p
-1022) * 0x1p
-1022);
7834 scale
= *(double*)&sbits
;
7835 /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
7836 is no spurious underflow here even without fma. */
7837 return scale
+ scale
* tmp
;
7840 /*********************************************************************
7841 * exp2f (MSVCR120.@)
7843 * Copied from musl: src/math/exp2f.c
7845 float CDECL
exp2f(float x
)
7847 static const double C
[] = {
7848 0x1.c6af84b912394p
-5, 0x1.ebfce50fac4f3p
-3, 0x1.62e42ff0c52d6p
-1
7850 static const double shift
= 0x1.8p
+52 / (1 << 5);
7852 double kd
, xd
, z
, r
, r2
, y
, s
;
7857 abstop
= (*(UINT32
*)&x
>> 20) & 0x7ff;
7858 if (abstop
>= 0x430) {
7859 /* |x| >= 128 or x is nan. */
7860 if (*(UINT32
*)&x
== 0xff800000)
7862 if (abstop
>= 0x7f8)
7866 return fp_barrierf(x
* FLT_MAX
);
7869 fp_barrierf(x
- 0x1p
120);
7874 /* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k, N = 1 << 5. */
7877 kd
-= shift
; /* k/(1<<5) for int k. */
7880 /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
7881 t
= exp2f_T
[ki
% (1 << 5)];
7882 t
+= ki
<< (52 - 5);
7884 z
= C
[0] * r
+ C
[1];
7892 /*********************************************************************
7893 * expm1 (MSVCR120.@)
7895 double CDECL
expm1(double x
)
7900 /*********************************************************************
7901 * expm1f (MSVCR120.@)
7903 float CDECL
expm1f(float x
)
7908 /*********************************************************************
7909 * log1p (MSVCR120.@)
7911 * Copied from musl: src/math/log1p.c
7913 double CDECL
log1p(double x
)
7915 static const double ln2_hi
= 6.93147180369123816490e-01,
7916 ln2_lo
= 1.90821492927058770002e-10,
7917 Lg1
= 6.666666666666735130e-01,
7918 Lg2
= 3.999999999940941908e-01,
7919 Lg3
= 2.857142874366239149e-01,
7920 Lg4
= 2.222219843214978396e-01,
7921 Lg5
= 1.818357216161805012e-01,
7922 Lg6
= 1.531383769920937332e-01,
7923 Lg7
= 1.479819860511658591e-01;
7925 union {double f
; UINT64 i
;} u
= {x
};
7926 double hfsq
, f
, c
, s
, z
, R
, w
, t1
, t2
, dk
;
7932 if (hx
< 0x3fda827a || hx
>> 31) { /* 1+x < sqrt(2)+ */
7933 if (hx
>= 0xbff00000) { /* x <= -1.0 */
7936 return x
/ 0.0; /* log1p(-1) = -inf */
7939 return (x
-x
) / 0.0; /* log1p(x<-1) = NaN */
7941 if (hx
<< 1 < 0x3ca00000 << 1) { /* |x| < 2**-53 */
7942 fp_barrier(x
+ 0x1p
120f
);
7943 /* underflow if subnormal */
7944 if ((hx
& 0x7ff00000) == 0)
7948 if (hx
<= 0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
7953 } else if (hx
>= 0x7ff00000)
7958 hu
+= 0x3ff00000 - 0x3fe6a09e;
7959 k
= (int)(hu
>> 20) - 0x3ff;
7960 /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
7962 c
= k
>= 2 ? 1 - (u
.f
- x
) : x
- (u
.f
- 1);
7966 /* reduce u into [sqrt(2)/2, sqrt(2)] */
7967 hu
= (hu
& 0x000fffff) + 0x3fe6a09e;
7968 u
.i
= (UINT64
)hu
<< 32 | (u
.i
& 0xffffffff);
7975 t1
= w
* (Lg2
+ w
* (Lg4
+ w
* Lg6
));
7976 t2
= z
* (Lg1
+ w
* (Lg3
+ w
* (Lg5
+ w
* Lg7
)));
7979 return s
* (hfsq
+ R
) + (dk
* ln2_lo
+ c
) - hfsq
+ f
+ dk
* ln2_hi
;
7982 /*********************************************************************
7983 * log1pf (MSVCR120.@)
7985 * Copied from musl: src/math/log1pf.c
7987 float CDECL
log1pf(float x
)
7989 static const float ln2_hi
= 6.9313812256e-01,
7990 ln2_lo
= 9.0580006145e-06,
7991 Lg1
= 0xaaaaaa.0p
-24,
7992 Lg2
= 0xccce13.0p
-25,
7993 Lg3
= 0x91e9ee.0p
-25,
7994 Lg4
= 0xf89e26.0p
-26;
7996 union {float f
; UINT32 i
;} u
= {x
};
7997 float hfsq
, f
, c
, s
, z
, R
, w
, t1
, t2
, dk
;
8003 if (ix
< 0x3ed413d0 || ix
>> 31) { /* 1+x < sqrt(2)+ */
8004 if (ix
>= 0xbf800000) { /* x <= -1.0 */
8007 return x
/ 0.0f
; /* log1p(-1)=-inf */
8010 return (x
- x
) / 0.0f
; /* log1p(x<-1)=NaN */
8012 if (ix
<<1 < 0x33800000<<1) { /* |x| < 2**-24 */
8013 /* underflow if subnormal */
8014 if ((ix
& 0x7f800000) == 0)
8018 if (ix
<= 0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
8023 } else if (ix
>= 0x7f800000)
8028 iu
+= 0x3f800000 - 0x3f3504f3;
8029 k
= (int)(iu
>> 23) - 0x7f;
8030 /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
8032 c
= k
>= 2 ? 1 - (u
.f
- x
) : x
- (u
.f
- 1);
8036 /* reduce u into [sqrt(2)/2, sqrt(2)] */
8037 iu
= (iu
& 0x007fffff) + 0x3f3504f3;
8044 t1
= w
* (Lg2
+ w
* Lg4
);
8045 t2
= z
* (Lg1
+ w
* Lg3
);
8047 hfsq
= 0.5f
* f
* f
;
8049 return s
* (hfsq
+ R
) + (dk
* ln2_lo
+ c
) - hfsq
+ f
+ dk
* ln2_hi
;
8052 /*********************************************************************
8055 * Copied from musl: src/math/log2.c
8057 double CDECL
log2(double x
)
8059 static const double invln2hi
= 0x1.7154765200000p
+0,
8060 invln2lo
= 0x1.705fc2eefa200p
-33;
8061 static const double A
[] = {
8062 -0x1.71547652b8339p
-1,
8063 0x1.ec709dc3a04bep
-2,
8064 -0x1.7154764702ffbp
-2,
8065 0x1.2776c50034c48p
-2,
8066 -0x1.ec7b328ea92bcp
-3,
8067 0x1.a6225e117f92ep
-3
8069 static const double B
[] = {
8070 -0x1.71547652b82fep
-1,
8071 0x1.ec709dc3a03f7p
-2,
8072 -0x1.71547652b7c3fp
-2,
8073 0x1.2776c50f05be4p
-2,
8074 -0x1.ec709dd768fe5p
-3,
8075 0x1.a61761ec4e736p
-3,
8076 -0x1.7153fbc64a79bp
-3,
8077 0x1.484d154f01b4ap
-3,
8078 -0x1.289e4a72c383cp
-3,
8079 0x1.0b32f285aee66p
-3
8081 static const struct {
8084 {0x1.724286bb1acf8p
+0, -0x1.1095feecdb000p
-1},
8085 {0x1.6e1f766d2cca1p
+0, -0x1.08494bd76d000p
-1},
8086 {0x1.6a13d0e30d48ap
+0, -0x1.00143aee8f800p
-1},
8087 {0x1.661ec32d06c85p
+0, -0x1.efec5360b4000p
-2},
8088 {0x1.623fa951198f8p
+0, -0x1.dfdd91ab7e000p
-2},
8089 {0x1.5e75ba4cf026cp
+0, -0x1.cffae0cc79000p
-2},
8090 {0x1.5ac055a214fb8p
+0, -0x1.c043811fda000p
-2},
8091 {0x1.571ed0f166e1ep
+0, -0x1.b0b67323ae000p
-2},
8092 {0x1.53909590bf835p
+0, -0x1.a152f5a2db000p
-2},
8093 {0x1.5014fed61adddp
+0, -0x1.9217f5af86000p
-2},
8094 {0x1.4cab88e487bd0p
+0, -0x1.8304db0719000p
-2},
8095 {0x1.49539b4334feep
+0, -0x1.74189f9a9e000p
-2},
8096 {0x1.460cbdfafd569p
+0, -0x1.6552bb5199000p
-2},
8097 {0x1.42d664ee4b953p
+0, -0x1.56b23a29b1000p
-2},
8098 {0x1.3fb01111dd8a6p
+0, -0x1.483650f5fa000p
-2},
8099 {0x1.3c995b70c5836p
+0, -0x1.39de937f6a000p
-2},
8100 {0x1.3991c4ab6fd4ap
+0, -0x1.2baa1538d6000p
-2},
8101 {0x1.3698e0ce099b5p
+0, -0x1.1d98340ca4000p
-2},
8102 {0x1.33ae48213e7b2p
+0, -0x1.0fa853a40e000p
-2},
8103 {0x1.30d191985bdb1p
+0, -0x1.01d9c32e73000p
-2},
8104 {0x1.2e025cab271d7p
+0, -0x1.e857da2fa6000p
-3},
8105 {0x1.2b404cf13cd82p
+0, -0x1.cd3c8633d8000p
-3},
8106 {0x1.288b02c7ccb50p
+0, -0x1.b26034c14a000p
-3},
8107 {0x1.25e2263944de5p
+0, -0x1.97c1c2f4fe000p
-3},
8108 {0x1.234563d8615b1p
+0, -0x1.7d6023f800000p
-3},
8109 {0x1.20b46e33eaf38p
+0, -0x1.633a71a05e000p
-3},
8110 {0x1.1e2eefdcda3ddp
+0, -0x1.494f5e9570000p
-3},
8111 {0x1.1bb4a580b3930p
+0, -0x1.2f9e424e0a000p
-3},
8112 {0x1.19453847f2200p
+0, -0x1.162595afdc000p
-3},
8113 {0x1.16e06c0d5d73cp
+0, -0x1.f9c9a75bd8000p
-4},
8114 {0x1.1485f47b7e4c2p
+0, -0x1.c7b575bf9c000p
-4},
8115 {0x1.12358ad0085d1p
+0, -0x1.960c60ff48000p
-4},
8116 {0x1.0fef00f532227p
+0, -0x1.64ce247b60000p
-4},
8117 {0x1.0db2077d03a8fp
+0, -0x1.33f78b2014000p
-4},
8118 {0x1.0b7e6d65980d9p
+0, -0x1.0387d1a42c000p
-4},
8119 {0x1.0953efe7b408dp
+0, -0x1.a6f9208b50000p
-5},
8120 {0x1.07325cac53b83p
+0, -0x1.47a954f770000p
-5},
8121 {0x1.05197e40d1b5cp
+0, -0x1.d23a8c50c0000p
-6},
8122 {0x1.03091c1208ea2p
+0, -0x1.16a2629780000p
-6},
8123 {0x1.0101025b37e21p
+0, -0x1.720f8d8e80000p
-8},
8124 {0x1.fc07ef9caa76bp
-1, 0x1.6fe53b1500000p
-7},
8125 {0x1.f4465d3f6f184p
-1, 0x1.11ccce10f8000p
-5},
8126 {0x1.ecc079f84107fp
-1, 0x1.c4dfc8c8b8000p
-5},
8127 {0x1.e573a99975ae8p
-1, 0x1.3aa321e574000p
-4},
8128 {0x1.de5d6f0bd3de6p
-1, 0x1.918a0d08b8000p
-4},
8129 {0x1.d77b681ff38b3p
-1, 0x1.e72e9da044000p
-4},
8130 {0x1.d0cb5724de943p
-1, 0x1.1dcd2507f6000p
-3},
8131 {0x1.ca4b2dc0e7563p
-1, 0x1.476ab03dea000p
-3},
8132 {0x1.c3f8ee8d6cb51p
-1, 0x1.7074377e22000p
-3},
8133 {0x1.bdd2b4f020c4cp
-1, 0x1.98ede8ba94000p
-3},
8134 {0x1.b7d6c006015cap
-1, 0x1.c0db86ad2e000p
-3},
8135 {0x1.b20366e2e338fp
-1, 0x1.e840aafcee000p
-3},
8136 {0x1.ac57026295039p
-1, 0x1.0790ab4678000p
-2},
8137 {0x1.a6d01bc2731ddp
-1, 0x1.1ac056801c000p
-2},
8138 {0x1.a16d3bc3ff18bp
-1, 0x1.2db11d4fee000p
-2},
8139 {0x1.9c2d14967feadp
-1, 0x1.406464ec58000p
-2},
8140 {0x1.970e4f47c9902p
-1, 0x1.52dbe093af000p
-2},
8141 {0x1.920fb3982bcf2p
-1, 0x1.651902050d000p
-2},
8142 {0x1.8d30187f759f1p
-1, 0x1.771d2cdeaf000p
-2},
8143 {0x1.886e5ebb9f66dp
-1, 0x1.88e9c857d9000p
-2},
8144 {0x1.83c97b658b994p
-1, 0x1.9a80155e16000p
-2},
8145 {0x1.7f405ffc61022p
-1, 0x1.abe186ed3d000p
-2},
8146 {0x1.7ad22181415cap
-1, 0x1.bd0f2aea0e000p
-2},
8147 {0x1.767dcf99eff8cp
-1, 0x1.ce0a43dbf4000p
-2}
8149 static const struct {
8152 {0x1.6200012b90a8ep
-1, 0x1.904ab0644b605p
-55},
8153 {0x1.66000045734a6p
-1, 0x1.1ff9bea62f7a9p
-57},
8154 {0x1.69fffc325f2c5p
-1, 0x1.27ecfcb3c90bap
-55},
8155 {0x1.6e00038b95a04p
-1, 0x1.8ff8856739326p
-55},
8156 {0x1.71fffe09994e3p
-1, 0x1.afd40275f82b1p
-55},
8157 {0x1.7600015590e1p
-1, -0x1.2fd75b4238341p
-56},
8158 {0x1.7a00012655bd5p
-1, 0x1.808e67c242b76p
-56},
8159 {0x1.7e0003259e9a6p
-1, -0x1.208e426f622b7p
-57},
8160 {0x1.81fffedb4b2d2p
-1, -0x1.402461ea5c92fp
-55},
8161 {0x1.860002dfafcc3p
-1, 0x1.df7f4a2f29a1fp
-57},
8162 {0x1.89ffff78c6b5p
-1, -0x1.e0453094995fdp
-55},
8163 {0x1.8e00039671566p
-1, -0x1.a04f3bec77b45p
-55},
8164 {0x1.91fffe2bf1745p
-1, -0x1.7fa34400e203cp
-56},
8165 {0x1.95fffcc5c9fd1p
-1, -0x1.6ff8005a0695dp
-56},
8166 {0x1.9a0003bba4767p
-1, 0x1.0f8c4c4ec7e03p
-56},
8167 {0x1.9dfffe7b92da5p
-1, 0x1.e7fd9478c4602p
-55},
8168 {0x1.a1fffd72efdafp
-1, -0x1.a0c554dcdae7ep
-57},
8169 {0x1.a5fffde04ff95p
-1, 0x1.67da98ce9b26bp
-55},
8170 {0x1.a9fffca5e8d2bp
-1, -0x1.284c9b54c13dep
-55},
8171 {0x1.adfffddad03eap
-1, 0x1.812c8ea602e3cp
-58},
8172 {0x1.b1ffff10d3d4dp
-1, -0x1.efaddad27789cp
-55},
8173 {0x1.b5fffce21165ap
-1, 0x1.3cb1719c61237p
-58},
8174 {0x1.b9fffd950e674p
-1, 0x1.3f7d94194cep
-56},
8175 {0x1.be000139ca8afp
-1, 0x1.50ac4215d9bcp
-56},
8176 {0x1.c20005b46df99p
-1, 0x1.beea653e9c1c9p
-57},
8177 {0x1.c600040b9f7aep
-1, -0x1.c079f274a70d6p
-56},
8178 {0x1.ca0006255fd8ap
-1, -0x1.a0b4076e84c1fp
-56},
8179 {0x1.cdfffd94c095dp
-1, 0x1.8f933f99ab5d7p
-55},
8180 {0x1.d1ffff975d6cfp
-1, -0x1.82c08665fe1bep
-58},
8181 {0x1.d5fffa2561c93p
-1, -0x1.b04289bd295f3p
-56},
8182 {0x1.d9fff9d228b0cp
-1, 0x1.70251340fa236p
-55},
8183 {0x1.de00065bc7e16p
-1, -0x1.5011e16a4d80cp
-56},
8184 {0x1.e200002f64791p
-1, 0x1.9802f09ef62ep
-55},
8185 {0x1.e600057d7a6d8p
-1, -0x1.e0b75580cf7fap
-56},
8186 {0x1.ea00027edc00cp
-1, -0x1.c848309459811p
-55},
8187 {0x1.ee0006cf5cb7cp
-1, -0x1.f8027951576f4p
-55},
8188 {0x1.f2000782b7dccp
-1, -0x1.f81d97274538fp
-55},
8189 {0x1.f6000260c450ap
-1, -0x1.071002727ffdcp
-59},
8190 {0x1.f9fffe88cd533p
-1, -0x1.81bdce1fda8bp
-58},
8191 {0x1.fdfffd50f8689p
-1, 0x1.7f91acb918e6ep
-55},
8192 {0x1.0200004292367p
+0, 0x1.b7ff365324681p
-54},
8193 {0x1.05fffe3e3d668p
+0, 0x1.6fa08ddae957bp
-55},
8194 {0x1.0a0000a85a757p
+0, -0x1.7e2de80d3fb91p
-58},
8195 {0x1.0e0001a5f3fccp
+0, -0x1.1823305c5f014p
-54},
8196 {0x1.11ffff8afbaf5p
+0, -0x1.bfabb6680bac2p
-55},
8197 {0x1.15fffe54d91adp
+0, -0x1.d7f121737e7efp
-54},
8198 {0x1.1a00011ac36e1p
+0, 0x1.c000a0516f5ffp
-54},
8199 {0x1.1e00019c84248p
+0, -0x1.082fbe4da5dap
-54},
8200 {0x1.220000ffe5e6ep
+0, -0x1.8fdd04c9cfb43p
-55},
8201 {0x1.26000269fd891p
+0, 0x1.cfe2a7994d182p
-55},
8202 {0x1.2a00029a6e6dap
+0, -0x1.00273715e8bc5p
-56},
8203 {0x1.2dfffe0293e39p
+0, 0x1.b7c39dab2a6f9p
-54},
8204 {0x1.31ffff7dcf082p
+0, 0x1.df1336edc5254p
-56},
8205 {0x1.35ffff05a8b6p
+0, -0x1.e03564ccd31ebp
-54},
8206 {0x1.3a0002e0eaeccp
+0, 0x1.5f0e74bd3a477p
-56},
8207 {0x1.3e000043bb236p
+0, 0x1.c7dcb149d8833p
-54},
8208 {0x1.4200002d187ffp
+0, 0x1.e08afcf2d3d28p
-56},
8209 {0x1.460000d387cb1p
+0, 0x1.20837856599a6p
-55},
8210 {0x1.4a00004569f89p
+0, -0x1.9fa5c904fbcd2p
-55},
8211 {0x1.4e000043543f3p
+0, -0x1.81125ed175329p
-56},
8212 {0x1.51fffcc027f0fp
+0, 0x1.883d8847754dcp
-54},
8213 {0x1.55ffffd87b36fp
+0, -0x1.709e731d02807p
-55},
8214 {0x1.59ffff21df7bap
+0, 0x1.7f79f68727b02p
-55},
8215 {0x1.5dfffebfc3481p
+0, -0x1.180902e30e93ep
-54}
8218 double z
, r
, r2
, r4
, y
, invc
, logc
, kd
, hi
, lo
, t1
, t2
, t3
, p
, rhi
, rlo
;
8225 if (ix
- 0x3feea4af00000000ULL
< 0x210aa00000000ULL
) {
8226 /* Handle close to 1.0 inputs separately. */
8227 /* Fix sign of zero with downward rounding when x==1. */
8228 if (ix
== 0x3ff0000000000000ULL
)
8231 *(UINT64
*)&rhi
= *(UINT64
*)&r
& -1ULL << 32;
8233 hi
= rhi
* invln2hi
;
8234 lo
= rlo
* invln2hi
+ r
* invln2lo
;
8235 r2
= r
* r
; /* rounding error: 0x1p-62. */
8237 /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */
8238 p
= r2
* (B
[0] + r
* B
[1]);
8241 lo
+= r4
* (B
[2] + r
* B
[3] + r2
* (B
[4] + r
* B
[5]) +
8242 r4
* (B
[6] + r
* B
[7] + r2
* (B
[8] + r
* B
[9])));
8246 if (top
- 0x0010 >= 0x7ff0 - 0x0010) {
8247 /* x < 0x1p-1022 or inf or nan. */
8252 if (ix
== 0x7ff0000000000000ULL
) /* log2(inf) == inf. */
8254 if ((top
& 0x7ff0) == 0x7ff0 && (ix
& 0xfffffffffffffULL
))
8258 return (x
- x
) / (x
- x
);
8260 /* x is subnormal, normalize it. */
8266 /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
8267 The range is split into N subintervals.
8268 The ith subinterval contains z and c is near its center. */
8269 tmp
= ix
- 0x3fe6000000000000ULL
;
8270 i
= (tmp
>> (52 - 6)) % (1 << 6);
8271 k
= (INT64
)tmp
>> 52; /* arithmetic shift */
8272 iz
= ix
- (tmp
& 0xfffULL
<< 52);
8278 /* log2(x) = log2(z/c) + log2(c) + k. */
8279 /* r ~= z/c - 1, |r| < 1/(2*N). */
8280 /* rounding error: 0x1p-55/N + 0x1p-65. */
8281 r
= (z
- T2
[i
].chi
- T2
[i
].clo
) * invc
;
8282 *(UINT64
*)&rhi
= *(UINT64
*)&r
& -1ULL << 32;
8284 t1
= rhi
* invln2hi
;
8285 t2
= rlo
* invln2hi
+ r
* invln2lo
;
8287 /* hi + lo = r/ln2 + log2(c) + k. */
8290 lo
= t3
- hi
+ t1
+ t2
;
8292 /* log2(r+1) = r/ln2 + r^2*poly(r). */
8293 /* Evaluation is optimized assuming superscalar pipelined execution. */
8294 r2
= r
* r
; /* rounding error: 0x1p-54/N^2. */
8296 /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
8297 ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */
8298 p
= A
[0] + r
* A
[1] + r2
* (A
[2] + r
* A
[3]) + r4
* (A
[4] + r
* A
[5]);
8299 y
= lo
+ r2
* p
+ hi
;
8303 /*********************************************************************
8304 * log2f (MSVCR120.@)
8306 * Copied from musl: src/math/log2f.c
8308 float CDECL
log2f(float x
)
8310 static const double A
[] = {
8311 -0x1.712b6f70a7e4dp
-2,
8312 0x1.ecabf496832ep
-2,
8313 -0x1.715479ffae3dep
-1,
8316 static const struct {
8319 { 0x1.661ec79f8f3bep
+0, -0x1.efec65b963019p
-2 },
8320 { 0x1.571ed4aaf883dp
+0, -0x1.b0b6832d4fca4p
-2 },
8321 { 0x1.49539f0f010bp
+0, -0x1.7418b0a1fb77bp
-2 },
8322 { 0x1.3c995b0b80385p
+0, -0x1.39de91a6dcf7bp
-2 },
8323 { 0x1.30d190c8864a5p
+0, -0x1.01d9bf3f2b631p
-2 },
8324 { 0x1.25e227b0b8eap
+0, -0x1.97c1d1b3b7afp
-3 },
8325 { 0x1.1bb4a4a1a343fp
+0, -0x1.2f9e393af3c9fp
-3 },
8326 { 0x1.12358f08ae5bap
+0, -0x1.960cbbf788d5cp
-4 },
8327 { 0x1.0953f419900a7p
+0, -0x1.a6f9db6475fcep
-5 },
8329 { 0x1.e608cfd9a47acp
-1, 0x1.338ca9f24f53dp
-4 },
8330 { 0x1.ca4b31f026aap
-1, 0x1.476a9543891bap
-3 },
8331 { 0x1.b2036576afce6p
-1, 0x1.e840b4ac4e4d2p
-3 },
8332 { 0x1.9c2d163a1aa2dp
-1, 0x1.40645f0c6651cp
-2 },
8333 { 0x1.886e6037841edp
-1, 0x1.88e9c2c1b9ff8p
-2 },
8334 { 0x1.767dcf5534862p
-1, 0x1.ce0a44eb17bccp
-2 }
8337 double z
, r
, r2
, p
, y
, y0
, invc
, logc
;
8338 UINT32 ix
, iz
, top
, tmp
;
8342 /* Fix sign of zero with downward rounding when x==1. */
8343 if (ix
== 0x3f800000)
8345 if (ix
- 0x00800000 >= 0x7f800000 - 0x00800000) {
8346 /* x < 0x1p-126 or inf or nan. */
8351 if (ix
== 0x7f800000) /* log2(inf) == inf. */
8353 if (ix
* 2 > 0xff000000)
8355 if (ix
& 0x80000000) {
8357 return (x
- x
) / (x
- x
);
8359 /* x is subnormal, normalize it. */
8365 /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
8366 The range is split into N subintervals.
8367 The ith subinterval contains z and c is near its center. */
8368 tmp
= ix
- 0x3f330000;
8369 i
= (tmp
>> (23 - 4)) % (1 << 4);
8370 top
= tmp
& 0xff800000;
8372 k
= (INT32
)tmp
>> 23; /* arithmetic shift */
8377 /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
8379 y0
= logc
+ (double)k
;
8381 /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
8383 y
= A
[1] * r
+ A
[2];
8390 /*********************************************************************
8393 double CDECL
rint(double x
)
8398 /*********************************************************************
8399 * rintf (MSVCR120.@)
8401 * Copied from musl: src/math/rintf.c
8403 float CDECL
rintf(float x
)
8405 static const float toint
= 1 / FLT_EPSILON
;
8407 unsigned int ix
= *(unsigned int*)&x
;
8408 int e
= ix
>> 23 & 0xff;
8415 y
= fp_barrierf(x
- toint
) + toint
;
8417 y
= fp_barrierf(x
+ toint
) - toint
;
8419 return s
? -0.0f
: 0.0f
;
8423 /*********************************************************************
8424 * lrint (MSVCR120.@)
8426 __msvcrt_long CDECL
lrint(double x
)
8431 if ((d
< 0 && d
!= (double)(__msvcrt_long
)d
)
8432 || (d
>= 0 && d
!= (double)(__msvcrt_ulong
)d
)) {
8439 /*********************************************************************
8440 * lrintf (MSVCR120.@)
8442 __msvcrt_long CDECL
lrintf(float x
)
8447 if ((f
< 0 && f
!= (float)(__msvcrt_long
)f
)
8448 || (f
>= 0 && f
!= (float)(__msvcrt_ulong
)f
)) {
8455 /*********************************************************************
8456 * llrint (MSVCR120.@)
8458 __int64 CDECL
llrint(double x
)
8463 if ((d
< 0 && d
!= (double)(__int64
)d
)
8464 || (d
>= 0 && d
!= (double)(unsigned __int64
)d
)) {
8471 /*********************************************************************
8472 * llrintf (MSVCR120.@)
8474 __int64 CDECL
llrintf(float x
)
8479 if ((f
< 0 && f
!= (float)(__int64
)f
)
8480 || (f
>= 0 && f
!= (float)(unsigned __int64
)f
)) {
8487 /*********************************************************************
8488 * round (MSVCR120.@)
8490 double CDECL
round(double x
)
8495 /*********************************************************************
8496 * roundf (MSVCR120.@)
8498 * Copied from musl: src/math/roundf.c
8500 float CDECL
roundf(float x
)
8502 static const float toint
= 1 / FLT_EPSILON
;
8504 unsigned int ix
= *(unsigned int*)&x
;
8505 int e
= ix
>> 23 & 0xff;
8513 return 0 * *(float*)&ix
;
8514 y
= fp_barrierf(x
+ toint
) - toint
- x
;
8517 else if (y
<= -0.5f
)
8526 /*********************************************************************
8527 * lround (MSVCR120.@)
8529 * Copied from musl: src/math/lround.c
8531 __msvcrt_long CDECL
lround(double x
)
8533 double d
= round(x
);
8534 if (d
!= (double)(__msvcrt_long
)d
) {
8541 /*********************************************************************
8542 * lroundf (MSVCR120.@)
8544 * Copied from musl: src/math/lroundf.c
8546 __msvcrt_long CDECL
lroundf(float x
)
8548 float f
= roundf(x
);
8549 if (f
!= (float)(__msvcrt_long
)f
) {
8556 /*********************************************************************
8557 * llround (MSVCR120.@)
8559 * Copied from musl: src/math/llround.c
8561 __int64 CDECL
llround(double x
)
8563 double d
= round(x
);
8564 if (d
!= (double)(__int64
)d
) {
8571 /*********************************************************************
8572 * llroundf (MSVCR120.@)
8574 * Copied from musl: src/math/llroundf.c
8576 __int64 CDECL
llroundf(float x
)
8578 float f
= roundf(x
);
8579 if (f
!= (float)(__int64
)f
) {
8586 /*********************************************************************
8587 * trunc (MSVCR120.@)
8589 * Copied from musl: src/math/trunc.c
8591 double CDECL
trunc(double x
)
8593 union {double f
; UINT64 i
;} u
= {x
};
8594 int e
= (u
.i
>> 52 & 0x7ff) - 0x3ff + 12;
8608 /*********************************************************************
8609 * truncf (MSVCR120.@)
8611 * Copied from musl: src/math/truncf.c
8613 float CDECL
truncf(float x
)
8615 union {float f
; UINT32 i
;} u
= {x
};
8616 int e
= (u
.i
>> 23 & 0xff) - 0x7f + 9;
8630 /*********************************************************************
8631 * _dtest (MSVCR120.@)
8633 short CDECL
_dtest(double *x
)
8638 /*********************************************************************
8639 * _fdtest (MSVCR120.@)
8641 short CDECL
_fdtest(float *x
)
8643 return _fdclass(*x
);
8646 static double erfc1(double x
)
8648 static const double erx
= 8.45062911510467529297e-01,
8649 pa0
= -2.36211856075265944077e-03,
8650 pa1
= 4.14856118683748331666e-01,
8651 pa2
= -3.72207876035701323847e-01,
8652 pa3
= 3.18346619901161753674e-01,
8653 pa4
= -1.10894694282396677476e-01,
8654 pa5
= 3.54783043256182359371e-02,
8655 pa6
= -2.16637559486879084300e-03,
8656 qa1
= 1.06420880400844228286e-01,
8657 qa2
= 5.40397917702171048937e-01,
8658 qa3
= 7.18286544141962662868e-02,
8659 qa4
= 1.26171219808761642112e-01,
8660 qa5
= 1.36370839120290507362e-02,
8661 qa6
= 1.19844998467991074170e-02;
8666 P
= pa0
+ s
* (pa1
+ s
* (pa2
+ s
* (pa3
+ s
* (pa4
+ s
* (pa5
+ s
* pa6
)))));
8667 Q
= 1 + s
* (qa1
+ s
* (qa2
+ s
* (qa3
+ s
* (qa4
+ s
* (qa5
+ s
* qa6
)))));
8668 return 1 - erx
- P
/ Q
;
8671 static double erfc2(UINT32 ix
, double x
)
8673 static const double ra0
= -9.86494403484714822705e-03,
8674 ra1
= -6.93858572707181764372e-01,
8675 ra2
= -1.05586262253232909814e+01,
8676 ra3
= -6.23753324503260060396e+01,
8677 ra4
= -1.62396669462573470355e+02,
8678 ra5
= -1.84605092906711035994e+02,
8679 ra6
= -8.12874355063065934246e+01,
8680 ra7
= -9.81432934416914548592e+00,
8681 sa1
= 1.96512716674392571292e+01,
8682 sa2
= 1.37657754143519042600e+02,
8683 sa3
= 4.34565877475229228821e+02,
8684 sa4
= 6.45387271733267880336e+02,
8685 sa5
= 4.29008140027567833386e+02,
8686 sa6
= 1.08635005541779435134e+02,
8687 sa7
= 6.57024977031928170135e+00,
8688 sa8
= -6.04244152148580987438e-02,
8689 rb0
= -9.86494292470009928597e-03,
8690 rb1
= -7.99283237680523006574e-01,
8691 rb2
= -1.77579549177547519889e+01,
8692 rb3
= -1.60636384855821916062e+02,
8693 rb4
= -6.37566443368389627722e+02,
8694 rb5
= -1.02509513161107724954e+03,
8695 rb6
= -4.83519191608651397019e+02,
8696 sb1
= 3.03380607434824582924e+01,
8697 sb2
= 3.25792512996573918826e+02,
8698 sb3
= 1.53672958608443695994e+03,
8699 sb4
= 3.19985821950859553908e+03,
8700 sb5
= 2.55305040643316442583e+03,
8701 sb6
= 4.74528541206955367215e+02,
8702 sb7
= -2.24409524465858183362e+01;
8707 if (ix
< 0x3ff40000) /* |x| < 1.25 */
8712 if (ix
< 0x4006db6d) { /* |x| < 1/.35 ~ 2.85714 */
8713 R
= ra0
+ s
* (ra1
+ s
* (ra2
+ s
* (ra3
+ s
* (ra4
+ s
*
8714 (ra5
+ s
* (ra6
+ s
* ra7
))))));
8715 S
= 1.0 + s
* (sa1
+ s
* (sa2
+ s
* (sa3
+ s
* (sa4
+ s
*
8716 (sa5
+ s
* (sa6
+ s
* (sa7
+ s
* sa8
)))))));
8717 } else { /* |x| > 1/.35 */
8718 R
= rb0
+ s
* (rb1
+ s
* (rb2
+ s
* (rb3
+ s
* (rb4
+ s
*
8719 (rb5
+ s
* rb6
)))));
8720 S
= 1.0 + s
* (sb1
+ s
* (sb2
+ s
* (sb3
+ s
* (sb4
+ s
*
8721 (sb5
+ s
* (sb6
+ s
* sb7
))))));
8724 iz
= *(ULONGLONG
*)&z
;
8725 iz
&= 0xffffffff00000000ULL
;
8727 return exp(-z
* z
- 0.5625) * exp((z
- x
) * (z
+ x
) + R
/ S
) / x
;
8730 /*********************************************************************
8733 double CDECL
erf(double x
)
8735 static const double efx8
= 1.02703333676410069053e+00,
8736 pp0
= 1.28379167095512558561e-01,
8737 pp1
= -3.25042107247001499370e-01,
8738 pp2
= -2.84817495755985104766e-02,
8739 pp3
= -5.77027029648944159157e-03,
8740 pp4
= -2.37630166566501626084e-05,
8741 qq1
= 3.97917223959155352819e-01,
8742 qq2
= 6.50222499887672944485e-02,
8743 qq3
= 5.08130628187576562776e-03,
8744 qq4
= 1.32494738004321644526e-04,
8745 qq5
= -3.96022827877536812320e-06;
8751 ix
= *(UINT64
*)&x
>> 32;
8754 if (ix
>= 0x7ff00000) {
8755 /* erf(nan)=nan, erf(+-inf)=+-1 */
8756 return 1 - 2 * sign
+ 1 / x
;
8758 if (ix
< 0x3feb0000) { /* |x| < 0.84375 */
8759 if (ix
< 0x3e300000) { /* |x| < 2**-28 */
8760 /* avoid underflow */
8761 return 0.125 * (8 * x
+ efx8
* x
);
8764 r
= pp0
+ z
* (pp1
+ z
* (pp2
+ z
* (pp3
+ z
* pp4
)));
8765 s
= 1.0 + z
* (qq1
+ z
* (qq2
+ z
* (qq3
+ z
* (qq4
+ z
* qq5
))));
8769 if (ix
< 0x40180000) /* 0.84375 <= |x| < 6 */
8770 y
= 1 - erfc2(ix
, x
);
8773 return sign
? -y
: y
;
8776 static float erfc1f(float x
)
8778 static const float erx
= 8.4506291151e-01,
8779 pa0
= -2.3621185683e-03,
8780 pa1
= 4.1485610604e-01,
8781 pa2
= -3.7220788002e-01,
8782 pa3
= 3.1834661961e-01,
8783 pa4
= -1.1089469492e-01,
8784 pa5
= 3.5478305072e-02,
8785 pa6
= -2.1663755178e-03,
8786 qa1
= 1.0642088205e-01,
8787 qa2
= 5.4039794207e-01,
8788 qa3
= 7.1828655899e-02,
8789 qa4
= 1.2617121637e-01,
8790 qa5
= 1.3637083583e-02,
8791 qa6
= 1.1984500103e-02;
8796 P
= pa0
+ s
* (pa1
+ s
* (pa2
+ s
* (pa3
+ s
* (pa4
+ s
* (pa5
+ s
* pa6
)))));
8797 Q
= 1 + s
* (qa1
+ s
* (qa2
+ s
* (qa3
+ s
* (qa4
+ s
* (qa5
+ s
* qa6
)))));
8798 return 1 - erx
- P
/ Q
;
8801 static float erfc2f(UINT32 ix
, float x
)
8803 static const float ra0
= -9.8649440333e-03,
8804 ra1
= -6.9385856390e-01,
8805 ra2
= -1.0558626175e+01,
8806 ra3
= -6.2375331879e+01,
8807 ra4
= -1.6239666748e+02,
8808 ra5
= -1.8460508728e+02,
8809 ra6
= -8.1287437439e+01,
8810 ra7
= -9.8143291473e+00,
8811 sa1
= 1.9651271820e+01,
8812 sa2
= 1.3765776062e+02,
8813 sa3
= 4.3456588745e+02,
8814 sa4
= 6.4538726807e+02,
8815 sa5
= 4.2900814819e+02,
8816 sa6
= 1.0863500214e+02,
8817 sa7
= 6.5702495575e+00,
8818 sa8
= -6.0424413532e-02,
8819 rb0
= -9.8649431020e-03,
8820 rb1
= -7.9928326607e-01,
8821 rb2
= -1.7757955551e+01,
8822 rb3
= -1.6063638306e+02,
8823 rb4
= -6.3756646729e+02,
8824 rb5
= -1.0250950928e+03,
8825 rb6
= -4.8351919556e+02,
8826 sb1
= 3.0338060379e+01,
8827 sb2
= 3.2579251099e+02,
8828 sb3
= 1.5367296143e+03,
8829 sb4
= 3.1998581543e+03,
8830 sb5
= 2.5530502930e+03,
8831 sb6
= 4.7452853394e+02,
8832 sb7
= -2.2440952301e+01;
8836 if (ix
< 0x3fa00000) /* |x| < 1.25 */
8841 if (ix
< 0x4036db6d) { /* |x| < 1/0.35 */
8842 R
= ra0
+ s
* (ra1
+ s
* (ra2
+ s
* (ra3
+ s
* (ra4
+ s
*
8843 (ra5
+ s
* (ra6
+ s
* ra7
))))));
8844 S
= 1.0f
+ s
* (sa1
+ s
* (sa2
+ s
* (sa3
+ s
* (sa4
+ s
*
8845 (sa5
+ s
* (sa6
+ s
* (sa7
+ s
* sa8
)))))));
8846 } else { /* |x| >= 1/0.35 */
8847 R
= rb0
+ s
* (rb1
+ s
* (rb2
+ s
* (rb3
+ s
* (rb4
+ s
* (rb5
+ s
* rb6
)))));
8848 S
= 1.0f
+ s
* (sb1
+ s
* (sb2
+ s
* (sb3
+ s
* (sb4
+ s
*
8849 (sb5
+ s
* (sb6
+ s
* sb7
))))));
8852 ix
= *(UINT32
*)&x
& 0xffffe000;
8854 return expf(-z
* z
- 0.5625f
) * expf((z
- x
) * (z
+ x
) + R
/ S
) / x
;
8857 /*********************************************************************
8860 * Copied from musl: src/math/erff.c
8862 float CDECL
erff(float x
)
8864 static const float efx8
= 1.0270333290e+00,
8865 pp0
= 1.2837916613e-01,
8866 pp1
= -3.2504209876e-01,
8867 pp2
= -2.8481749818e-02,
8868 pp3
= -5.7702702470e-03,
8869 pp4
= -2.3763017452e-05,
8870 qq1
= 3.9791721106e-01,
8871 qq2
= 6.5022252500e-02,
8872 qq3
= 5.0813062117e-03,
8873 qq4
= 1.3249473704e-04,
8874 qq5
= -3.9602282413e-06;
8883 if (ix
>= 0x7f800000) {
8884 /* erf(nan)=nan, erf(+-inf)=+-1 */
8885 return 1 - 2 * sign
+ 1 / x
;
8887 if (ix
< 0x3f580000) { /* |x| < 0.84375 */
8888 if (ix
< 0x31800000) { /* |x| < 2**-28 */
8889 /* avoid underflow */
8890 return 0.125f
* (8 * x
+ efx8
* x
);
8893 r
= pp0
+ z
* (pp1
+ z
* (pp2
+ z
* (pp3
+ z
* pp4
)));
8894 s
= 1 + z
* (qq1
+ z
* (qq2
+ z
* (qq3
+ z
* (qq4
+ z
* qq5
))));
8898 if (ix
< 0x40c00000) /* |x| < 6 */
8899 y
= 1 - erfc2f(ix
, x
);
8902 return sign
? -y
: y
;
8905 /*********************************************************************
8908 * Copied from musl: src/math/erf.c
8910 double CDECL
erfc(double x
)
8912 static const double pp0
= 1.28379167095512558561e-01,
8913 pp1
= -3.25042107247001499370e-01,
8914 pp2
= -2.84817495755985104766e-02,
8915 pp3
= -5.77027029648944159157e-03,
8916 pp4
= -2.37630166566501626084e-05,
8917 qq1
= 3.97917223959155352819e-01,
8918 qq2
= 6.50222499887672944485e-02,
8919 qq3
= 5.08130628187576562776e-03,
8920 qq4
= 1.32494738004321644526e-04,
8921 qq5
= -3.96022827877536812320e-06;
8927 ix
= *(ULONGLONG
*)&x
>> 32;
8930 if (ix
>= 0x7ff00000) {
8931 /* erfc(nan)=nan, erfc(+-inf)=0,2 */
8932 return 2 * sign
+ 1 / x
;
8934 if (ix
< 0x3feb0000) { /* |x| < 0.84375 */
8935 if (ix
< 0x3c700000) /* |x| < 2**-56 */
8938 r
= pp0
+ z
* (pp1
+ z
* (pp2
+ z
* (pp3
+ z
* pp4
)));
8939 s
= 1.0 + z
* (qq1
+ z
* (qq2
+ z
* (qq3
+ z
* (qq4
+ z
* qq5
))));
8941 if (sign
|| ix
< 0x3fd00000) { /* x < 1/4 */
8942 return 1.0 - (x
+ x
* y
);
8944 return 0.5 - (x
- 0.5 + x
* y
);
8946 if (ix
< 0x403c0000) { /* 0.84375 <= |x| < 28 */
8947 return sign
? 2 - erfc2(ix
, x
) : erfc2(ix
, x
);
8952 return fp_barrier(DBL_MIN
) * DBL_MIN
;
8955 /*********************************************************************
8956 * erfcf (MSVCR120.@)
8958 * Copied from musl: src/math/erff.c
8960 float CDECL
erfcf(float x
)
8962 static const float pp0
= 1.2837916613e-01,
8963 pp1
= -3.2504209876e-01,
8964 pp2
= -2.8481749818e-02,
8965 pp3
= -5.7702702470e-03,
8966 pp4
= -2.3763017452e-05,
8967 qq1
= 3.9791721106e-01,
8968 qq2
= 6.5022252500e-02,
8969 qq3
= 5.0813062117e-03,
8970 qq4
= 1.3249473704e-04,
8971 qq5
= -3.9602282413e-06;
8980 if (ix
>= 0x7f800000) {
8981 /* erfc(nan)=nan, erfc(+-inf)=0,2 */
8982 return 2 * sign
+ 1 / x
;
8985 if (ix
< 0x3f580000) { /* |x| < 0.84375 */
8986 if (ix
< 0x23800000) /* |x| < 2**-56 */
8989 r
= pp0
+ z
* (pp1
+ z
* (pp2
+ z
* (pp3
+ z
* pp4
)));
8990 s
= 1.0f
+ z
* (qq1
+ z
* (qq2
+ z
* (qq3
+ z
* (qq4
+ z
* qq5
))));
8992 if (sign
|| ix
< 0x3e800000) /* x < 1/4 */
8993 return 1.0f
- (x
+ x
* y
);
8994 return 0.5f
- (x
- 0.5f
+ x
* y
);
8996 if (ix
< 0x41e00000) { /* |x| < 28 */
8997 return sign
? 2 - erfc2f(ix
, x
) : erfc2f(ix
, x
);
9002 return FLT_MIN
* FLT_MIN
;
9005 /*********************************************************************
9006 * fmaxf (MSVCR120.@)
9008 float CDECL
fmaxf(float x
, float y
)
9015 return signbit(x
) ? y
: x
;
9019 /*********************************************************************
9022 double CDECL
fmax(double x
, double y
)
9029 return signbit(x
) ? y
: x
;
9033 /*********************************************************************
9034 * fdimf (MSVCR120.@)
9036 float CDECL
fdimf(float x
, float y
)
9042 return x
>y
? x
-y
: 0;
9045 /*********************************************************************
9048 double CDECL
fdim(double x
, double y
)
9054 return x
>y
? x
-y
: 0;
9057 /*********************************************************************
9058 * _fdsign (MSVCR120.@)
9060 int CDECL
_fdsign(float x
)
9062 union { float f
; UINT32 i
; } u
= { x
};
9063 return (u
.i
>> 16) & 0x8000;
9066 /*********************************************************************
9067 * _dsign (MSVCR120.@)
9069 int CDECL
_dsign(double x
)
9071 union { double f
; UINT64 i
; } u
= { x
};
9072 return (u
.i
>> 48) & 0x8000;
9076 /*********************************************************************
9077 * _dpcomp (MSVCR120.@)
9079 int CDECL
_dpcomp(double x
, double y
)
9081 if(isnan(x
) || isnan(y
))
9084 if(x
== y
) return 2;
9085 return x
< y
? 1 : 4;
9088 /*********************************************************************
9089 * _fdpcomp (MSVCR120.@)
9091 int CDECL
_fdpcomp(float x
, float y
)
9093 return _dpcomp(x
, y
);
9096 /*********************************************************************
9097 * fminf (MSVCR120.@)
9099 float CDECL
fminf(float x
, float y
)
9106 return signbit(x
) ? x
: y
;
9110 /*********************************************************************
9113 double CDECL
fmin(double x
, double y
)
9120 return signbit(x
) ? x
: y
;
9124 /*********************************************************************
9125 * asinh (MSVCR120.@)
9127 * Copied from musl: src/math/asinh.c
9129 double CDECL
asinh(double x
)
9131 UINT64 ux
= *(UINT64
*)&x
;
9132 int e
= ux
>> 52 & 0x7ff;
9136 ux
&= (UINT64
)-1 / 2;
9139 if (e
>= 0x3ff + 26) /* |x| >= 0x1p26 or inf or nan */
9140 x
= log(x
) + 0.693147180559945309417232121458176568;
9141 else if (e
>= 0x3ff + 1) /* |x| >= 2 */
9142 x
= log(2 * x
+ 1 / (sqrt(x
* x
+ 1) + x
));
9143 else if (e
>= 0x3ff - 26) /* |x| >= 0x1p-26 */
9144 x
= log1p(x
+ x
* x
/ (sqrt(x
* x
+ 1) + 1));
9145 else /* |x| < 0x1p-26, raise inexact if x != 0 */
9146 fp_barrier(x
+ 0x1p
120f
);
9150 /*********************************************************************
9151 * asinhf (MSVCR120.@)
9153 * Copied from musl: src/math/asinhf.c
9155 float CDECL
asinhf(float x
)
9157 UINT32 ux
= *(UINT32
*)&x
;
9158 UINT32 i
= ux
& 0x7fffffff;
9164 if (i
>= 0x3f800000 + (12 << 23))/* |x| >= 0x1p12 or inf or nan */
9165 x
= logf(x
) + 0.693147180559945309417232121458176568f
;
9166 else if (i
>= 0x3f800000 + (1 << 23)) /* |x| >= 2 */
9167 x
= logf(2 * x
+ 1 / (sqrtf(x
* x
+ 1) + x
));
9168 else if (i
>= 0x3f800000 - (12 << 23)) /* |x| >= 0x1p-12 */
9169 x
= log1pf(x
+ x
* x
/ (sqrtf(x
* x
+ 1) + 1));
9170 else /* |x| < 0x1p-12, raise inexact if x!=0 */
9171 fp_barrierf(x
+ 0x1p
120f
);
9175 /*********************************************************************
9176 * acosh (MSVCR120.@)
9178 * Copied from musl: src/math/acosh.c
9180 double CDECL
acosh(double x
)
9182 int e
= *(UINT64
*)&x
>> 52 & 0x7ff;
9187 feraiseexcept(FE_INVALID
);
9191 if (e
< 0x3ff + 1) /* |x| < 2, up to 2ulp error in [1,1.125] */
9192 return log1p(x
- 1 + sqrt((x
- 1) * (x
- 1) + 2 * (x
- 1)));
9193 if (e
< 0x3ff + 26) /* |x| < 0x1p26 */
9194 return log(2 * x
- 1 / (x
+ sqrt(x
* x
- 1)));
9195 /* |x| >= 0x1p26 or nan */
9196 return log(x
) + 0.693147180559945309417232121458176568;
9199 /*********************************************************************
9200 * acoshf (MSVCR120.@)
9202 * Copied from musl: src/math/acoshf.c
9204 float CDECL
acoshf(float x
)
9206 UINT32 a
= *(UINT32
*)&x
& 0x7fffffff;
9211 feraiseexcept(FE_INVALID
);
9215 if (a
< 0x3f800000 + (1 << 23)) /* |x| < 2, up to 2ulp error in [1,1.125] */
9216 return log1pf(x
- 1 + sqrtf((x
- 1) * (x
- 1) + 2 * (x
- 1)));
9217 if (*(UINT32
*)&x
< 0x3f800000 + (12 << 23)) /* 2 <= x < 0x1p12 */
9218 return logf(2 * x
- 1 / (x
+ sqrtf(x
* x
- 1)));
9219 /* x >= 0x1p12 or x <= -2 or nan */
9220 return logf(x
) + 0.693147180559945309417232121458176568f
;
9223 /*********************************************************************
9224 * atanh (MSVCR120.@)
9226 * Copied from musl: src/math/atanh.c
9228 double CDECL
atanh(double x
)
9230 UINT64 ux
= *(UINT64
*)&x
;
9231 int e
= ux
>> 52 & 0x7ff;
9235 ux
&= (UINT64
)-1 / 2;
9240 feraiseexcept(FE_INVALID
);
9244 if (e
< 0x3ff - 1) {
9245 if (e
< 0x3ff - 32) {
9246 fp_barrier(x
+ 0x1p
120f
);
9247 if (e
== 0) /* handle underflow */
9249 } else { /* |x| < 0.5, up to 1.7ulp error */
9250 x
= 0.5 * log1p(2 * x
+ 2 * x
* x
/ (1 - x
));
9252 } else { /* avoid overflow */
9253 x
= 0.5 * log1p(2 * (x
/ (1 - x
)));
9254 if (isinf(x
)) *_errno() = ERANGE
;
9259 /*********************************************************************
9260 * atanhf (MSVCR120.@)
9262 * Copied from musl: src/math/atanhf.c
9264 float CDECL
atanhf(float x
)
9266 UINT32 ux
= *(UINT32
*)&x
;
9275 feraiseexcept(FE_INVALID
);
9279 if (ux
< 0x3f800000 - (1 << 23)) {
9280 if (ux
< 0x3f800000 - (32 << 23)) {
9281 fp_barrierf(x
+ 0x1p
120f
);
9282 if (ux
< (1 << 23)) /* handle underflow */
9284 } else { /* |x| < 0.5, up to 1.7ulp error */
9285 x
= 0.5f
* log1pf(2 * x
+ 2 * x
* x
/ (1 - x
));
9287 } else { /* avoid overflow */
9288 x
= 0.5f
* log1pf(2 * (x
/ (1 - x
)));
9289 if (isinf(x
)) *_errno() = ERANGE
;
9294 #endif /* _MSVCR_VER>=120 */
9296 /*********************************************************************
9298 * scalbn (MSVCR120.@)
9299 * scalbln (MSVCR120.@)
9301 double CDECL
_scalb(double num
, __msvcrt_long power
)
9303 return ldexp(num
, power
);
9306 /*********************************************************************
9307 * _scalbf (MSVCRT.@)
9308 * scalbnf (MSVCR120.@)
9309 * scalblnf (MSVCR120.@)
9311 float CDECL
_scalbf(float num
, __msvcrt_long power
)
9313 return ldexp(num
, power
);
9318 /*********************************************************************
9319 * remainder (MSVCR120.@)
9321 * Copied from musl: src/math/remainder.c
9323 double CDECL
remainder(double x
, double y
)
9326 #if _MSVCR_VER == 120 && defined(__x86_64__)
9327 if (isnan(x
) || isnan(y
)) *_errno() = EDOM
;
9329 return remquo(x
, y
, &q
);
9332 /*********************************************************************
9333 * remainderf (MSVCR120.@)
9335 * Copied from musl: src/math/remainderf.c
9337 float CDECL
remainderf(float x
, float y
)
9340 #if _MSVCR_VER == 120 && defined(__x86_64__)
9341 if (isnan(x
) || isnan(y
)) *_errno() = EDOM
;
9343 return remquof(x
, y
, &q
);
9346 /*********************************************************************
9347 * remquo (MSVCR120.@)
9349 * Copied from musl: src/math/remquo.c
9351 double CDECL
remquo(double x
, double y
, int *quo
)
9353 UINT64 uxi
= *(UINT64
*)&x
;
9354 UINT64 uyi
= *(UINT64
*)&y
;
9355 int ex
= uxi
>> 52 & 0x7ff;
9356 int ey
= uyi
>> 52 & 0x7ff;
9363 if (y
== 0 || isinf(x
)) *_errno() = EDOM
;
9364 if (uyi
<< 1 == 0 || isnan(y
) || ex
== 0x7ff)
9365 return (x
* y
) / (x
* y
);
9369 /* normalize x and y */
9371 for (i
= uxi
<< 12; i
>> 63 == 0; ex
--, i
<<= 1);
9378 for (i
= uyi
<< 12; i
>> 63 == 0; ey
--, i
<<= 1);
9393 for (; ex
> ey
; ex
--) {
9410 for (; uxi
>> 52 == 0; uxi
<<= 1, ex
--);
9412 /* scale result and decide between |x| and |x|-|y| */
9415 uxi
|= (UINT64
)ex
<< 52;
9422 if (ex
== ey
|| (ex
+ 1 == ey
&& (2 * x
> y
|| (2 * x
== y
&& q
% 2)))) {
9427 *quo
= sx
^ sy
? -(int)q
: (int)q
;
9431 /*********************************************************************
9432 * remquof (MSVCR120.@)
9434 * Copied from musl: src/math/remquof.c
9436 float CDECL
remquof(float x
, float y
, int *quo
)
9438 UINT32 uxi
= *(UINT32
*)&x
;
9439 UINT32 uyi
= *(UINT32
*)&y
;
9440 int ex
= uxi
>> 23 & 0xff;
9441 int ey
= uyi
>> 23 & 0xff;
9447 if (y
== 0 || isinf(x
)) *_errno() = EDOM
;
9448 if (uyi
<< 1 == 0 || isnan(y
) || ex
== 0xff)
9449 return (x
* y
) / (x
* y
);
9453 /* normalize x and y */
9455 for (i
= uxi
<< 9; i
>> 31 == 0; ex
--, i
<<= 1);
9462 for (i
= uyi
<< 9; i
>> 31 == 0; ey
--, i
<<= 1);
9477 for (; ex
> ey
; ex
--) {
9494 for (; uxi
>> 23 == 0; uxi
<<= 1, ex
--);
9496 /* scale result and decide between |x| and |x|-|y| */
9499 uxi
|= (UINT32
)ex
<< 23;
9506 if (ex
== ey
|| (ex
+ 1 == ey
&& (2 * x
> y
|| (2 * x
== y
&& q
% 2)))) {
9511 *quo
= sx
^ sy
? -(int)q
: (int)q
;
9515 /* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
9516 static double sin_pi(double x
)
9520 /* spurious inexact if odd int */
9521 x
= 2.0 * (x
* 0.5 - floor(x
* 0.5)); /* x mod 2.0 */
9529 default: /* case 4: */
9530 case 0: return __sin(x
, 0.0, 0);
9531 case 1: return __cos(x
, 0.0);
9532 case 2: return __sin(-x
, 0.0, 0);
9533 case 3: return -__cos(x
, 0.0);
9537 /*********************************************************************
9538 * lgamma (MSVCR120.@)
9540 * Copied from musl: src/math/lgamma_r.c
9542 double CDECL
lgamma(double x
)
9544 static const double pi
= 3.14159265358979311600e+00,
9545 a0
= 7.72156649015328655494e-02,
9546 a1
= 3.22467033424113591611e-01,
9547 a2
= 6.73523010531292681824e-02,
9548 a3
= 2.05808084325167332806e-02,
9549 a4
= 7.38555086081402883957e-03,
9550 a5
= 2.89051383673415629091e-03,
9551 a6
= 1.19270763183362067845e-03,
9552 a7
= 5.10069792153511336608e-04,
9553 a8
= 2.20862790713908385557e-04,
9554 a9
= 1.08011567247583939954e-04,
9555 a10
= 2.52144565451257326939e-05,
9556 a11
= 4.48640949618915160150e-05,
9557 tc
= 1.46163214496836224576e+00,
9558 tf
= -1.21486290535849611461e-01,
9559 tt
= -3.63867699703950536541e-18,
9560 t0
= 4.83836122723810047042e-01,
9561 t1
= -1.47587722994593911752e-01,
9562 t2
= 6.46249402391333854778e-02,
9563 t3
= -3.27885410759859649565e-02,
9564 t4
= 1.79706750811820387126e-02,
9565 t5
= -1.03142241298341437450e-02,
9566 t6
= 6.10053870246291332635e-03,
9567 t7
= -3.68452016781138256760e-03,
9568 t8
= 2.25964780900612472250e-03,
9569 t9
= -1.40346469989232843813e-03,
9570 t10
= 8.81081882437654011382e-04,
9571 t11
= -5.38595305356740546715e-04,
9572 t12
= 3.15632070903625950361e-04,
9573 t13
= -3.12754168375120860518e-04,
9574 t14
= 3.35529192635519073543e-04,
9575 u0
= -7.72156649015328655494e-02,
9576 u1
= 6.32827064025093366517e-01,
9577 u2
= 1.45492250137234768737e+00,
9578 u3
= 9.77717527963372745603e-01,
9579 u4
= 2.28963728064692451092e-01,
9580 u5
= 1.33810918536787660377e-02,
9581 v1
= 2.45597793713041134822e+00,
9582 v2
= 2.12848976379893395361e+00,
9583 v3
= 7.69285150456672783825e-01,
9584 v4
= 1.04222645593369134254e-01,
9585 v5
= 3.21709242282423911810e-03,
9586 s0
= -7.72156649015328655494e-02,
9587 s1
= 2.14982415960608852501e-01,
9588 s2
= 3.25778796408930981787e-01,
9589 s3
= 1.46350472652464452805e-01,
9590 s4
= 2.66422703033638609560e-02,
9591 s5
= 1.84028451407337715652e-03,
9592 s6
= 3.19475326584100867617e-05,
9593 r1
= 1.39200533467621045958e+00,
9594 r2
= 7.21935547567138069525e-01,
9595 r3
= 1.71933865632803078993e-01,
9596 r4
= 1.86459191715652901344e-02,
9597 r5
= 7.77942496381893596434e-04,
9598 r6
= 7.32668430744625636189e-06,
9599 w0
= 4.18938533204672725052e-01,
9600 w1
= 8.33333333333329678849e-02,
9601 w2
= -2.77777777728775536470e-03,
9602 w3
= 7.93650558643019558500e-04,
9603 w4
= -5.95187557450339963135e-04,
9604 w5
= 8.36339918996282139126e-04,
9605 w6
= -1.63092934096575273989e-03;
9607 union {double f
; UINT64 i
;} u
= {x
};
9608 double t
, y
, z
, nadj
, p
, p1
, p2
, p3
, q
, r
, w
;
9612 /* purge off +-inf, NaN, +-0, tiny and negative arguments */
9614 ix
= u
.i
>> 32 & 0x7fffffff;
9615 if (ix
>= 0x7ff00000)
9617 if (ix
< (0x3ff - 70) << 20) { /* |x|<2**-70, return -log(|x|) */
9625 if (t
== 0.0) { /* -integer */
9627 return 1.0 / (x
- x
);
9631 nadj
= log(pi
/ (t
* x
));
9634 /* purge off 1 and 2 */
9635 if ((ix
== 0x3ff00000 || ix
== 0x40000000) && (UINT32
)u
.i
== 0)
9638 else if (ix
< 0x40000000) {
9639 if (ix
<= 0x3feccccc) { /* lgamma(x) = lgamma(x+1)-log(x) */
9641 if (ix
>= 0x3FE76944) {
9644 } else if (ix
>= 0x3FCDA661) {
9653 if (ix
>= 0x3FFBB4C3) { /* [1.7316,2] */
9656 } else if(ix
>= 0x3FF3B4C4) { /* [1.23,1.73] */
9667 p1
= a0
+ z
* (a2
+ z
* (a4
+ z
* (a6
+ z
* (a8
+ z
* a10
))));
9668 p2
= z
* (a1
+ z
* (a3
+ z
* (a5
+ z
* (a7
+ z
* (a9
+ z
* a11
)))));
9675 p1
= t0
+ w
* (t3
+ w
* (t6
+ w
* (t9
+ w
* t12
))); /* parallel comp */
9676 p2
= t1
+ w
* (t4
+ w
* (t7
+ w
* (t10
+ w
* t13
)));
9677 p3
= t2
+ w
* (t5
+ w
* (t8
+ w
* (t11
+ w
* t14
)));
9678 p
= z
* p1
- (tt
- w
* (p2
+ y
* p3
));
9682 p1
= y
* (u0
+ y
* (u1
+ y
* (u2
+ y
* (u3
+ y
* (u4
+ y
* u5
)))));
9683 p2
= 1.0 + y
* (v1
+ y
* (v2
+ y
* (v3
+ y
* (v4
+ y
* v5
))));
9684 r
+= -0.5 * y
+ p1
/ p2
;
9686 } else if (ix
< 0x40200000) { /* x < 8.0 */
9689 p
= y
* (s0
+ y
* (s1
+ y
* (s2
+ y
* (s3
+ y
* (s4
+ y
* (s5
+ y
* s6
))))));
9690 q
= 1.0 + y
* (r1
+ y
* (r2
+ y
* (r3
+ y
* (r4
+ y
* (r5
+ y
* r6
)))));
9691 r
= 0.5 * y
+ p
/ q
;
9692 z
= 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */
9694 case 7: z
*= y
+ 6.0; /* fall through */
9695 case 6: z
*= y
+ 5.0; /* fall through */
9696 case 5: z
*= y
+ 4.0; /* fall through */
9697 case 4: z
*= y
+ 3.0; /* fall through */
9703 } else if (ix
< 0x43900000) { /* 8.0 <= x < 2**58 */
9707 w
= w0
+ z
* (w1
+ y
* (w2
+ y
* (w3
+ y
* (w4
+ y
* (w5
+ y
* w6
)))));
9708 r
= (x
- 0.5) * (t
- 1.0) + w
;
9709 } else /* 2**58 <= x <= inf */
9710 r
= x
* (log(x
) - 1.0);
9716 /* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
9717 static float sinf_pi(float x
)
9722 /* spurious inexact if odd int */
9723 x
= 2 * (x
* 0.5f
- floorf(x
* 0.5f
)); /* x mod 2.0 */
9730 default: /* case 4: */
9731 case 0: return __sindf(y
);
9732 case 1: return __cosdf(y
);
9733 case 2: return __sindf(-y
);
9734 case 3: return -__cosdf(y
);
9738 /*********************************************************************
9739 * lgammaf (MSVCR120.@)
9741 * Copied from musl: src/math/lgammaf_r.c
9743 float CDECL
lgammaf(float x
)
9745 static const float pi
= 3.1415927410e+00,
9746 a0
= 7.7215664089e-02,
9747 a1
= 3.2246702909e-01,
9748 a2
= 6.7352302372e-02,
9749 a3
= 2.0580807701e-02,
9750 a4
= 7.3855509982e-03,
9751 a5
= 2.8905137442e-03,
9752 a6
= 1.1927076848e-03,
9753 a7
= 5.1006977446e-04,
9754 a8
= 2.2086278477e-04,
9755 a9
= 1.0801156895e-04,
9756 a10
= 2.5214456400e-05,
9757 a11
= 4.4864096708e-05,
9758 tc
= 1.4616321325e+00,
9759 tf
= -1.2148628384e-01,
9760 tt
= 6.6971006518e-09,
9761 t0
= 4.8383611441e-01,
9762 t1
= -1.4758771658e-01,
9763 t2
= 6.4624942839e-02,
9764 t3
= -3.2788541168e-02,
9765 t4
= 1.7970675603e-02,
9766 t5
= -1.0314224288e-02,
9767 t6
= 6.1005386524e-03,
9768 t7
= -3.6845202558e-03,
9769 t8
= 2.2596477065e-03,
9770 t9
= -1.4034647029e-03,
9771 t10
= 8.8108185446e-04,
9772 t11
= -5.3859531181e-04,
9773 t12
= 3.1563205994e-04,
9774 t13
= -3.1275415677e-04,
9775 t14
= 3.3552918467e-04,
9776 u0
= -7.7215664089e-02,
9777 u1
= 6.3282704353e-01,
9778 u2
= 1.4549225569e+00,
9779 u3
= 9.7771751881e-01,
9780 u4
= 2.2896373272e-01,
9781 u5
= 1.3381091878e-02,
9782 v1
= 2.4559779167e+00,
9783 v2
= 2.1284897327e+00,
9784 v3
= 7.6928514242e-01,
9785 v4
= 1.0422264785e-01,
9786 v5
= 3.2170924824e-03,
9787 s0
= -7.7215664089e-02,
9788 s1
= 2.1498242021e-01,
9789 s2
= 3.2577878237e-01,
9790 s3
= 1.4635047317e-01,
9791 s4
= 2.6642270386e-02,
9792 s5
= 1.8402845599e-03,
9793 s6
= 3.1947532989e-05,
9794 r1
= 1.3920053244e+00,
9795 r2
= 7.2193557024e-01,
9796 r3
= 1.7193385959e-01,
9797 r4
= 1.8645919859e-02,
9798 r5
= 7.7794247773e-04,
9799 r6
= 7.3266842264e-06,
9800 w0
= 4.1893854737e-01,
9801 w1
= 8.3333335817e-02,
9802 w2
= -2.7777778450e-03,
9803 w3
= 7.9365057172e-04,
9804 w4
= -5.9518753551e-04,
9805 w5
= 8.3633989561e-04,
9806 w6
= -1.6309292987e-03;
9808 union {float f
; UINT32 i
;} u
= {x
};
9809 float t
, y
, z
, nadj
, p
, p1
, p2
, p3
, q
, r
, w
;
9813 /* purge off +-inf, NaN, +-0, tiny and negative arguments */
9815 ix
= u
.i
& 0x7fffffff;
9816 if (ix
>= 0x7f800000)
9818 if (ix
< 0x35000000) { /* |x| < 2**-21, return -log(|x|) */
9826 if (t
== 0.0f
) { /* -integer */
9828 return 1.0f
/ (x
- x
);
9832 nadj
= logf(pi
/ (t
* x
));
9835 /* purge off 1 and 2 */
9836 if (ix
== 0x3f800000 || ix
== 0x40000000)
9839 else if (ix
< 0x40000000) {
9840 if (ix
<= 0x3f666666) { /* lgamma(x) = lgamma(x+1)-log(x) */
9842 if (ix
>= 0x3f3b4a20) {
9845 } else if (ix
>= 0x3e6d3308) {
9846 y
= x
- (tc
- 1.0f
);
9854 if (ix
>= 0x3fdda618) { /* [1.7316,2] */
9857 } else if (ix
>= 0x3F9da620) { /* [1.23,1.73] */
9868 p1
= a0
+ z
* (a2
+ z
* (a4
+ z
* (a6
+ z
* (a8
+ z
* a10
))));
9869 p2
= z
* (a1
+ z
* (a3
+ z
* (a5
+ z
* (a7
+ z
* (a9
+ z
* a11
)))));
9876 p1
= t0
+ w
* (t3
+ w
* (t6
+ w
* (t9
+ w
* t12
))); /* parallel comp */
9877 p2
= t1
+ w
* (t4
+ w
* (t7
+ w
* (t10
+ w
* t13
)));
9878 p3
= t2
+ w
* (t5
+ w
* (t8
+ w
* (t11
+ w
* t14
)));
9879 p
= z
* p1
- (tt
- w
* (p2
+ y
* p3
));
9883 p1
= y
* (u0
+ y
* (u1
+ y
* (u2
+ y
* (u3
+ y
* (u4
+ y
* u5
)))));
9884 p2
= 1.0f
+ y
* (v1
+ y
* (v2
+ y
* (v3
+ y
* (v4
+ y
* v5
))));
9885 r
+= -0.5f
* y
+ p1
/ p2
;
9887 } else if (ix
< 0x41000000) { /* x < 8.0 */
9890 p
= y
* (s0
+ y
* (s1
+ y
* (s2
+ y
* (s3
+ y
* (s4
+ y
* (s5
+ y
* s6
))))));
9891 q
= 1.0f
+ y
* (r1
+ y
* (r2
+ y
* (r3
+ y
* (r4
+ y
* (r5
+ y
* r6
)))));
9892 r
= 0.5f
* y
+ p
/ q
;
9893 z
= 1.0f
; /* lgamma(1+s) = log(s) + lgamma(s) */
9895 case 7: z
*= y
+ 6.0f
; /* fall through */
9896 case 6: z
*= y
+ 5.0f
; /* fall through */
9897 case 5: z
*= y
+ 4.0f
; /* fall through */
9898 case 4: z
*= y
+ 3.0f
; /* fall through */
9904 } else if (ix
< 0x5c800000) { /* 8.0 <= x < 2**58 */
9908 w
= w0
+ z
* (w1
+ y
* (w2
+ y
* (w3
+ y
* (w4
+ y
* (w5
+ y
* w6
)))));
9909 r
= (x
- 0.5f
) * (t
- 1.0f
) + w
;
9910 } else /* 2**58 <= x <= inf */
9911 r
= x
* (logf(x
) - 1.0f
);
9917 static double tgamma_S(double x
)
9919 static const double Snum
[] = {
9920 23531376880.410759688572007674451636754734846804940,
9921 42919803642.649098768957899047001988850926355848959,
9922 35711959237.355668049440185451547166705960488635843,
9923 17921034426.037209699919755754458931112671403265390,
9924 6039542586.3520280050642916443072979210699388420708,
9925 1439720407.3117216736632230727949123939715485786772,
9926 248874557.86205415651146038641322942321632125127801,
9927 31426415.585400194380614231628318205362874684987640,
9928 2876370.6289353724412254090516208496135991145378768,
9929 186056.26539522349504029498971604569928220784236328,
9930 8071.6720023658162106380029022722506138218516325024,
9931 210.82427775157934587250973392071336271166969580291,
9932 2.5066282746310002701649081771338373386264310793408,
9934 static const double Sden
[] = {
9935 0, 39916800, 120543840, 150917976, 105258076, 45995730, 13339535,
9936 2637558, 357423, 32670, 1925, 66, 1,
9939 double num
= 0, den
= 0;
9942 /* to avoid overflow handle large x differently */
9944 for (i
= ARRAY_SIZE(Snum
) - 1; i
>= 0; i
--) {
9945 num
= num
* x
+ Snum
[i
];
9946 den
= den
* x
+ Sden
[i
];
9949 for (i
= 0; i
< ARRAY_SIZE(Snum
); i
++) {
9950 num
= num
/ x
+ Snum
[i
];
9951 den
= den
/ x
+ Sden
[i
];
9956 /*********************************************************************
9957 * tgamma (MSVCR120.@)
9959 * Copied from musl: src/math/tgamma.c
9961 double CDECL
tgamma(double x
)
9963 static const double gmhalf
= 5.524680040776729583740234375;
9964 static const double fact
[] = {
9965 1, 1, 2, 6, 24, 120, 720, 5040.0, 40320.0, 362880.0, 3628800.0, 39916800.0,
9966 479001600.0, 6227020800.0, 87178291200.0, 1307674368000.0, 20922789888000.0,
9967 355687428096000.0, 6402373705728000.0, 121645100408832000.0,
9968 2432902008176640000.0, 51090942171709440000.0, 1124000727777607680000.0,
9971 union {double f
; UINT64 i
;} u
= {x
};
9972 double absx
, y
, dy
, z
, r
;
9973 UINT32 ix
= u
.i
>> 32 & 0x7fffffff;
9974 int sign
= u
.i
>> 63;
9977 if (ix
>= 0x7ff00000) {
9978 /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */
9979 if (u
.i
== 0xfff0000000000000ULL
)
9981 return x
+ INFINITY
;
9983 if (ix
< (0x3ff - 54) << 20) {
9984 /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */
9990 /* integer arguments */
9991 /* raise inexact when non-integer */
9992 if (x
== floor(x
)) {
9997 if (x
<= ARRAY_SIZE(fact
))
9998 return fact
[(int)x
- 1];
10001 /* x >= 172: tgamma(x)=inf with overflow */
10002 /* x =< -184: tgamma(x)=+-0 with underflow */
10003 if (ix
>= 0x40670000) { /* |x| >= 184 */
10004 *_errno() = ERANGE
;
10006 fp_barrierf(0x1p
-126 / x
);
10013 absx
= sign
? -x
: x
;
10015 /* handle the error of x + g - 0.5 */
10017 if (absx
> gmhalf
) {
10026 r
= tgamma_S(absx
) * exp(-y
);
10028 /* reflection formula for negative x */
10029 /* sinpi(absx) is not 0, integers are already handled */
10030 r
= -M_PI
/ (sin_pi(absx
) * absx
* r
);
10034 r
+= dy
* (gmhalf
+ 0.5) * r
/ y
;
10035 z
= pow(y
, 0.5 * z
);
10040 /*********************************************************************
10041 * tgammaf (MSVCR120.@)
10043 * Copied from musl: src/math/tgammaf.c
10045 float CDECL
tgammaf(float x
)
10050 /*********************************************************************
10053 double CDECL
nan(const char *tagp
)
10055 /* Windows ignores input (MSDN) */
10059 /*********************************************************************
10060 * nanf (MSVCR120.@)
10062 float CDECL
nanf(const char *tagp
)
10067 /*********************************************************************
10068 * _except1 (MSVCR120.@)
10070 * - find meaning of ignored cw and operation bits
10073 double CDECL
_except1(DWORD fpe
, _FP_OPERATION_CODE op
, double arg
, double res
, DWORD cw
, void *unk
)
10075 ULONG_PTR exception_arg
;
10076 DWORD exception
= 0;
10077 unsigned int fpword
= 0;
10081 TRACE("(%lx %x %lf %lf %lx %p)\n", fpe
, op
, arg
, res
, cw
, unk
);
10084 cw
= ((cw
>> 7) & 0x3f) | ((cw
>> 3) & 0xc00);
10086 operation
= op
<< 5;
10087 exception_arg
= (ULONG_PTR
)&operation
;
10089 if (fpe
& 0x1) { /* overflow */
10090 if ((fpe
== 0x1 && (cw
& 0x8)) || (fpe
==0x11 && (cw
& 0x28))) {
10091 /* 32-bit version also sets SW_INEXACT here */
10092 raise
|= FE_OVERFLOW
;
10093 if (fpe
& 0x10) raise
|= FE_INEXACT
;
10094 res
= signbit(res
) ? -INFINITY
: INFINITY
;
10096 exception
= EXCEPTION_FLT_OVERFLOW
;
10098 } else if (fpe
& 0x2) { /* underflow */
10099 if ((fpe
== 0x2 && (cw
& 0x10)) || (fpe
==0x12 && (cw
& 0x30))) {
10100 raise
|= FE_UNDERFLOW
;
10101 if (fpe
& 0x10) raise
|= FE_INEXACT
;
10102 res
= signbit(res
) ? -0.0 : 0.0;
10104 exception
= EXCEPTION_FLT_UNDERFLOW
;
10106 } else if (fpe
& 0x4) { /* zerodivide */
10107 if ((fpe
== 0x4 && (cw
& 0x4)) || (fpe
==0x14 && (cw
& 0x24))) {
10108 raise
|= FE_DIVBYZERO
;
10109 if (fpe
& 0x10) raise
|= FE_INEXACT
;
10111 exception
= EXCEPTION_FLT_DIVIDE_BY_ZERO
;
10113 } else if (fpe
& 0x8) { /* invalid */
10114 if (fpe
== 0x8 && (cw
& 0x1)) {
10115 raise
|= FE_INVALID
;
10117 exception
= EXCEPTION_FLT_INVALID_OPERATION
;
10119 } else if (fpe
& 0x10) { /* inexact */
10120 if (fpe
== 0x10 && (cw
& 0x20)) {
10121 raise
|= FE_INEXACT
;
10123 exception
= EXCEPTION_FLT_INEXACT_RESULT
;
10129 feraiseexcept(raise
);
10131 RaiseException(exception
, 0, 1, &exception_arg
);
10133 if (cw
& 0x1) fpword
|= _EM_INVALID
;
10134 if (cw
& 0x2) fpword
|= _EM_DENORMAL
;
10135 if (cw
& 0x4) fpword
|= _EM_ZERODIVIDE
;
10136 if (cw
& 0x8) fpword
|= _EM_OVERFLOW
;
10137 if (cw
& 0x10) fpword
|= _EM_UNDERFLOW
;
10138 if (cw
& 0x20) fpword
|= _EM_INEXACT
;
10139 switch (cw
& 0xc00)
10141 case 0xc00: fpword
|= _RC_UP
|_RC_DOWN
; break;
10142 case 0x800: fpword
|= _RC_UP
; break;
10143 case 0x400: fpword
|= _RC_DOWN
; break;
10145 switch (cw
& 0x300)
10147 case 0x0: fpword
|= _PC_24
; break;
10148 case 0x200: fpword
|= _PC_53
; break;
10149 case 0x300: fpword
|= _PC_64
; break;
10151 if (cw
& 0x1000) fpword
|= _IC_AFFINE
;
10152 _setfp(&fpword
, _MCW_EM
| _MCW_RC
| _MCW_PC
| _MCW_IC
, NULL
, 0);
10157 _Dcomplex
* CDECL
_Cbuild(_Dcomplex
*ret
, double r
, double i
)
10164 double CDECL
MSVCR120_creal(_Dcomplex z
)
10169 /*********************************************************************
10170 * ilogb (MSVCR120.@)
10172 int CDECL
ilogb(double x
)
10177 /*********************************************************************
10178 * ilogbf (MSVCR120.@)
10180 int CDECL
ilogbf(float x
)
10182 return __ilogbf(x
);
10184 #endif /* _MSVCR_VER>=120 */