Initial commit
[lnanohtmltiledmap.git] / math.c
blob4a1694902075ffaee17d0cbb2ac767de2d45b829
1 /*
2 * "slow math" ripping/stitching/cosmetics from musl lib 1.2.2, power of open
3 * source
4 */
5 #include <float.h>
6 #include <ulinux/compiler_types.h>
7 #include <ulinux/types.h>
8 #define u32 ulinux_u32
9 #define s32 ulinux_s32
10 #define f32 ulinux_f32
11 #define s64 ulinux_s64
12 #define u64 ulinux_u64
13 #define f64 ulinux_f64
14 /* cosmetized locally */
15 #define asu64(f) ((union{f64 _f; u64 _i;}){f})._i
16 #define asf64(i) ((union{u64 _i; f64 _f;}){i})._f
17 #define WANT_ROUNDING 1
18 #ifdef __GNUC__
19 #define predict_true(x) __builtin_expect(!!(x), 1)
20 #define predict_false(x) __builtin_expect(x, 0)
21 #else
22 #define predict_true(x) (x)
23 #define predict_false(x) (x)
24 #endif
25 #define NAN (0.0f/0.0f)
26 #define INFINITY 1e5000f
28 * the following is to workaround some compiler operations/optimizations
30 * XXX: OMFG! we have to stop that insanity non-sense and write 100% assembly
31 * (fast) or 100% C ("slow") for this kind of code. Hope riscv will be a success.
33 static inline f64 eval_as_f64(f64 x)
35 f64 y = x;
36 return y;
38 static inline f64 fp_barrier(f64 x)
40 volatile f64 y = x;
41 return y;
43 static f64 __math_divzero(u32 sign)
45 return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
47 static f64 __math_invalid(f64 x)
49 return (x - x) / (x - x);
51 static void fp_force_eval_f32(f32 x)
53 volatile f32 y;
54 y = x;
56 static void fp_force_eval(f64 x)
58 volatile f64 y;
59 y = x;
61 static f64 __math_xflow(u32 sign, f64 y)
63 return eval_as_f64(fp_barrier(sign ? -y : y) * y);
65 static f64 __math_uflow(u32 sign)
67 return __math_xflow(sign, 0x1p-767);
69 static f64 __math_oflow(u32 sign)
71 return __math_xflow(sign, 0x1p769);
73 static u64 F64_BITS(f64 __f)
75 union {f64 __f; u64 __i;} __u;
76 __u.__f = __f;
77 return __u.__i;
79 #define isnan(x) ((F64_BITS(x) & -1UL>>1) > 0x7ffUL<<52)
80 #define EPS DBL_EPSILON
81 #define toint 1/EPS
82 static f64 floor(f64 x)
84 union {f64 f; u64 i;} u = {x};
85 s32 e = u.i >> 52 & 0x7ff;
86 f64 y;
88 if (e >= 0x3ff+52 || x == 0)
89 return x;
90 /* y = int(x) - x, where int(x) is an integer neighbor of x */
91 if (u.i >> 63)
92 y = x - toint + toint - x;
93 else
94 y = x + toint - toint - x;
95 /* special case because of non-nearest rounding modes */
96 if (e <= 0x3ff-1) {
97 fp_force_eval(y);
98 return u.i >> 63 ? -1 : 0;
100 if (y > 0)
101 return x + y - 1;
102 return x + y;
104 #undef toint
105 static f64 trunc(f64 x)
107 union {f64 f; u64 i;} u;
108 s32 e;
109 u64 m;
111 u.f = x;
112 e = (s32)(u.i >> 52 & 0x7ff) - 0x3ff + 12;
114 if (e >= 52 + 12)
115 return x;
116 if (e < 12)
117 e = 1;
118 m = -1UL >> e;
119 if ((u.i & m) == 0)
120 return x;
121 fp_force_eval(x + 0x1p120f);
122 u.i &= ~m;
123 return u.f;
125 #define toint 1/EPS
126 static f64 round(f64 x)
128 union {f64 f; u64 i;} u;
129 s32 e;
130 f64 y;
132 u.f = x;
133 e = u.i >> 52 & 0x7ff;
135 if (e >= 0x3ff+52)
136 return x;
137 if (u.i >> 63)
138 x = -x;
139 if (e < 0x3ff-1) {
140 /* raise inexact if x!=0 */
141 fp_force_eval(x + toint);
142 return 0*u.f;
144 y = x + toint - toint - x;
145 if (y > 0.5)
146 y = y + x - 1;
147 else if (y <= -0.5)
148 y = y + x + 1;
149 else
150 y = y + x;
151 if (u.i >> 63)
152 y = -y;
153 return y;
155 #undef toint
156 static f64 modf(f64 x, f64 *iptr)
158 union {f64 f; u64 i;} u = {x};
159 u64 mask;
160 s32 e = (s32)(u.i>>52 & 0x7ff) - 0x3ff;
162 /* no fractional part */
163 if (e >= 52) {
164 *iptr = x;
165 if (e == 0x400 && u.i<<12 != 0) /* nan */
166 return x;
167 u.i &= 1UL<<63;
168 return u.f;
171 /* no integral part*/
172 if (e < 0) {
173 u.i &= 1UL<<63;
174 *iptr = u.f;
175 return x;
178 mask = -1UL>>12>>e;
179 if ((u.i & mask) == 0) {
180 *iptr = x;
181 u.i &= 1UL<<63;
182 return u.f;
184 u.i &= ~mask;
185 *iptr = u.f;
186 return x - u.f;
188 static f64 fmod(f64 x, f64 y)
190 union {f64 f; u64 i;} ux = {x}, uy = {y};
191 s32 ex = ux.i>>52 & 0x7ff;
192 s32 ey = uy.i>>52 & 0x7ff;
193 s32 sx = ux.i>>63;
194 u64 i;
196 /* in the followings uxi should be ux.i, but then gcc wrongly adds */
197 /* float load/store to inner loops ruining performance and code size */
198 u64 uxi = ux.i;
200 if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff)
201 return (x*y)/(x*y);
202 if (uxi<<1 <= uy.i<<1) {
203 if (uxi<<1 == uy.i<<1)
204 return 0*x;
205 return x;
208 /* normalize x and y */
209 if (!ex) {
210 for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1);
211 uxi <<= -ex + 1;
212 } else {
213 uxi &= -1UL >> 12;
214 uxi |= 1UL << 52;
216 if (!ey) {
217 for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1);
218 uy.i <<= -ey + 1;
219 } else {
220 uy.i &= -1UL >> 12;
221 uy.i |= 1UL << 52;
224 /* x mod y */
225 for (; ex > ey; ex--) {
226 i = uxi - uy.i;
227 if (i >> 63 == 0) {
228 if (i == 0)
229 return 0*x;
230 uxi = i;
232 uxi <<= 1;
234 i = uxi - uy.i;
235 if (i >> 63 == 0) {
236 if (i == 0)
237 return 0*x;
238 uxi = i;
240 for (; uxi>>52 == 0; uxi <<= 1, ex--);
242 /* scale result */
243 if (ex > 0) {
244 uxi -= 1UL << 52;
245 uxi |= (u64)ex << 52;
246 } else {
247 uxi >>= -ex + 1;
249 uxi |= (u64)sx << 63;
250 ux.i = uxi;
251 return ux.f;
254 * Double-precision log(x) function.
256 * Copyright (c) 2018, Arm Limited.
257 * SPDX-License-Identifier: MIT
259 #define LOG_TABLE_BITS 7
260 #define LOG_POLY_ORDER 6
261 #define LOG_POLY1_ORDER 12
262 #define N (1 << LOG_TABLE_BITS)
263 struct log_data {
264 f64 ln2hi;
265 f64 ln2lo;
266 f64 poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
267 f64 poly1[LOG_POLY1_ORDER - 1];
268 struct {
269 f64 invc, logc;
270 } tab[1 << LOG_TABLE_BITS];
271 #if !__FP_FAST_FMA
272 struct {
273 f64 chi, clo;
274 } tab2[1 << LOG_TABLE_BITS];
275 #endif
276 } __log_data = {
277 .ln2hi = 0x1.62e42fefa3800p-1,
278 .ln2lo = 0x1.ef35793c76730p-45,
279 .poly1 = {
280 /* relative error: 0x1.c04d76cp-63 */
281 /* in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval) */
282 -0x1p-1,
283 0x1.5555555555577p-2,
284 -0x1.ffffffffffdcbp-3,
285 0x1.999999995dd0cp-3,
286 -0x1.55555556745a7p-3,
287 0x1.24924a344de3p-3,
288 -0x1.fffffa4423d65p-4,
289 0x1.c7184282ad6cap-4,
290 -0x1.999eb43b068ffp-4,
291 0x1.78182f7afd085p-4,
292 -0x1.5521375d145cdp-4,
294 .poly = {
295 /* relative error: 0x1.926199e8p-56 */
296 /* abs error: 0x1.882ff33p-65 */
297 /* in -0x1.fp-9 0x1.fp-9 */
298 -0x1.0000000000001p-1,
299 0x1.555555551305bp-2,
300 -0x1.fffffffeb459p-3,
301 0x1.999b324f10111p-3,
302 -0x1.55575e506c89fp-3,
304 /* Algorithm:
306 x = 2^k z
307 log(x) = k ln2 + log(c) + log(z/c)
308 log(z/c) = poly(z/c - 1)
310 where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
311 into the ith one, then table entries are computed as
313 tab[i].invc = 1/c
314 tab[i].logc = (double)log(c)
315 tab2[i].chi = (double)c
316 tab2[i].clo = (double)(c - (double)c)
318 where c is near the center of the subinterval and is chosen by trying +-2^29
319 floating point invc candidates around 1/center and selecting one for which
321 1) the rounding error in 0x1.8p9 + logc is 0,
322 2) the rounding error in z - chi - clo is < 0x1p-66 and
323 3) the rounding error in (double)log(c) is minimized (< 0x1p-66).
325 Note: 1) ensures that k*ln2hi + logc can be computed without rounding error,
326 2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to
327 a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
328 that logc + poly(z/c - 1) has small error, however near x == 1 when
329 |log(x)| < 0x1p-4, this is not enough so that is special cased. */
330 .tab = {
331 {0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
332 {0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
333 {0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
334 {0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
335 {0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
336 {0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
337 {0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
338 {0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
339 {0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
340 {0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
341 {0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
342 {0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
343 {0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
344 {0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
345 {0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
346 {0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
347 {0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
348 {0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
349 {0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
350 {0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
351 {0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
352 {0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
353 {0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
354 {0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
355 {0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
356 {0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
357 {0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
358 {0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
359 {0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
360 {0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
361 {0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
362 {0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
363 {0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
364 {0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
365 {0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
366 {0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
367 {0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
368 {0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
369 {0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
370 {0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
371 {0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
372 {0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
373 {0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
374 {0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
375 {0x1.293726014b530p+0, -0x1.31b996b490000p-3},
376 {0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
377 {0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
378 {0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
379 {0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
380 {0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
381 {0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
382 {0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
383 {0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
384 {0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
385 {0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
386 {0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
387 {0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
388 {0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
389 {0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
390 {0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
391 {0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
392 {0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
393 {0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
394 {0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
395 {0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
396 {0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
397 {0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
398 {0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
399 {0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
400 {0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
401 {0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
402 {0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
403 {0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
404 {0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
405 {0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
406 {0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
407 {0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
408 {0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
409 {0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
410 {0x1.008040614b195p+0, -0x1.0040979240000p-9},
411 {0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
412 {0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
413 {0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
414 {0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
415 {0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
416 {0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
417 {0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
418 {0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
419 {0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
420 {0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
421 {0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
422 {0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
423 {0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
424 {0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
425 {0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
426 {0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
427 {0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
428 {0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
429 {0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
430 {0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
431 {0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
432 {0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
433 {0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
434 {0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
435 {0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
436 {0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
437 {0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
438 {0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
439 {0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
440 {0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
441 {0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
442 {0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
443 {0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
444 {0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
445 {0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
446 {0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
447 {0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
448 {0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
449 {0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
450 {0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
451 {0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
452 {0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
453 {0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
454 {0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
455 {0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
456 {0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
457 {0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
458 {0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
460 #if !__FP_FAST_FMA
461 .tab2 = {
462 {0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
463 {0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
464 {0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
465 {0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
466 {0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
467 {0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
468 {0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
469 {0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
470 {0x1.710000e86978p-1, 0x1.bff6671097952p-56},
471 {0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
472 {0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
473 {0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
474 {0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
475 {0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
476 {0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
477 {0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
478 {0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
479 {0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
480 {0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
481 {0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
482 {0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
483 {0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
484 {0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
485 {0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
486 {0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
487 {0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
488 {0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
489 {0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
490 {0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
491 {0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
492 {0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
493 {0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
494 {0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
495 {0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
496 {0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
497 {0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
498 {0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
499 {0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
500 {0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
501 {0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
502 {0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
503 {0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
504 {0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
505 {0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
506 {0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
507 {0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
508 {0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
509 {0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
510 {0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
511 {0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
512 {0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
513 {0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
514 {0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
515 {0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
516 {0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
517 {0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
518 {0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
519 {0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
520 {0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
521 {0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
522 {0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
523 {0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
524 {0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
525 {0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
526 {0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
527 {0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
528 {0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
529 {0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
530 {0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
531 {0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
532 {0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
533 {0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
534 {0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
535 {0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
536 {0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
537 {0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
538 {0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
539 {0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
540 {0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
541 {0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
542 {0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
543 {0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
544 {0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
545 {0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
546 {0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
547 {0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
548 {0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
549 {0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
550 {0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
551 {0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
552 {0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
553 {0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
554 {0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
555 {0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
556 {0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
557 {0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
558 {0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
559 {0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
560 {0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
561 {0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
562 {0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
563 {0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
564 {0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
565 {0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
566 {0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
567 {0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
568 {0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
569 {0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
570 {0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
571 {0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
572 {0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
573 {0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
574 {0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
575 {0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
576 {0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
577 {0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
578 {0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
579 {0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
580 {0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
581 {0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
582 {0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
583 {0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
584 {0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
585 {0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
586 {0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
587 {0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
588 {0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
589 {0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
591 #endif
593 #define T __log_data.tab
594 #define T2 __log_data.tab2
595 #define B __log_data.poly1
596 #define A __log_data.poly
597 #define Ln2hi __log_data.ln2hi
598 #define Ln2lo __log_data.ln2lo
599 #define OFF 0x3fe6000000000000
600 /* Top 16 bits of a double. */
601 static inline u32 top16(f64 x)
603 return asu64(x) >> 48;
605 static f64 log(f64 x)
607 f64 w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
608 u64 ix, iz, tmp;
609 u32 top;
610 s32 k, i;
612 ix = asu64(x);
613 top = top16(x);
614 #define LO asu64(1.0 - 0x1p-4)
615 #define HI asu64(1.0 + 0x1.09p-4)
616 if (predict_false(ix - LO < HI - LO)) {
617 f64 rhi;
618 f64 rlo;
620 /* Handle close to 1.0 inputs separately. */
621 /* Fix sign of zero with downward rounding when x==1. */
622 if (WANT_ROUNDING && predict_false(ix == asu64(1.0)))
623 return 0;
624 r = x - 1.0;
625 r2 = r * r;
626 r3 = r * r2;
627 y = r3 *
628 (B[1] + r * B[2] + r2 * B[3] +
629 r3 * (B[4] + r * B[5] + r2 * B[6] +
630 r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
631 /* Worst-case error is around 0.507 ULP. */
632 w = r * 0x1p27;
633 rhi = r + w - w;
634 rlo = r - rhi;
635 w = rhi * rhi * B[0]; /* B[0] == -0.5. */
636 hi = r + w;
637 lo = r - hi + w;
638 lo += B[0] * rlo * (rhi + r);
639 y += lo;
640 y += hi;
641 return eval_as_f64(y);
643 if (predict_false(top - 0x0010 >= 0x7ff0 - 0x0010)) {
644 /* x < 0x1p-1022 or inf or nan. */
645 if (ix * 2 == 0)
646 return __math_divzero(1);
647 if (ix == asu64(INFINITY)) /* log(inf) == inf. */
648 return x;
649 if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
650 return __math_invalid(x);
651 /* x is subnormal, normalize it. */
652 ix = asu64(x * 0x1p52);
653 ix -= 52UL << 52;
656 /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
657 The range is split into N subintervals.
658 The ith subinterval contains z and c is near its center. */
659 tmp = ix - OFF;
660 i = (s32)((tmp >> (52 - LOG_TABLE_BITS)) % N);
661 k = (s32)((s64)tmp >> 52); /* arithmetic shift */
662 iz = ix - (tmp & 0xfffUL << 52);
663 invc = T[i].invc;
664 logc = T[i].logc;
665 z = asf64(iz);
667 /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
668 /* r ~= z/c - 1, |r| < 1/(2*N). */
669 #if __FP_FAST_FMA
670 /* rounding error: 0x1p-55/N. */
671 r = __builtin_fma(z, invc, -1.0);
672 #else
673 /* rounding error: 0x1p-55/N + 0x1p-66. */
674 r = (z - T2[i].chi - T2[i].clo) * invc;
675 #endif
676 kd = (f64)k;
678 /* hi + lo = r + log(c) + k*Ln2. */
679 w = kd * Ln2hi + logc;
680 hi = w + r;
681 lo = w - hi + r + kd * Ln2lo;
683 /* log(x) = lo + (log1p(r) - r) + hi. */
684 r2 = r * r; /* rounding error: 0x1p-54/N^2. */
685 /* Worst case error if |y| > 0x1p-5:
686 0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
687 Worst case error if |y| > 0x1p-4:
688 0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
689 y = lo + r2 * A[0] +
690 r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
691 return eval_as_f64(y);
693 #undef LOG_TABLE_BITS
694 #undef LOG_POLY_ORDER
695 #undef LOG_POLY1_ORDER
696 #undef T
697 #undef T2
698 #undef B
699 #undef A
700 #undef Ln2hi
701 #undef Ln2lo
702 #undef N
703 #undef OFF
704 #undef LO
705 #undef HI
706 static f64 scalbn(f64 x, s32 n)
708 union {f64 f; u64 i;} u;
709 f64 y = x;
711 if (n > 1023) {
712 y *= 0x1p1023;
713 n -= 1023;
714 if (n > 1023) {
715 y *= 0x1p1023;
716 n -= 1023;
717 if (n > 1023)
718 n = 1023;
720 } else if (n < -1022) {
721 /* make sure final n < -53 to avoid double
722 rounding in the subnormal range */
723 y *= 0x1p-1022 * 0x1p53;
724 n += 1022 - 53;
725 if (n < -1022) {
726 y *= 0x1p-1022 * 0x1p53;
727 n += 1022 - 53;
728 if (n < -1022)
729 n = -1022;
732 u.i = (u64)(0x3ff+n)<<52;
733 x = y * u.f;
734 return x;
736 /* origin: FreeBSD /usr/src/lib/msun/src/k_rem_pio2.c */
738 * ====================================================
739 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
741 * Developed at SunSoft, a Sun Microsystems, Inc. business.
742 * Permission to use, copy, modify, and distribute this
743 * software is freely granted, provided that this notice
744 * is preserved.
745 * ====================================================
748 * __rem_pio2_large(x,y,e0,nx,prec)
749 * double x[],y[]; int e0,nx,prec;
751 * __rem_pio2_large return the last three digits of N with
752 * y = x - N*pi/2
753 * so that |y| < pi/2.
755 * The method is to compute the integer (mod 8) and fraction parts of
756 * (2/pi)*x without doing the full multiplication. In general we
757 * skip the part of the product that are known to be a huge integer (
758 * more accurately, = 0 mod 8 ). Thus the number of operations are
759 * independent of the exponent of the input.
761 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
763 * Input parameters:
764 * x[] The input value (must be positive) is broken into nx
765 * pieces of 24-bit integers in double precision format.
766 * x[i] will be the i-th 24 bit of x. The scaled exponent
767 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
768 * match x's up to 24 bits.
770 * Example of breaking a double positive z into x[0]+x[1]+x[2]:
771 * e0 = ilogb(z)-23
772 * z = scalbn(z,-e0)
773 * for i = 0,1,2
774 * x[i] = floor(z)
775 * z = (z-x[i])*2**24
778 * y[] ouput result in an array of double precision numbers.
779 * The dimension of y[] is:
780 * 24-bit precision 1
781 * 53-bit precision 2
782 * 64-bit precision 2
783 * 113-bit precision 3
784 * The actual value is the sum of them. Thus for 113-bit
785 * precison, one may have to do something like:
787 * long double t,w,r_head, r_tail;
788 * t = (long double)y[2] + (long double)y[1];
789 * w = (long double)y[0];
790 * r_head = t+w;
791 * r_tail = w - (r_head - t);
793 * e0 The exponent of x[0]. Must be <= 16360 or you need to
794 * expand the ipio2 table.
796 * nx dimension of x[]
798 * prec an integer indicating the precision:
799 * 0 24 bits (single)
800 * 1 53 bits (double)
801 * 2 64 bits (extended)
802 * 3 113 bits (quad)
804 * External function:
805 * double scalbn(), floor();
808 * Here is the description of some local variables:
810 * jk jk+1 is the initial number of terms of ipio2[] needed
811 * in the computation. The minimum and recommended value
812 * for jk is 3,4,4,6 for single, double, extended, and quad.
813 * jk+1 must be 2 larger than you might expect so that our
814 * recomputation test works. (Up to 24 bits in the integer
815 * part (the 24 bits of it that we compute) and 23 bits in
816 * the fraction part may be lost to cancelation before we
817 * recompute.)
819 * jz local integer variable indicating the number of
820 * terms of ipio2[] used.
822 * jx nx - 1
824 * jv index for pointing to the suitable ipio2[] for the
825 * computation. In general, we want
826 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
827 * is an integer. Thus
828 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv
829 * Hence jv = max(0,(e0-3)/24).
831 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
833 * q[] double array with integral value, representing the
834 * 24-bits chunk of the product of x and 2/pi.
836 * q0 the corresponding exponent of q[0]. Note that the
837 * exponent for q[i] would be q0-24*i.
839 * PIo2[] double precision array, obtained by cutting pi/2
840 * into 24 bits chunks.
842 * f[] ipio2[] in floating point
844 * iq[] integer array by breaking up q[] in 24-bits chunk.
846 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
848 * ih integer. If >0 it indicates q[] is >= 0.5, hence
849 * it also indicates the *sign* of the result.
853 * Constants:
854 * The hexadecimal values are the intended ones for the following
855 * constants. The decimal values may be used, provided that the
856 * compiler will convert from decimal to binary accurately enough
857 * to produce the hexadecimal values shown.
859 static s32 init_jk[] = {3,4,4,6}; /* initial value for jk */
861 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
863 * integer array, contains the (24*i)-th to (24*i+23)-th
864 * bit of 2/pi after binary point. The corresponding
865 * floating value is
867 * ipio2[i] * 2^(-24(i+1)).
869 * NB: This table must have at least (e0-3)/24 + jk terms.
870 * For quad precision (e0 <= 16360, jk = 6), this is 686.
872 static s32 ipio2[] = {
873 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
874 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
875 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
876 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
877 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
878 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
879 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
880 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
881 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
882 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
883 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
885 #if LDBL_MAX_EXP > 1024
886 0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6,
887 0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2,
888 0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35,
889 0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30,
890 0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C,
891 0x467D86, 0x2D71E3, 0x9AC69B, 0x006233, 0x7CD2B4, 0x97A7B4,
892 0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770,
893 0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7,
894 0xCB2324, 0x778AD6, 0x23545A, 0xB91F00, 0x1B0AF1, 0xDFCE19,
895 0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522,
896 0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16,
897 0xDE3B58, 0x929BDE, 0x2822D2, 0xE88628, 0x4D58E2, 0x32CAC6,
898 0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E,
899 0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48,
900 0xD36710, 0xD8DDAA, 0x425FAE, 0xCE616A, 0xA4280A, 0xB499D3,
901 0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF,
902 0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55,
903 0x36D9CA, 0xD2A828, 0x8D61C2, 0x77C912, 0x142604, 0x9B4612,
904 0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929,
905 0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC,
906 0xC3E7B3, 0x28F8C7, 0x940593, 0x3E71C1, 0xB3092E, 0xF3450B,
907 0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C,
908 0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4,
909 0x9794E8, 0x84E6E2, 0x973199, 0x6BED88, 0x365F5F, 0x0EFDBB,
910 0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC,
911 0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C,
912 0x90AA47, 0x02E774, 0x24D6BD, 0xA67DF7, 0x72486E, 0xEF169F,
913 0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5,
914 0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437,
915 0x10D86D, 0x324832, 0x754C5B, 0xD4714E, 0x6E5445, 0xC1090B,
916 0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA,
917 0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD,
918 0x6AE290, 0x89D988, 0x50722C, 0xBEA404, 0x940777, 0x7030F3,
919 0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3,
920 0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717,
921 0x3BDF08, 0x2B3715, 0xA0805C, 0x93805A, 0x921110, 0xD8E80F,
922 0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61,
923 0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB,
924 0xAA140A, 0x2F2689, 0x768364, 0x333B09, 0x1A940E, 0xAA3A51,
925 0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0,
926 0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C,
927 0x5BC3D8, 0xC492F5, 0x4BADC6, 0xA5CA4E, 0xCD37A7, 0x36A9E6,
928 0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC,
929 0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED,
930 0x306529, 0xBF5657, 0x3AFF47, 0xB9F96A, 0xF3BE75, 0xDF9328,
931 0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D,
932 0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0,
933 0xA8654F, 0xA5C1D2, 0x0F3F0B, 0xCD785B, 0x76F923, 0x048B7B,
934 0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4,
935 0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3,
936 0xDA4886, 0xA05DF7, 0xF480C6, 0x2FF0AC, 0x9AECDD, 0xBC5C3F,
937 0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD,
938 0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B,
939 0x2A1216, 0x2DB7DC, 0xFDE5FA, 0xFEDB89, 0xFDBE89, 0x6C76E4,
940 0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761,
941 0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31,
942 0x48D784, 0x16DF30, 0x432DC7, 0x356125, 0xCE70C9, 0xB8CB30,
943 0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262,
944 0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E,
945 0xC4F133, 0x5F6E13, 0xE4305D, 0xA92E85, 0xC3B21D, 0x3632A1,
946 0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C,
947 0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4,
948 0xCBDA11, 0xD0BE7D, 0xC1DB9B, 0xBD17AB, 0x81A2CA, 0x5C6A08,
949 0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196,
950 0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9,
951 0x4F6A68, 0xA82A4A, 0x5AC44F, 0xBCF82D, 0x985AD7, 0x95C7F4,
952 0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC,
953 0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C,
954 0xD0C0B2, 0x485551, 0x0EFB1E, 0xC37295, 0x3B06A3, 0x3540C0,
955 0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C,
956 0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0,
957 0x3C3ABA, 0x461846, 0x5F7555, 0xF5BDD2, 0xC6926E, 0x5D2EAC,
958 0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22,
959 0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893,
960 0x745D7C, 0xB2AD6B, 0x9D6ECD, 0x7B723E, 0x6A11C6, 0xA9CFF7,
961 0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5,
962 0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F,
963 0xBEFDFD, 0xEF4556, 0x367ED9, 0x13D9EC, 0xB9BA8B, 0xFC97C4,
964 0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF,
965 0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B,
966 0x9C2A3E, 0xCC5F11, 0x4A0BFD, 0xFBF4E1, 0x6D3B8E, 0x2C86E2,
967 0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138,
968 0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E,
969 0xCC2254, 0xDC552A, 0xD6C6C0, 0x96190B, 0xB8701A, 0x649569,
970 0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34,
971 0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9,
972 0x9B5861, 0xBC57E1, 0xC68351, 0x103ED8, 0x4871DD, 0xDD1C2D,
973 0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F,
974 0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855,
975 0x382682, 0x9BE7CA, 0xA40D51, 0xB13399, 0x0ED7A9, 0x480569,
976 0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B,
977 0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE,
978 0x5FD45E, 0xA4677B, 0x7AACBA, 0xA2F655, 0x23882B, 0x55BA41,
979 0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49,
980 0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F,
981 0xAE5ADB, 0x86C547, 0x624385, 0x3B8621, 0x94792C, 0x876110,
982 0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8,
983 0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365,
984 0xB1933D, 0x0B7CBD, 0xDC51A4, 0x63DD27, 0xDDE169, 0x19949A,
985 0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270,
986 0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5,
987 0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616,
988 0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B,
989 0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0,
990 #endif
993 static f64 PIo2[] = {
994 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
995 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
996 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
997 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
998 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
999 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
1000 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
1001 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
1003 static s32 __rem_pio2_large(f64 *x, f64 *y, s32 e0, s32 nx, s32 prec)
1005 s32 jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
1006 f64 z,fw,f[20],fq[20],q[20];
1008 /* initialize jk*/
1009 jk = init_jk[prec];
1010 jp = jk;
1012 /* determine jx,jv,q0, note that 3>q0 */
1013 jx = nx-1;
1014 jv = (e0-3)/24; if(jv<0) jv=0;
1015 q0 = e0-24*(jv+1);
1017 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
1018 j = jv-jx; m = jx+jk;
1019 for (i=0; i<=m; i++,j++)
1020 f[i] = j<0 ? 0.0 : (f64)ipio2[j];
1022 /* compute q[0],q[1],...q[jk] */
1023 for (i=0; i<=jk; i++) {
1024 for (j=0,fw=0.0; j<=jx; j++)
1025 fw += x[j]*f[jx+i-j];
1026 q[i] = fw;
1029 jz = jk;
1030 recompute:
1031 /* distill q[] into iq[] reversingly */
1032 for (i=0,j=jz,z=q[jz]; j>0; i++,j--) {
1033 fw = (f64)(s32)(0x1p-24*z);
1034 iq[i] = (s32)(z - 0x1p24*fw);
1035 z = q[j-1]+fw;
1038 /* compute n */
1039 z = scalbn(z,q0); /* actual value of z */
1040 z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
1041 n = (s32)z;
1042 z -= (f64)n;
1043 ih = 0;
1044 if (q0 > 0) { /* need iq[jz-1] to determine n */
1045 i = iq[jz-1]>>(24-q0); n += i;
1046 iq[jz-1] -= i<<(24-q0);
1047 ih = iq[jz-1]>>(23-q0);
1049 else if (q0 == 0) ih = iq[jz-1]>>23;
1050 else if (z >= 0.5) ih = 2;
1052 if (ih > 0) { /* q > 0.5 */
1053 n += 1; carry = 0;
1054 for (i=0; i<jz; i++) { /* compute 1-q */
1055 j = iq[i];
1056 if (carry == 0) {
1057 if (j != 0) {
1058 carry = 1;
1059 iq[i] = 0x1000000 - j;
1061 } else
1062 iq[i] = 0xffffff - j;
1064 if (q0 > 0) { /* rare case: chance is 1 in 12 */
1065 switch(q0) {
1066 case 1:
1067 iq[jz-1] &= 0x7fffff; break;
1068 case 2:
1069 iq[jz-1] &= 0x3fffff; break;
1072 if (ih == 2) {
1073 z = 1.0 - z;
1074 if (carry != 0)
1075 z -= scalbn(1.0,q0);
1079 /* check if recomputation is needed */
1080 if (z == 0.0) {
1081 j = 0;
1082 for (i=jz-1; i>=jk; i--) j |= iq[i];
1083 if (j == 0) { /* need recomputation */
1084 for (k=1; iq[jk-k]==0; k++); /* k = no. of terms needed */
1086 for (i=jz+1; i<=jz+k; i++) { /* add q[jz+1] to q[jz+k] */
1087 f[jx+i] = (f64)ipio2[jv+i];
1088 for (j=0,fw=0.0; j<=jx; j++)
1089 fw += x[j]*f[jx+i-j];
1090 q[i] = fw;
1092 jz += k;
1093 goto recompute;
1097 /* chop off zero terms */
1098 if (z == 0.0) {
1099 jz -= 1;
1100 q0 -= 24;
1101 while (iq[jz] == 0) {
1102 jz--;
1103 q0 -= 24;
1105 } else { /* break z into 24-bit if necessary */
1106 z = scalbn(z,-q0);
1107 if (z >= 0x1p24) {
1108 fw = (f64)(s32)(0x1p-24*z);
1109 iq[jz] = (s32)(z - 0x1p24*fw);
1110 jz += 1;
1111 q0 += 24;
1112 iq[jz] = (s32)fw;
1113 } else
1114 iq[jz] = (s32)z;
1117 /* convert integer "bit" chunk to floating-point value */
1118 fw = scalbn(1.0,q0);
1119 for (i=jz; i>=0; i--) {
1120 q[i] = fw*(f64)iq[i];
1121 fw *= 0x1p-24;
1124 /* compute PIo2[0,...,jp]*q[jz,...,0] */
1125 for(i=jz; i>=0; i--) {
1126 for (fw=0.0,k=0; k<=jp && k<=jz-i; k++)
1127 fw += PIo2[k]*q[i+k];
1128 fq[jz-i] = fw;
1131 /* compress fq[] into y[] */
1132 switch(prec) {
1133 case 0:
1134 fw = 0.0;
1135 for (i=jz; i>=0; i--)
1136 fw += fq[i];
1137 y[0] = ih==0 ? fw : -fw;
1138 break;
1139 case 1:
1140 case 2:
1141 fw = 0.0;
1142 for (i=jz; i>=0; i--)
1143 fw += fq[i];
1144 // TODO: drop excess precision here once double_t is used
1145 // XXX: our double_t is double
1146 //fw = (f64)fw;
1147 y[0] = ih==0 ? fw : -fw;
1148 fw = fq[0]-fw;
1149 for (i=1; i<=jz; i++)
1150 fw += fq[i];
1151 y[1] = ih==0 ? fw : -fw;
1152 break;
1153 case 3: /* painful */
1154 for (i=jz; i>0; i--) {
1155 fw = fq[i-1]+fq[i];
1156 fq[i] += fq[i-1]-fw;
1157 fq[i-1] = fw;
1159 for (i=jz; i>1; i--) {
1160 fw = fq[i-1]+fq[i];
1161 fq[i] += fq[i-1]-fw;
1162 fq[i-1] = fw;
1164 for (fw=0.0,i=jz; i>=2; i--)
1165 fw += fq[i];
1166 if (ih==0) {
1167 y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
1168 } else {
1169 y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
1172 return n&7;
1174 /* origin: FreeBSD /usr/src/lib/msun/src/k_cos.c */
1176 * ====================================================
1177 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
1179 * Developed at SunSoft, a Sun Microsystems, Inc. business.
1180 * Permission to use, copy, modify, and distribute this
1181 * software is freely granted, provided that this notice
1182 * is preserved.
1183 * ====================================================
1186 * __cos( x, y )
1187 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
1188 * Input x is assumed to be bounded by ~pi/4 in magnitude.
1189 * Input y is the tail of x.
1191 * Algorithm
1192 * 1. Since cos(-x) = cos(x), we need only to consider positive x.
1193 * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
1194 * 3. cos(x) is approximated by a polynomial of degree 14 on
1195 * [0,pi/4]
1196 * 4 14
1197 * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
1198 * where the remez error is
1200 * | 2 4 6 8 10 12 14 | -58
1201 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
1202 * | |
1204 * 4 6 8 10 12 14
1205 * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
1206 * cos(x) ~ 1 - x*x/2 + r
1207 * since cos(x+y) ~ cos(x) - sin(x)*y
1208 * ~ cos(x) - x*y,
1209 * a correction term is necessary in cos(x) and hence
1210 * cos(x+y) = 1 - (x*x/2 - (r - x*y))
1211 * For better accuracy, rearrange to
1212 * cos(x+y) ~ w + (tmp + (r-x*y))
1213 * where w = 1 - x*x/2 and tmp is a tiny correction term
1214 * (1 - x*x/2 == w + tmp exactly in infinite precision).
1215 * The exactness of w + tmp in infinite precision depends on w
1216 * and tmp having the same precision as x. If they have extra
1217 * precision due to compiler bugs, then the extra precision is
1218 * only good provided it is retained in all terms of the final
1219 * expression for cos(). Retention happens in all cases tested
1220 * under FreeBSD, so don't pessimize things by forcibly clipping
1221 * any extra precision in w.
1223 #define C1 4.16666666666666019037e-02 /* 0x3FA55555, 0x5555554C */
1224 #define C2 -1.38888888888741095749e-03 /* 0xBF56C16C, 0x16C15177 */
1225 #define C3 2.48015872894767294178e-05 /* 0x3EFA01A0, 0x19CB1590 */
1226 #define C4 -2.75573143513906633035e-07 /* 0xBE927E4F, 0x809C52AD */
1227 #define C5 2.08757232129817482790e-09 /* 0x3E21EE9E, 0xBDB4B1C4 */
1228 #define C6 -1.13596475577881948265e-11 /* 0xBDA8FAE9, 0xBE8838D4 */
1229 static f64 __cos(f64 x, f64 y)
1231 f64 hz,z,r,w;
1233 z = x*x;
1234 w = z*z;
1235 r = z*(C1+z*(C2+z*C3)) + w*w*(C4+z*(C5+z*C6));
1236 hz = 0.5*z;
1237 w = 1.0-hz;
1238 return w + (((1.0-w)-hz) + (z*r-x*y));
1240 #undef C1
1241 #undef C2
1242 #undef C3
1243 #undef C4
1244 #undef C5
1245 #undef C6
1246 /* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c */
1248 * ====================================================
1249 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
1251 * Developed at SunSoft, a Sun Microsystems, Inc. business.
1252 * Permission to use, copy, modify, and distribute this
1253 * software is freely granted, provided that this notice
1254 * is preserved.
1255 * ====================================================
1257 * Optimized by Bruce D. Evans.
1259 /* __rem_pio2(x,y)
1261 * return the remainder of x rem pi/2 in y[0]+y[1]
1262 * use __rem_pio2_large() for large x
1265 * invpio2: 53 bits of 2/pi
1266 * pio2_1: first 33 bit of pi/2
1267 * pio2_1t: pi/2 - pio2_1
1268 * pio2_2: second 33 bit of pi/2
1269 * pio2_2t: pi/2 - (pio2_1+pio2_2)
1270 * pio2_3: third 33 bit of pi/2
1271 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
1273 #define toint 1.5/EPS
1274 #define pio4 0x1.921fb54442d18p-1
1275 #define invpio2 6.36619772367581382433e-01 /* 0x3FE45F30, 0x6DC9C883 */
1276 #define pio2_1 1.57079632673412561417e+00 /* 0x3FF921FB, 0x54400000 */
1277 #define pio2_1t 6.07710050650619224932e-11 /* 0x3DD0B461, 0x1A626331 */
1278 #define pio2_2 6.07710050630396597660e-11 /* 0x3DD0B461, 0x1A600000 */
1279 #define pio2_2t 2.02226624879595063154e-21 /* 0x3BA3198A, 0x2E037073 */
1280 #define pio2_3 2.02226624871116645580e-21 /* 0x3BA3198A, 0x2E000000 */
1281 #define pio2_3t 8.47842766036889956997e-32 /* 0x397B839A, 0x252049C1 */
1282 /* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */
1283 static s32 __rem_pio2(f64 x, f64 *y)
1285 union {f64 f; u64 i;} u = {x};
1286 f64 z,w,t,r,fn;
1287 f64 tx[3],ty[2];
1288 u32 ix;
1289 s32 sign, n, ex, ey, i;
1291 sign = u.i>>63;
1292 ix = u.i>>32 & 0x7fffffff;
1293 if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */
1294 if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */
1295 goto medium; /* cancellation -- use medium case */
1296 if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */
1297 if (!sign) {
1298 z = x - pio2_1; /* one round good to 85 bits */
1299 y[0] = z - pio2_1t;
1300 y[1] = (z-y[0]) - pio2_1t;
1301 return 1;
1302 } else {
1303 z = x + pio2_1;
1304 y[0] = z + pio2_1t;
1305 y[1] = (z-y[0]) + pio2_1t;
1306 return -1;
1308 } else {
1309 if (!sign) {
1310 z = x - 2*pio2_1;
1311 y[0] = z - 2*pio2_1t;
1312 y[1] = (z-y[0]) - 2*pio2_1t;
1313 return 2;
1314 } else {
1315 z = x + 2*pio2_1;
1316 y[0] = z + 2*pio2_1t;
1317 y[1] = (z-y[0]) + 2*pio2_1t;
1318 return -2;
1322 if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */
1323 if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */
1324 if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */
1325 goto medium;
1326 if (!sign) {
1327 z = x - 3*pio2_1;
1328 y[0] = z - 3*pio2_1t;
1329 y[1] = (z-y[0]) - 3*pio2_1t;
1330 return 3;
1331 } else {
1332 z = x + 3*pio2_1;
1333 y[0] = z + 3*pio2_1t;
1334 y[1] = (z-y[0]) + 3*pio2_1t;
1335 return -3;
1337 } else {
1338 if (ix == 0x401921fb) /* |x| ~= 4pi/2 */
1339 goto medium;
1340 if (!sign) {
1341 z = x - 4*pio2_1;
1342 y[0] = z - 4*pio2_1t;
1343 y[1] = (z-y[0]) - 4*pio2_1t;
1344 return 4;
1345 } else {
1346 z = x + 4*pio2_1;
1347 y[0] = z + 4*pio2_1t;
1348 y[1] = (z-y[0]) + 4*pio2_1t;
1349 return -4;
1353 if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
1354 medium:
1355 /* rint(x/(pi/2)) */
1356 fn = (f64)x*invpio2 + toint - toint;
1357 n = (s32)fn;
1358 r = x - fn*pio2_1;
1359 w = fn*pio2_1t; /* 1st round, good to 85 bits */
1360 /* Matters with directed rounding. */
1361 if (predict_false(r - w < -pio4)) {
1362 n--;
1363 fn--;
1364 r = x - fn*pio2_1;
1365 w = fn*pio2_1t;
1366 } else if (predict_false(r - w > pio4)) {
1367 n++;
1368 fn++;
1369 r = x - fn*pio2_1;
1370 w = fn*pio2_1t;
1372 y[0] = r - w;
1373 u.f = y[0];
1374 ey = u.i>>52 & 0x7ff;
1375 ex = ix>>20;
1376 if (ex - ey > 16) { /* 2nd round, good to 118 bits */
1377 t = r;
1378 w = fn*pio2_2;
1379 r = t - w;
1380 w = fn*pio2_2t - ((t-r)-w);
1381 y[0] = r - w;
1382 u.f = y[0];
1383 ey = u.i>>52 & 0x7ff;
1384 if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */
1385 t = r;
1386 w = fn*pio2_3;
1387 r = t - w;
1388 w = fn*pio2_3t - ((t-r)-w);
1389 y[0] = r - w;
1392 y[1] = (r - y[0]) - w;
1393 return n;
1396 * all other (large) arguments
1398 if (ix >= 0x7ff00000) { /* x is inf or NaN */
1399 y[0] = y[1] = x - x;
1400 return 0;
1402 /* set z = scalbn(|x|,-ilogb(x)+23) */
1403 u.f = x;
1404 u.i &= (u64)-1>>12;
1405 u.i |= (u64)(0x3ff + 23)<<52;
1406 z = u.f;
1407 for (i=0; i < 2; i++) {
1408 tx[i] = (f64)(s32)z;
1409 z = (z-tx[i])*0x1p24;
1411 tx[i] = z;
1412 /* skip zero terms, first term is non-zero */
1413 while (tx[i] == 0.0)
1414 i--;
1415 n = __rem_pio2_large(tx,ty,(s32)(ix>>20)-(0x3ff+23),i+1,1);
1416 if (sign) {
1417 y[0] = -ty[0];
1418 y[1] = -ty[1];
1419 return -n;
1421 y[0] = ty[0];
1422 y[1] = ty[1];
1423 return n;
1425 #undef toint
1426 #undef pio4
1427 #undef invpio2
1428 #undef pio2_1
1429 #undef pio2_1t
1430 #undef pio2_2
1431 #undef pio2_2t
1432 #undef pio2_3
1433 #undef pio2_3t
1434 /* origin: FreeBSD /usr/src/lib/msun/src/k_sin.c */
1436 * ====================================================
1437 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
1439 * Developed at SunSoft, a Sun Microsystems, Inc. business.
1440 * Permission to use, copy, modify, and distribute this
1441 * software is freely granted, provided that this notice
1442 * is preserved.
1443 * ====================================================
1445 /* __sin( x, y, iy)
1446 * kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
1447 * Input x is assumed to be bounded by ~pi/4 in magnitude.
1448 * Input y is the tail of x.
1449 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
1451 * Algorithm
1452 * 1. Since sin(-x) = -sin(x), we need only to consider positive x.
1453 * 2. Callers must return sin(-0) = -0 without calling here since our
1454 * odd polynomial is not evaluated in a way that preserves -0.
1455 * Callers may do the optimization sin(x) ~ x for tiny x.
1456 * 3. sin(x) is approximated by a polynomial of degree 13 on
1457 * [0,pi/4]
1458 * 3 13
1459 * sin(x) ~ x + S1*x + ... + S6*x
1460 * where
1462 * |sin(x) 2 4 6 8 10 12 | -58
1463 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
1464 * | x |
1466 * 4. sin(x+y) = sin(x) + sin'(x')*y
1467 * ~ sin(x) + (1-x*x/2)*y
1468 * For better accuracy, let
1469 * 3 2 2 2 2
1470 * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
1471 * then 3 2
1472 * sin(x) = x + (S1*x + (x *(r-y/2)+y))
1474 #define S1 -1.66666666666666324348e-01 /* 0xBFC55555, 0x55555549 */
1475 #define S2 8.33333333332248946124e-03 /* 0x3F811111, 0x1110F8A6 */
1476 #define S3 -1.98412698298579493134e-04 /* 0xBF2A01A0, 0x19C161D5 */
1477 #define S4 2.75573137070700676789e-06 /* 0x3EC71DE3, 0x57B1FE7D */
1478 #define S5 -2.50507602534068634195e-08 /* 0xBE5AE5E6, 0x8A2B9CEB */
1479 #define S6 1.58969099521155010221e-10 /* 0x3DE5D93A, 0x5ACFD57C */
1480 static f64 __sin(f64 x, f64 y, s32 iy)
1482 f64 z,r,v,w;
1484 z = x*x;
1485 w = z*z;
1486 r = S2 + z*(S3 + z*S4) + z*w*(S5 + z*S6);
1487 v = z*x;
1488 if (iy == 0)
1489 return x + v*(S1 + z*r);
1490 else
1491 return x - ((z*(0.5*y - v*r) - y) - v*S1);
1493 #undef S1
1494 #undef S2
1495 #undef S3
1496 #undef S4
1497 #undef S5
1498 #undef S6
1499 /* origin: FreeBSD /usr/src/lib/msun/src/s_cos.c */
1501 * ====================================================
1502 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
1504 * Developed at SunPro, a Sun Microsystems, Inc. business.
1505 * Permission to use, copy, modify, and distribute this
1506 * software is freely granted, provided that this notice
1507 * is preserved.
1508 * ====================================================
1510 /* cos(x)
1511 * Return cosine function of x.
1513 * kernel function:
1514 * __sin ... sine function on [-pi/4,pi/4]
1515 * __cos ... cosine function on [-pi/4,pi/4]
1516 * __rem_pio2 ... argument reduction routine
1518 * Method.
1519 * Let S,C and T denote the sin, cos and tan respectively on
1520 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
1521 * in [-pi/4 , +pi/4], and let n = k mod 4.
1522 * We have
1524 * n sin(x) cos(x) tan(x)
1525 * ----------------------------------------------------------
1526 * 0 S C T
1527 * 1 C -S -1/T
1528 * 2 -S -C T
1529 * 3 -C S -1/T
1530 * ----------------------------------------------------------
1532 * Special cases:
1533 * Let trig be any of sin, cos, or tan.
1534 * trig(+-INF) is NaN, with signals;
1535 * trig(NaN) is that NaN;
1537 * Accuracy:
1538 * TRIG(x) returns trig(x) nearly rounded
1540 static f64 cos(f64 x)
1542 f64 y[2];
1543 u32 ix;
1544 u32 n;
1546 ix = asu64(x) >> 32; /* get high word */
1547 ix &= 0x7fffffff;
1549 /* |x| ~< pi/4 */
1550 if (ix <= 0x3fe921fb) {
1551 if (ix < 0x3e46a09e) { /* |x| < 2**-27 * sqrt(2) */
1552 /* raise inexact if x!=0 */
1553 fp_force_eval(x + 0x1p120f);
1554 return 1.0;
1556 return __cos(x, 0);
1559 /* cos(Inf or NaN) is NaN */
1560 if (ix >= 0x7ff00000)
1561 return x-x;
1563 /* argument reduction */
1564 n = __rem_pio2(x, y);
1565 switch (n&3) {
1566 case 0: return __cos(y[0], y[1]);
1567 case 1: return -__sin(y[0], y[1], 1);
1568 case 2: return -__cos(y[0], y[1]);
1569 default:
1570 return __sin(y[0], y[1], 1);
1573 /* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */
1575 * ====================================================
1576 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
1578 * Developed at SunPro, a Sun Microsystems, Inc. business.
1579 * Permission to use, copy, modify, and distribute this
1580 * software is freely granted, provided that this notice
1581 * is preserved.
1582 * ====================================================
1584 /* sin(x)
1585 * Return sine function of x.
1587 * kernel function:
1588 * __sin ... sine function on [-pi/4,pi/4]
1589 * __cos ... cose function on [-pi/4,pi/4]
1590 * __rem_pio2 ... argument reduction routine
1592 * Method.
1593 * Let S,C and T denote the sin, cos and tan respectively on
1594 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
1595 * in [-pi/4 , +pi/4], and let n = k mod 4.
1596 * We have
1598 * n sin(x) cos(x) tan(x)
1599 * ----------------------------------------------------------
1600 * 0 S C T
1601 * 1 C -S -1/T
1602 * 2 -S -C T
1603 * 3 -C S -1/T
1604 * ----------------------------------------------------------
1606 * Special cases:
1607 * Let trig be any of sin, cos, or tan.
1608 * trig(+-INF) is NaN, with signals;
1609 * trig(NaN) is that NaN;
1611 * Accuracy:
1612 * TRIG(x) returns trig(x) nearly rounded
1614 static f64 sin(f64 x)
1616 f64 y[2];
1617 u32 ix;
1618 u32 n;
1620 /* High word of x. */
1621 ix = asu64(x) >> 32; /* get high word */
1622 ix &= 0x7fffffff;
1624 /* |x| ~< pi/4 */
1625 if (ix <= 0x3fe921fb) {
1626 if (ix < 0x3e500000) { /* |x| < 2**-26 */
1627 /* raise inexact if x != 0 and underflow if subnormal*/
1628 fp_force_eval(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
1629 return x;
1631 return __sin(x, 0.0, 0);
1634 /* sin(Inf or NaN) is NaN */
1635 if (ix >= 0x7ff00000)
1636 return x - x;
1638 /* argument reduction needed */
1639 n = __rem_pio2(x, y);
1640 switch (n&3) {
1641 case 0: return __sin(y[0], y[1], 1);
1642 case 1: return __cos(y[0], y[1]);
1643 case 2: return -__sin(y[0], y[1], 1);
1644 default:
1645 return -__cos(y[0], y[1]);
1648 static f64 fabs(f64 x)
1650 union {f64 f; u64 i;} u;
1652 u.f = x;
1653 u.i &= -1UL/2;
1654 return u.f;
1656 /* origin: FreeBSD /usr/src/lib/msun/src/s_atan.c */
1658 * ====================================================
1659 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
1661 * Developed at SunPro, a Sun Microsystems, Inc. business.
1662 * Permission to use, copy, modify, and distribute this
1663 * software is freely granted, provided that this notice
1664 * is preserved.
1665 * ====================================================
1667 /* atan(x)
1668 * Method
1669 * 1. Reduce x to positive by atan(x) = -atan(-x).
1670 * 2. According to the integer k=4t+0.25 chopped, t=x, the argument
1671 * is further reduced to one of the following intervals and the
1672 * arctangent of t is evaluated by the corresponding formula:
1674 * [0,7/16] atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
1675 * [7/16,11/16] atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) )
1676 * [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) )
1677 * [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) )
1678 * [39/16,INF] atan(x) = atan(INF) + atan( -1/t )
1680 * Constants:
1681 * The hexadecimal values are the intended ones for the following
1682 * constants. The decimal values may be used, provided that the
1683 * compiler will convert from decimal to binary accurately enough
1684 * to produce the hexadecimal values shown.
1686 static f64 atanhi[] = {
1687 4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
1688 7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */
1689 9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */
1690 1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */
1693 static f64 atanlo[] = {
1694 2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */
1695 3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */
1696 1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */
1697 6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */
1700 static f64 aT[] = {
1701 3.33333333333329318027e-01, /* 0x3FD55555, 0x5555550D */
1702 -1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */
1703 1.42857142725034663711e-01, /* 0x3FC24924, 0x920083FF */
1704 -1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */
1705 9.09088713343650656196e-02, /* 0x3FB745CD, 0xC54C206E */
1706 -7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */
1707 6.66107313738753120669e-02, /* 0x3FB10D66, 0xA0D03D51 */
1708 -5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */
1709 4.97687799461593236017e-02, /* 0x3FA97B4B, 0x24760DEB */
1710 -3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */
1711 1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */
1714 static f64 atan(f64 x)
1716 f64 w,s1,s2,z;
1717 u32 ix,sign;
1718 s32 id;
1720 ix = asu64(x) >> 32; /* get high word */
1721 sign = ix >> 31;
1722 ix &= 0x7fffffff;
1723 if (ix >= 0x44100000) { /* if |x| >= 2^66 */
1724 if (isnan(x))
1725 return x;
1726 z = atanhi[3] + 0x1p-120f;
1727 return sign ? -z : z;
1729 if (ix < 0x3fdc0000) { /* |x| < 0.4375 */
1730 if (ix < 0x3e400000) { /* |x| < 2^-27 */
1731 if (ix < 0x00100000)
1732 /* raise underflow for subnormal x */
1733 fp_force_eval_f32((f32)x);
1734 return x;
1736 id = -1;
1737 } else {
1738 x = fabs(x);
1739 if (ix < 0x3ff30000) { /* |x| < 1.1875 */
1740 if (ix < 0x3fe60000) { /* 7/16 <= |x| < 11/16 */
1741 id = 0;
1742 x = (2.0*x-1.0)/(2.0+x);
1743 } else { /* 11/16 <= |x| < 19/16 */
1744 id = 1;
1745 x = (x-1.0)/(x+1.0);
1747 } else {
1748 if (ix < 0x40038000) { /* |x| < 2.4375 */
1749 id = 2;
1750 x = (x-1.5)/(1.0+1.5*x);
1751 } else { /* 2.4375 <= |x| < 2^66 */
1752 id = 3;
1753 x = -1.0/x;
1757 /* end of argument reduction */
1758 z = x*x;
1759 w = z*z;
1760 /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
1761 s1 = z*(aT[0]+w*(aT[2]+w*(aT[4]+w*(aT[6]+w*(aT[8]+w*aT[10])))));
1762 s2 = w*(aT[1]+w*(aT[3]+w*(aT[5]+w*(aT[7]+w*aT[9]))));
1763 if (id < 0)
1764 return x - x*(s1+s2);
1765 z = atanhi[id] - (x*(s1+s2) - atanlo[id] - x);
1766 return sign ? -z : z;
1769 * Shared data between exp, exp2 and pow.
1771 * Copyright (c) 2018, Arm Limited.
1772 * SPDX-License-Identifier: MIT
1775 * Copyright (c) 2018, Arm Limited.
1776 * SPDX-License-Identifier: MIT
1778 #define EXP_TABLE_BITS 7
1779 #define EXP_POLY_ORDER 5
1780 #define EXP_USE_TOINT_NARROW 0
1781 #define EXP2_POLY_ORDER 5
1782 #define N (1 << EXP_TABLE_BITS)
1783 static struct exp_data {
1784 f64 invln2N;
1785 f64 shift;
1786 f64 negln2hiN;
1787 f64 negln2loN;
1788 f64 poly[4]; /* Last four coefficients. */
1789 f64 exp2_shift;
1790 f64 exp2_poly[EXP2_POLY_ORDER];
1791 u64 tab[2*(1 << EXP_TABLE_BITS)];
1792 } __exp_data = {
1793 // N/ln2
1794 .invln2N = 0x1.71547652b82fep0 * N,
1795 // -ln2/N
1796 .negln2hiN = -0x1.62e42fefa0000p-8,
1797 .negln2loN = -0x1.cf79abc9e3b3ap-47,
1798 // Used for rounding when !TOINT_INTRINSICS
1799 #if EXP_USE_TOINT_NARROW
1800 .shift = 0x1800000000.8p0,
1801 #else
1802 .shift = 0x1.8p52,
1803 #endif
1804 // exp polynomial coefficients.
1805 .poly = {
1806 // abs error: 1.555*2^-66
1807 // ulp error: 0.509 (0.511 without fma)
1808 // if |x| < ln2/256+eps
1809 // abs error if |x| < ln2/256+0x1p-15: 1.09*2^-65
1810 // abs error if |x| < ln2/128: 1.7145*2^-56
1811 0x1.ffffffffffdbdp-2,
1812 0x1.555555555543cp-3,
1813 0x1.55555cf172b91p-5,
1814 0x1.1111167a4d017p-7,
1816 .exp2_shift = 0x1.8p52 / N,
1817 // exp2 polynomial coefficients.
1818 .exp2_poly = {
1819 // abs error: 1.2195*2^-65
1820 // ulp error: 0.507 (0.511 without fma)
1821 // if |x| < 1/256
1822 // abs error if |x| < 1/128: 1.9941*2^-56
1823 0x1.62e42fefa39efp-1,
1824 0x1.ebfbdff82c424p-3,
1825 0x1.c6b08d70cf4b5p-5,
1826 0x1.3b2abd24650ccp-7,
1827 0x1.5d7e09b4e3a84p-10,
1829 // 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
1830 // tab[2*k] = asuint64(T[k])
1831 // tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
1832 .tab = {
1833 0x0, 0x3ff0000000000000,
1834 0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
1835 0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
1836 0xbc905e7a108766d1, 0x3fefe315e86e7f85,
1837 0x3c8cd2523567f613, 0x3fefd9b0d3158574,
1838 0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
1839 0x3c60f74e61e6c861, 0x3fefc74518759bc8,
1840 0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
1841 0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
1842 0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
1843 0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
1844 0xbc6a033489906e0b, 0x3fef9b66affed31b,
1845 0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
1846 0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
1847 0xbc91c923b9d5f416, 0x3fef829aaea92de0,
1848 0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
1849 0xbc801b15eaa59348, 0x3fef72b83c7d517b,
1850 0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
1851 0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
1852 0xbc96d99c7611eb26, 0x3fef5be084045cd4,
1853 0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
1854 0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
1855 0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
1856 0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
1857 0x3c968efde3a8a894, 0x3fef387a6e756238,
1858 0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
1859 0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
1860 0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
1861 0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
1862 0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
1863 0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
1864 0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
1865 0x3c834d754db0abb6, 0x3fef06fe0a31b715,
1866 0x3c864201e2ac744c, 0x3fef0170fc4cd831,
1867 0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
1868 0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
1869 0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
1870 0xbc9907f81b512d8e, 0x3feeecae6d05d866,
1871 0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
1872 0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
1873 0x3c859f48a72a4c6d, 0x3feedea64c123422,
1874 0xbc9312607a28698a, 0x3feeda4504ac801c,
1875 0xbc58a78f4817895b, 0x3feed60a21f72e2a,
1876 0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
1877 0x3c4363ed60c2ac11, 0x3feece086061892d,
1878 0x3c9666093b0664ef, 0x3feeca41ed1d0057,
1879 0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
1880 0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
1881 0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
1882 0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
1883 0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
1884 0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
1885 0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
1886 0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
1887 0x3c93350518fdd78e, 0x3feeaf4736b527da,
1888 0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
1889 0x3c9063e1e21c5409, 0x3feeab07dd485429,
1890 0x3c34c7855019c6ea, 0x3feea9268a5946b7,
1891 0x3c9432e62b64c035, 0x3feea76f15ad2148,
1892 0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
1893 0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
1894 0xbc845378892be9ae, 0x3feea34634ccc320,
1895 0xbc93cedd78565858, 0x3feea23882552225,
1896 0x3c5710aa807e1964, 0x3feea155d44ca973,
1897 0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
1898 0xbc6a12ad8734b982, 0x3feea012750bdabf,
1899 0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
1900 0xbc80dc3d54e08851, 0x3fee9f7df9519484,
1901 0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
1902 0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
1903 0xbc8619321e55e68a, 0x3fee9feb564267c9,
1904 0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
1905 0xbc7b32dcb94da51d, 0x3feea11473eb0187,
1906 0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
1907 0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
1908 0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
1909 0xbc9369b6f13b3734, 0x3feea589994cce13,
1910 0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
1911 0xbc94d450d872576e, 0x3feea8d99b4492ed,
1912 0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
1913 0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
1914 0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
1915 0x3c7bf68359f35f44, 0x3feeb1ae99157736,
1916 0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
1917 0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
1918 0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
1919 0xbc92434322f4f9aa, 0x3feebd829fde4e50,
1920 0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
1921 0x3c71affc2b91ce27, 0x3feec49182a3f090,
1922 0x3c6dd235e10a73bb, 0x3feec86319e32323,
1923 0xbc87c50422622263, 0x3feecc667b5de565,
1924 0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
1925 0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
1926 0x3c90cc319cee31d2, 0x3feed99e1330b358,
1927 0x3c8469846e735ab3, 0x3feede6b5579fdbf,
1928 0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
1929 0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
1930 0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
1931 0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
1932 0xbc90a40e3da6f640, 0x3feef9728de5593a,
1933 0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
1934 0xbc91eee26b588a35, 0x3fef05b030a1064a,
1935 0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
1936 0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
1937 0x3c736eae30af0cb3, 0x3fef199bdd85529c,
1938 0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
1939 0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
1940 0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
1941 0x3c676b2c6c921968, 0x3fef3720dcef9069,
1942 0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
1943 0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
1944 0xbc900dae3875a949, 0x3fef4f87080d89f2,
1945 0x3c74a385a63d07a7, 0x3fef5818dcfba487,
1946 0xbc82919e2040220f, 0x3fef60e316c98398,
1947 0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
1948 0x3c843a59ac016b4b, 0x3fef7321f301b460,
1949 0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
1950 0xbc892ab93b470dc9, 0x3fef864614f5a129,
1951 0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
1952 0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
1953 0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
1954 0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
1955 0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
1956 0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
1957 0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
1958 0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
1959 0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
1960 0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
1964 * Double-precision e^x function.
1966 * Copyright (c) 2018, Arm Limited.
1967 * SPDX-License-Identifier: MIT
1969 #define N (1 << EXP_TABLE_BITS)
1970 #define InvLn2N __exp_data.invln2N
1971 #define NegLn2hiN __exp_data.negln2hiN
1972 #define NegLn2loN __exp_data.negln2loN
1973 #define Shift __exp_data.shift
1974 #define T __exp_data.tab
1975 #define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
1976 #define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
1977 #define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
1978 #define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
1979 /* Handle cases that may overflow or underflow when computing the result that
1980 is scale*(1+TMP) without intermediate rounding. The bit representation of
1981 scale is in SBITS, however it has a computed exponent that may have
1982 overflown into the sign bit so that needs to be adjusted before using it as
1983 a double. (int32_t)KI is the k used in the argument reduction and exponent
1984 adjustment of scale, positive k here means the result may overflow and
1985 negative k means the result may underflow. */
1986 static f64 exp_specialcase(f64 tmp, u64 sbits, u64 ki)
1988 f64 scale, y;
1990 if ((ki & 0x80000000) == 0) {
1991 /* k > 0, the exponent of scale might have overflowed by <= 460. */
1992 sbits -= 1009ul << 52;
1993 scale = asf64(sbits);
1994 y = 0x1p1009 * (scale + scale * tmp);
1995 return eval_as_f64(y);
1997 /* k < 0, need special care in the subnormal range. */
1998 sbits += 1022ul << 52;
1999 scale = asf64(sbits);
2000 y = scale + scale * tmp;
2001 if (y < 1.0) {
2002 /* Round y to the right precision before scaling it into the subnormal
2003 range to avoid double rounding that can cause 0.5+E/2 ulp error where
2004 E is the worst-case ulp error outside the subnormal range. So this
2005 is only useful if the goal is better than 1 ulp worst-case error. */
2006 f64 hi, lo;
2007 lo = scale - y + scale * tmp;
2008 hi = 1.0 + y;
2009 lo = 1.0 - hi + y + lo;
2010 y = eval_as_f64(hi + lo) - 1.0;
2011 /* Avoid -0.0 with downward rounding. */
2012 if (WANT_ROUNDING && y == 0.0)
2013 y = 0.0;
2014 /* The underflow exception needs to be signaled explicitly. */
2015 fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
2017 y = 0x1p-1022 * y;
2018 return eval_as_f64(y);
2020 /* Top 12 bits of a double (sign and exponent bits). */
2021 static u32 top12(f64 x)
2023 return asu64(x) >> 52;
2025 static f64 exp(f64 x)
2027 u32 abstop;
2028 u64 ki, idx, top, sbits;
2029 f64 kd, z, r, r2, scale, tail, tmp;
2031 abstop = top12(x) & 0x7ff;
2032 if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
2033 if (abstop - top12(0x1p-54) >= 0x80000000)
2034 /* Avoid spurious underflow for tiny x. */
2035 /* Note: 0 is common input. */
2036 return WANT_ROUNDING ? 1.0 + x : 1.0;
2037 if (abstop >= top12(1024.0)) {
2038 if (asu64(x) == asu64(-INFINITY))
2039 return 0.0;
2040 if (abstop >= top12(INFINITY))
2041 return 1.0 + x;
2042 if (asu64(x) >> 63)
2043 return __math_uflow(0);
2044 else
2045 return __math_oflow(0);
2047 /* Large x is special cased below. */
2048 abstop = 0;
2051 /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
2052 /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
2053 z = InvLn2N * x;
2054 #if TOINT_INTRINSICS
2055 kd = roundtoint(z);
2056 ki = converttoint(z);
2057 #elif EXP_USE_TOINT_NARROW
2058 /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
2059 kd = eval_as_double(z + Shift);
2060 ki = asuint64(kd) >> 16;
2061 kd = (double_t)(int32_t)ki;
2062 #else
2063 /* z - kd is in [-1, 1] in non-nearest rounding modes. */
2064 kd = eval_as_f64(z + Shift);
2065 ki = asu64(kd);
2066 kd -= Shift;
2067 #endif
2068 r = x + kd * NegLn2hiN + kd * NegLn2loN;
2069 /* 2^(k/N) ~= scale * (1 + tail). */
2070 idx = 2 * (ki % N);
2071 top = ki << (52 - EXP_TABLE_BITS);
2072 tail = asf64(T[idx]);
2073 /* This is only a valid scale when -1023*N < k < 1024*N. */
2074 sbits = T[idx + 1] + top;
2075 /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
2076 /* Evaluation is optimized assuming superscalar pipelined execution. */
2077 r2 = r * r;
2078 /* Without fma the worst case error is 0.25/N ulp larger. */
2079 /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
2080 tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
2081 if (predict_false(abstop == 0))
2082 return exp_specialcase(tmp, sbits, ki);
2083 scale = asf64(sbits);
2084 /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
2085 is no spurious underflow here even without fma. */
2086 return eval_as_f64(scale + scale * tmp);
2088 #undef EXP_TABLE_BITS
2089 #undef EXP_POLY_ORDER
2090 #undef EXP_USE_TOINT_NARROW
2091 #undef EXP2_POLY_ORDER
2092 #undef N
2093 #undef InvLn2N
2094 #undef NegLn2hiN
2095 #undef NegLn2loN
2096 #undef Shift
2097 #undef T
2098 #undef C2
2099 #undef C3
2100 #undef C4
2101 #undef C5
2102 /*----------------------------------------------------------------------------*/
2103 #undef u32
2104 #undef s32
2105 #undef s64
2106 #undef u64
2107 #undef f64
2108 #undef WANT_ROUNDING
2109 #undef asu64
2110 #undef asf64
2111 #undef predict_true
2112 #undef predict_false
2113 #undef NAN
2114 #undef INFINITY
2115 #undef EPS
2116 #undef isnan