2 * Double-precision log2(x) function.
4 * Copyright (c) 2018, Arm Limited.
5 * SPDX-License-Identifier: MIT
11 #include "log2_data.h"
/* Short aliases for members of the __log2_data structure (declared in
   log2_data.h) and constants used by the routine below. */
/* Primary lookup table. NOTE(review): its use is not visible in this
   excerpt; presumably indexed by the subinterval index i - confirm. */
13 #define T __log2_data.tab
/* Secondary table whose entries have chi and clo members; both are
   subtracted from z where r is computed without __builtin_fma. */
14 #define T2 __log2_data.tab2
/* Polynomial coefficients B[0]..B[9], used on the path that handles
   inputs close to 1.0. */
15 #define B __log2_data.poly1
/* Polynomial coefficients A[0]..A[5], used on the general path. */
16 #define A __log2_data.poly
/* Two-part (hi + lo) factor multiplied with r; the routine's comments
   describe the resulting product as r/ln2. */
17 #define InvLn2hi __log2_data.invln2hi
18 #define InvLn2lo __log2_data.invln2lo
/* Number of subintervals the reduced range is split into. */
19 #define N (1 << LOG2_TABLE_BITS)
/* Bit-pattern offset used in range reduction; per the routine's comment
   the reduced argument z lies in [OFF,2*OFF). */
20 #define OFF 0x3fe6000000000000
/* Return the upper 16 bits of the 64-bit representation of x, i.e. the
   value of asuint64(x) shifted right by 48.  The caller tests the result
   against the masks 0x8000 and 0x7ff0. */
static inline uint32_t top16(double x)
{
	return (uint32_t)(asuint64(x) >> 48);
}
30 double_t z
, r
, r2
, r4
, y
, invc
, logc
, kd
, hi
, lo
, t1
, t2
, t3
, p
;
37 #define LO asuint64(1.0 - 0x1.5b51p-5)
38 #define HI asuint64(1.0 + 0x1.6ab2p-5)
39 if (predict_false(ix
- LO
< HI
- LO
)) {
40 /* Handle close to 1.0 inputs separately. */
41 /* Fix sign of zero with downward rounding when x==1. */
42 if (WANT_ROUNDING
&& predict_false(ix
== asuint64(1.0)))
47 lo
= r
* InvLn2lo
+ __builtin_fma(r
, InvLn2hi
, -hi
);
50 rhi
= asdouble(asuint64(r
) & -1ULL << 32);
53 lo
= rlo
* InvLn2hi
+ r
* InvLn2lo
;
55 r2
= r
* r
; /* rounding error: 0x1p-62. */
57 /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */
58 p
= r2
* (B
[0] + r
* B
[1]);
61 lo
+= r4
* (B
[2] + r
* B
[3] + r2
* (B
[4] + r
* B
[5]) +
62 r4
* (B
[6] + r
* B
[7] + r2
* (B
[8] + r
* B
[9])));
64 return eval_as_double(y
);
66 if (predict_false(top
- 0x0010 >= 0x7ff0 - 0x0010)) {
67 /* x < 0x1p-1022 or inf or nan. */
69 return __math_divzero(1);
70 if (ix
== asuint64(INFINITY
)) /* log(inf) == inf. */
72 if ((top
& 0x8000) || (top
& 0x7ff0) == 0x7ff0)
73 return __math_invalid(x
);
74 /* x is subnormal, normalize it. */
75 ix
= asuint64(x
* 0x1p
52);
79 /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
80 The range is split into N subintervals.
81 The ith subinterval contains z and c is near its center. */
83 i
= (tmp
>> (52 - LOG2_TABLE_BITS
)) % N
;
84 k
= (int64_t)tmp
>> 52; /* arithmetic shift */
85 iz
= ix
- (tmp
& 0xfffULL
<< 52);
91 /* log2(x) = log2(z/c) + log2(c) + k. */
92 /* r ~= z/c - 1, |r| < 1/(2*N). */
94 /* rounding error: 0x1p-55/N. */
95 r
= __builtin_fma(z
, invc
, -1.0);
97 t2
= r
* InvLn2lo
+ __builtin_fma(r
, InvLn2hi
, -t1
);
100 /* rounding error: 0x1p-55/N + 0x1p-65. */
101 r
= (z
- T2
[i
].chi
- T2
[i
].clo
) * invc
;
102 rhi
= asdouble(asuint64(r
) & -1ULL << 32);
105 t2
= rlo
* InvLn2hi
+ r
* InvLn2lo
;
108 /* hi + lo = r/ln2 + log2(c) + k. */
111 lo
= t3
- hi
+ t1
+ t2
;
113 /* log2(r+1) = r/ln2 + r^2*poly(r). */
114 /* Evaluation is optimized assuming superscalar pipelined execution. */
115 r2
= r
* r
; /* rounding error: 0x1p-54/N^2. */
117 /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
118 ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */
119 p
= A
[0] + r
* A
[1] + r2
* (A
[2] + r
* A
[3]) + r4
* (A
[4] + r
* A
[5]);
120 y
= lo
+ r2
* p
+ hi
;
121 return eval_as_double(y
);