1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
7 // The original C code, the long comment, and the constants
8 // below are from FreeBSD's /usr/src/lib/msun/src/s_expm1.c
9 // and came with this notice. The go code is a simplified
10 // version of the original C.
12 // ====================================================
13 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
15 // Developed at SunPro, a Sun Microsystems, Inc. business.
16 // Permission to use, copy, modify, and distribute this
17 // software is freely granted, provided that this notice
19 // ====================================================
22 // Returns exp(x)-1, the exponential of x minus 1.
25 // 1. Argument reduction:
26 // Given x, find r and integer k such that
28 // x = k*ln2 + r, |r| <= 0.5*ln2 ~ 0.34658
30 // Here a correction term c will be computed to compensate
31 // the error in r when rounded to a floating-point number.
33 // 2. Approximating expm1(r) by a special rational function on
34 // the interval [0,0.34658]:
36 // r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 - r**4/360 + ...
37 // we define R1(r*r) by
38 // r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 * R1(r*r)
40 // R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
41 // = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
42 // = 1 - r**2/60 + r**4/2520 - r**6/100800 + ...
43 // We use a special Reme algorithm on [0,0.347] to generate
44 // a polynomial of degree 5 in r*r to approximate R1. The
45 // maximum error of this polynomial approximation is bounded
46 // by 2**-61. In other words,
47 // R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
48 // where Q1 = -1.6666666666666567384E-2,
49 // Q2 = 3.9682539681370365873E-4,
50 // Q3 = -9.9206344733435987357E-6,
51 // Q4 = 2.5051361420808517002E-7,
52 // Q5 = -6.2843505682382617102E-9;
53 // (where z=r*r, and the values of Q1 to Q5 are listed below)
54 // with error bounded by
56 // | 1.0+Q1*z+...+Q5*z - R1(z) | <= 2
59 // expm1(r) = exp(r)-1 is then computed by the following
60 // specific way which minimize the accumulation rounding error:
62 // r r [ 3 - (R1 + R1*r/2) ]
63 // expm1(r) = r + --- + --- * [--------------------]
64 // 2 2 [ 6 - r*(3 - R1*r/2) ]
66 // To compensate the error in the argument reduction, we use
67 // expm1(r+c) = expm1(r) + c + expm1(r)*c
68 // ~ expm1(r) + c + r*c
69 // Thus c+r*c will be added in as the correction terms for
70 // expm1(r+c). Now rearrange the term to avoid optimization
73 // ({ ( r [ R1 - (3 - R1*r/2) ] ) } r )
74 // expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
75 // ({ ( 2 [ 6 - r*(3 - R1*r/2) ] ) } 2 )
79 // 3. Scale back to obtain expm1(x):
80 // From step 1, we have
81 // expm1(x) = either 2**k*[expm1(r)+1] - 1
82 // = or 2**k*[expm1(r) + (1-2**-k)]
83 // 4. Implementation notes:
84 // (A). To save one multiplication, we scale the coefficient Qi
85 // to Qi*2**i, and replace z by (x**2)/2.
86 // (B). To achieve maximum accuracy, we compute expm1(x) by
87 // (i) if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
88 // (ii) if k=0, return r-E
89 // (iii) if k=-1, return 0.5*(r-E)-0.5
90 // (iv) if k=1 if r < -0.25, return 2*((r+0.5)- E)
91 // else return 1.0+2.0*(r-E);
92 // (v) if (k<-2||k>56) return 2**k(1-(E-r)) - 1 (or exp(x)-1)
93 // (vi) if k <= 20, return 2**k((1-2**-k)-(E-r)), else
94 // (vii) return 2**k(1-((E+2**-k)-r))
97 // expm1(INF) is INF, expm1(NaN) is NaN;
98 // expm1(-INF) is -1, and
99 // for finite argument, only expm1(0)=0 is exact.
102 // according to an error analysis, the error is always less than
103 // 1 ulp (unit in the last place).
107 // if x > 7.09782712893383973096e+02 then expm1(x) overflow
110 // The hexadecimal values are the intended ones for the following
111 // constants. The decimal values may be used, provided that the
112 // compiler will convert from decimal to binary accurately enough
113 // to produce the hexadecimal values shown.
116 // Expm1 returns e**x - 1, the base-e exponential of x minus 1.
117 // It is more accurate than Exp(x) - 1 when x is near zero.
119 // Special cases are:
120 // Expm1(+Inf) = +Inf
123 // Very large values overflow to -1 or +Inf.
126 func libc_expm1(float64) float64
128 func Expm1(x
float64) float64 {
132 func expm1(x
float64) float64 {
134 Othreshold
= 7.09782712893383973096e+02 // 0x40862E42FEFA39EF
135 Ln2X56
= 3.88162421113569373274e+01 // 0x4043687a9f1af2b1
136 Ln2HalfX3
= 1.03972077083991796413e+00 // 0x3ff0a2b23f3bab73
137 Ln2Half
= 3.46573590279972654709e-01 // 0x3fd62e42fefa39ef
138 Ln2Hi
= 6.93147180369123816490e-01 // 0x3fe62e42fee00000
139 Ln2Lo
= 1.90821492927058770002e-10 // 0x3dea39ef35793c76
140 InvLn2
= 1.44269504088896338700e+00 // 0x3ff71547652b82fe
141 Tiny
= 1.0 / (1 << 54) // 2**-54 = 0x3c90000000000000
142 // scaled coefficients related to expm1
143 Q1
= -3.33333333333331316428e-02 // 0xBFA11111111110F4
144 Q2
= 1.58730158725481460165e-03 // 0x3F5A01A019FE5585
145 Q3
= -7.93650757867487942473e-05 // 0xBF14CE199EAADBB7
146 Q4
= 4.00821782732936239552e-06 // 0x3ED0CFCA86E65239
147 Q5
= -2.01099218183624371326e-07 // 0xBE8AFDB76E09C32D
152 case IsInf(x
, 1) ||
IsNaN(x
):
165 // filter out huge argument
166 if absx
>= Ln2X56
{ // if |x| >= 56 * ln2
167 if absx
>= Othreshold
{ // if |x| >= 709.78...
168 return Inf(1) // overflow
171 return -1 // x < -56*ln2, return -1.0
175 // argument reduction
178 if absx
> Ln2Half
{ // if |x| > 0.5 * ln2
180 if absx
< Ln2HalfX3
{ // and |x| < 1.5 * ln2
192 k
= int(InvLn2
*x
+ 0.5)
194 k
= int(InvLn2
*x
- 0.5)
197 hi
= x
- t
*Ln2Hi
// t * Ln2Hi is exact here
202 } else if absx
< Tiny
{ // when |x| < 2**-54, return x
208 // x is now in primary range
211 r1
:= 1 + hxs
*(Q1
+hxs
*(Q2
+hxs
*(Q3
+hxs
*(Q4
+hxs
*Q5
))))
213 e
:= hxs
* ((r1
- t
) / (6.0 - x
*t
))
219 return 0.5*(x
-e
) - 0.5
222 return -2 * (e
- (x
+ 0.5))
225 case k
<= -2 || k
> 56: // suffice to return exp(x)-1
227 y
= Float64frombits(Float64bits(y
) + uint64(k
)<<52) // add k to y's exponent
231 t
:= Float64frombits(0x3ff0000000000000 - (0x20000000000000 >> uint(k
))) // t=1-2**-k
233 y
= Float64frombits(Float64bits(y
) + uint64(k
)<<52) // add k to y's exponent
236 t
:= Float64frombits(uint64((0x3ff - k
) << 52)) // 2**-k
239 y
= Float64frombits(Float64bits(y
) + uint64(k
)<<52) // add k to y's exponent
242 return x
- (x
*e
- hxs
) // c is 0