libgo/go/math/expm1.go

   1 // Copyright 2010 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package math
   6
   7 // The original C code, the long comment, and the constants
   8 // below are from FreeBSD's /usr/src/lib/msun/src/s_expm1.c
   9 // and came with this notice. The go code is a simplified
  10 // version of the original C.
  11 //
  12 // ====================================================
  13 // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
  14 //
  15 // Developed at SunPro, a Sun Microsystems, Inc. business.
  16 // Permission to use, copy, modify, and distribute this
  17 // software is freely granted, provided that this notice
  18 // is preserved.
  19 // ====================================================
  20 //
  21 // expm1(x)
  22 // Returns exp(x)-1, the exponential of x minus 1.
  23 //
  24 // Method
  25 //   1. Argument reduction:
  26 //      Given x, find r and integer k such that
  27 //
  28 //               x = k*ln2 + r,  |r| <= 0.5*ln2 ~ 0.34658
  29 //
  30 //      Here a correction term c will be computed to compensate
  31 //      the error in r when rounded to a floating-point number.
  32 //
  33 //   2. Approximating expm1(r) by a special rational function on
  34 //      the interval [0,0.34658]:
  35 //      Since
  36 //          r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 - r**4/360 + ...
  37 //      we define R1(r*r) by
  38 //          r*(exp(r)+1)/(exp(r)-1) = 2+ r**2/6 * R1(r*r)
  39 //      That is,
  40 //          R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
  41 //                   = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
  42 //                   = 1 - r**2/60 + r**4/2520 - r**6/100800 + ...
  43 //      We use a special Reme algorithm on [0,0.347] to generate
  44 //      a polynomial of degree 5 in r*r to approximate R1. The
  45 //      maximum error of this polynomial approximation is bounded
  46 //      by 2**-61. In other words,
  47 //          R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
  48 //      where   Q1  =  -1.6666666666666567384E-2,
  49 //              Q2  =   3.9682539681370365873E-4,
  50 //              Q3  =  -9.9206344733435987357E-6,
  51 //              Q4  =   2.5051361420808517002E-7,
  52 //              Q5  =  -6.2843505682382617102E-9;
  53 //      (where z=r*r, and the values of Q1 to Q5 are listed below)
  54 //      with error bounded by
  55 //          |                  5           |     -61
  56 //          | 1.0+Q1*z+...+Q5*z   -  R1(z) | <= 2
  57 //          |                              |
  58 //
  59 //      expm1(r) = exp(r)-1 is then computed by the following
  60 //      specific way which minimize the accumulation rounding error:
  61 //                             2     3
  62 //                            r     r    [ 3 - (R1 + R1*r/2)  ]
  63 //            expm1(r) = r + --- + --- * [--------------------]
  64 //                            2     2    [ 6 - r*(3 - R1*r/2) ]
  65 //
  66 //      To compensate the error in the argument reduction, we use
  67 //              expm1(r+c) = expm1(r) + c + expm1(r)*c
  68 //                         ~ expm1(r) + c + r*c
  69 //      Thus c+r*c will be added in as the correction terms for
  70 //      expm1(r+c). Now rearrange the term to avoid optimization
  71 //      screw up:
  72 //                      (      2                                    2 )
  73 //                      ({  ( r    [ R1 -  (3 - R1*r/2) ]  )  }    r  )
  74 //       expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
  75 //                      ({  ( 2    [ 6 - r*(3 - R1*r/2) ]  )  }    2  )
  76 //                      (                                             )
  77 //
  78 //                 = r - E
  79 //   3. Scale back to obtain expm1(x):
  80 //      From step 1, we have
  81 //         expm1(x) = either 2**k*[expm1(r)+1] - 1
  82 //                  = or     2**k*[expm1(r) + (1-2**-k)]
  83 //   4. Implementation notes:
  84 //      (A). To save one multiplication, we scale the coefficient Qi
  85 //           to Qi*2**i, and replace z by (x**2)/2.
  86 //      (B). To achieve maximum accuracy, we compute expm1(x) by
  87 //        (i)   if x < -56*ln2, return -1.0, (raise inexact if x!=inf)
  88 //        (ii)  if k=0, return r-E
  89 //        (iii) if k=-1, return 0.5*(r-E)-0.5
  90 //        (iv)  if k=1 if r < -0.25, return 2*((r+0.5)- E)
  91 //                     else          return  1.0+2.0*(r-E);
  92 //        (v)   if (k<-2||k>56) return 2**k(1-(E-r)) - 1 (or exp(x)-1)
  93 //        (vi)  if k <= 20, return 2**k((1-2**-k)-(E-r)), else
  94 //        (vii) return 2**k(1-((E+2**-k)-r))
  95 //
  96 // Special cases:
  97 //      expm1(INF) is INF, expm1(NaN) is NaN;
  98 //      expm1(-INF) is -1, and
  99 //      for finite argument, only expm1(0)=0 is exact.
 100 //
 101 // Accuracy:
 102 //      according to an error analysis, the error is always less than
 103 //      1 ulp (unit in the last place).
 104 //
 105 // Misc. info.
 106 //      For IEEE double
 107 //          if x >  7.09782712893383973096e+02 then expm1(x) overflow
 108 //
 109 // Constants:
 110 // The hexadecimal values are the intended ones for the following
 111 // constants. The decimal values may be used, provided that the
 112 // compiler will convert from decimal to binary accurately enough
 113 // to produce the hexadecimal values shown.
 114 //
 115
 116 // Expm1 returns e**x - 1, the base-e exponential of x minus 1.
 117 // It is more accurate than Exp(x) - 1 when x is near zero.
 118 //
 119 // Special cases are:
 120 //      Expm1(+Inf) = +Inf
 121 //      Expm1(-Inf) = -1
 122 //      Expm1(NaN) = NaN
 123 // Very large values overflow to -1 or +Inf.
 124
 125 //extern expm1
 126 func libc_expm1(float64) float64
 127
 128 func Expm1(x float64) float64 {
 129         return libc_expm1(x)
 130 }
 131
 132 func expm1(x float64) float64 {
 133         const (
 134                 Othreshold = 7.09782712893383973096e+02 // 0x40862E42FEFA39EF
 135                 Ln2X56     = 3.88162421113569373274e+01 // 0x4043687a9f1af2b1
 136                 Ln2HalfX3  = 1.03972077083991796413e+00 // 0x3ff0a2b23f3bab73
 137                 Ln2Half    = 3.46573590279972654709e-01 // 0x3fd62e42fefa39ef
 138                 Ln2Hi      = 6.93147180369123816490e-01 // 0x3fe62e42fee00000
 139                 Ln2Lo      = 1.90821492927058770002e-10 // 0x3dea39ef35793c76
 140                 InvLn2     = 1.44269504088896338700e+00 // 0x3ff71547652b82fe
 141                 Tiny       = 1.0 / (1 << 54)            // 2**-54 = 0x3c90000000000000
 142                 // scaled coefficients related to expm1
 143                 Q1 = -3.33333333333331316428e-02 // 0xBFA11111111110F4
 144                 Q2 = 1.58730158725481460165e-03  // 0x3F5A01A019FE5585
 145                 Q3 = -7.93650757867487942473e-05 // 0xBF14CE199EAADBB7
 146                 Q4 = 4.00821782732936239552e-06  // 0x3ED0CFCA86E65239
 147                 Q5 = -2.01099218183624371326e-07 // 0xBE8AFDB76E09C32D
 148         )
 149
 150         // special cases
 151         switch {
 152         case IsInf(x, 1) || IsNaN(x):
 153                 return x
 154         case IsInf(x, -1):
 155                 return -1
 156         }
 157
 158         absx := x
 159         sign := false
 160         if x < 0 {
 161                 absx = -absx
 162                 sign = true
 163         }
 164
 165         // filter out huge argument
 166         if absx >= Ln2X56 { // if |x| >= 56 * ln2
 167                 if sign {
 168                         return -1 // x < -56*ln2, return -1
 169                 }
 170                 if absx >= Othreshold { // if |x| >= 709.78...
 171                         return Inf(1)
 172                 }
 173         }
 174
 175         // argument reduction
 176         var c float64
 177         var k int
 178         if absx > Ln2Half { // if  |x| > 0.5 * ln2
 179                 var hi, lo float64
 180                 if absx < Ln2HalfX3 { // and |x| < 1.5 * ln2
 181                         if !sign {
 182                                 hi = x - Ln2Hi
 183                                 lo = Ln2Lo
 184                                 k = 1
 185                         } else {
 186                                 hi = x + Ln2Hi
 187                                 lo = -Ln2Lo
 188                                 k = -1
 189                         }
 190                 } else {
 191                         if !sign {
 192                                 k = int(InvLn2*x + 0.5)
 193                         } else {
 194                                 k = int(InvLn2*x - 0.5)
 195                         }
 196                         t := float64(k)
 197                         hi = x - t*Ln2Hi // t * Ln2Hi is exact here
 198                         lo = t * Ln2Lo
 199                 }
 200                 x = hi - lo
 201                 c = (hi - x) - lo
 202         } else if absx < Tiny { // when |x| < 2**-54, return x
 203                 return x
 204         } else {
 205                 k = 0
 206         }
 207
 208         // x is now in primary range
 209         hfx := 0.5 * x
 210         hxs := x * hfx
 211         r1 := 1 + hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5))))
 212         t := 3 - r1*hfx
 213         e := hxs * ((r1 - t) / (6.0 - x*t))
 214         if k != 0 {
 215                 e = (x*(e-c) - c)
 216                 e -= hxs
 217                 switch {
 218                 case k == -1:
 219                         return 0.5*(x-e) - 0.5
 220                 case k == 1:
 221                         if x < -0.25 {
 222                                 return -2 * (e - (x + 0.5))
 223                         }
 224                         return 1 + 2*(x-e)
 225                 case k <= -2 || k > 56: // suffice to return exp(x)-1
 226                         y := 1 - (e - x)
 227                         y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent
 228                         return y - 1
 229                 }
 230                 if k < 20 {
 231                         t := Float64frombits(0x3ff0000000000000 - (0x20000000000000 >> uint(k))) // t=1-2**-k
 232                         y := t - (e - x)
 233                         y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent
 234                         return y
 235                 }
 236                 t := Float64frombits(uint64(0x3ff-k) << 52) // 2**-k
 237                 y := x - (e + t)
 238                 y++
 239                 y = Float64frombits(Float64bits(y) + uint64(k)<<52) // add k to y's exponent
 240                 return y
 241         }
 242         return x - (x*e - hxs) // c is 0
 243 }