sysdeps/ieee754/ldbl-96/s_erfl.c

   1 /*
   2  * ====================================================
   3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   4  *
   5  * Developed at SunPro, a Sun Microsystems, Inc. business.
   6  * Permission to use, copy, modify, and distribute this
   7  * software is freely granted, provided that this notice
   8  * is preserved.
   9  * ====================================================
  10  */
  11
  12 /* Long double expansions are
  13   Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
  14   and are incorporated herein by permission of the author.  The author
  15   reserves the right to distribute this material elsewhere under different
  16   copying permissions.  These modifications are distributed here under
  17   the following terms:
  18
  19     This library is free software; you can redistribute it and/or
  20     modify it under the terms of the GNU Lesser General Public
  21     License as published by the Free Software Foundation; either
  22     version 2.1 of the License, or (at your option) any later version.
  23
  24     This library is distributed in the hope that it will be useful,
  25     but WITHOUT ANY WARRANTY; without even the implied warranty of
  26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27     Lesser General Public License for more details.
  28
  29     You should have received a copy of the GNU Lesser General Public
  30     License along with this library; if not, see
  31     <http://www.gnu.org/licenses/>.  */
  32
  33 /* double erf(double x)
  34  * double erfc(double x)
  35  *                           x
  36  *                    2      |\
  37  *     erf(x)  =  ---------  | exp(-t*t)dt
  38  *                 sqrt(pi) \|
  39  *                           0
  40  *
  41  *     erfc(x) =  1-erf(x)
  42  *  Note that
  43  *              erf(-x) = -erf(x)
  44  *              erfc(-x) = 2 - erfc(x)
  45  *
  46  * Method:
  47  *      1. For |x| in [0, 0.84375]
  48  *          erf(x)  = x + x*R(x^2)
  49  *          erfc(x) = 1 - erf(x)           if x in [-.84375,0.25]
  50  *                  = 0.5 + ((0.5-x)-x*R)  if x in [0.25,0.84375]
  51  *         Remark. The formula is derived by noting
  52  *          erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
  53  *         and that
  54  *          2/sqrt(pi) = 1.128379167095512573896158903121545171688
  55  *         is close to one. The interval is chosen because the fix
  56  *         point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
  57  *         near 0.6174), and by some experiment, 0.84375 is chosen to
  58  *         guarantee the error is less than one ulp for erf.
  59  *
  60  *      2. For |x| in [0.84375,1.25], let s = |x| - 1, and
  61  *         c = 0.84506291151 rounded to single (24 bits)
  62  *      erf(x)  = sign(x) * (c  + P1(s)/Q1(s))
  63  *      erfc(x) = (1-c)  - P1(s)/Q1(s) if x > 0
  64  *                        1+(c+P1(s)/Q1(s))    if x < 0
  65  *         Remark: here we use the taylor series expansion at x=1.
  66  *              erf(1+s) = erf(1) + s*Poly(s)
  67  *                       = 0.845.. + P1(s)/Q1(s)
  68  *         Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
  69  *
  70  *      3. For x in [1.25,1/0.35(~2.857143)],
  71  *      erfc(x) = (1/x)*exp(-x*x-0.5625+R1(z)/S1(z))
  72  *              z=1/x^2
  73  *      erf(x)  = 1 - erfc(x)
  74  *
  75  *      4. For x in [1/0.35,107]
  76  *      erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
  77  *                      = 2.0 - (1/x)*exp(-x*x-0.5625+R2(z)/S2(z))
  78  *                             if -6.666<x<0
  79  *                      = 2.0 - tiny            (if x <= -6.666)
  80  *              z=1/x^2
  81  *      erf(x)  = sign(x)*(1.0 - erfc(x)) if x < 6.666, else
  82  *      erf(x)  = sign(x)*(1.0 - tiny)
  83  *      Note1:
  84  *         To compute exp(-x*x-0.5625+R/S), let s be a single
  85  *         precision number and s := x; then
  86  *              -x*x = -s*s + (s-x)*(s+x)
  87  *              exp(-x*x-0.5626+R/S) =
  88  *                      exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
  89  *      Note2:
  90  *         Here 4 and 5 make use of the asymptotic series
  91  *                        exp(-x*x)
  92  *              erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
  93  *                        x*sqrt(pi)
  94  *
  95  *      5. For inf > x >= 107
  96  *      erf(x)  = sign(x) *(1 - tiny)  (raise inexact)
  97  *      erfc(x) = tiny*tiny (raise underflow) if x > 0
  98  *                      = 2 - tiny if x<0
  99  *
 100  *      7. Special case:
 101  *      erf(0)  = 0, erf(inf)  = 1, erf(-inf) = -1,
 102  *      erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
 103  *              erfc/erf(NaN) is NaN
 104  */
 105
 106
 107 #include <errno.h>
 108 #include <float.h>
 109 #include <math.h>
 110 #include <math_private.h>
 111
 112 static const long double
 113 tiny = 1e-4931L,
 114   half = 0.5L,
 115   one = 1.0L,
 116   two = 2.0L,
 117         /* c = (float)0.84506291151 */
 118   erx = 0.845062911510467529296875L,
 119 /*
 120  * Coefficients for approximation to  erf on [0,0.84375]
 121  */
 122   /* 2/sqrt(pi) - 1 */
 123   efx = 1.2837916709551257389615890312154517168810E-1L,
 124
 125   pp[6] = {
 126     1.122751350964552113068262337278335028553E6L,
 127     -2.808533301997696164408397079650699163276E6L,
 128     -3.314325479115357458197119660818768924100E5L,
 129     -6.848684465326256109712135497895525446398E4L,
 130     -2.657817695110739185591505062971929859314E3L,
 131     -1.655310302737837556654146291646499062882E2L,
 132   },
 133
 134   qq[6] = {
 135     8.745588372054466262548908189000448124232E6L,
 136     3.746038264792471129367533128637019611485E6L,
 137     7.066358783162407559861156173539693900031E5L,
 138     7.448928604824620999413120955705448117056E4L,
 139     4.511583986730994111992253980546131408924E3L,
 140     1.368902937933296323345610240009071254014E2L,
 141     /* 1.000000000000000000000000000000000000000E0 */
 142   },
 143
 144 /*
 145  * Coefficients for approximation to  erf  in [0.84375,1.25]
 146  */
 147 /* erf(x+1) = 0.845062911510467529296875 + pa(x)/qa(x)
 148    -0.15625 <= x <= +.25
 149    Peak relative error 8.5e-22  */
 150
 151   pa[8] = {
 152     -1.076952146179812072156734957705102256059E0L,
 153      1.884814957770385593365179835059971587220E2L,
 154     -5.339153975012804282890066622962070115606E1L,
 155      4.435910679869176625928504532109635632618E1L,
 156      1.683219516032328828278557309642929135179E1L,
 157     -2.360236618396952560064259585299045804293E0L,
 158      1.852230047861891953244413872297940938041E0L,
 159      9.394994446747752308256773044667843200719E-2L,
 160   },
 161
 162   qa[7] =  {
 163     4.559263722294508998149925774781887811255E2L,
 164     3.289248982200800575749795055149780689738E2L,
 165     2.846070965875643009598627918383314457912E2L,
 166     1.398715859064535039433275722017479994465E2L,
 167     6.060190733759793706299079050985358190726E1L,
 168     2.078695677795422351040502569964299664233E1L,
 169     4.641271134150895940966798357442234498546E0L,
 170     /* 1.000000000000000000000000000000000000000E0 */
 171   },
 172
 173 /*
 174  * Coefficients for approximation to  erfc in [1.25,1/0.35]
 175  */
 176 /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + ra(x^2)/sa(x^2))
 177    1/2.85711669921875 < 1/x < 1/1.25
 178    Peak relative error 3.1e-21  */
 179
 180     ra[] = {
 181       1.363566591833846324191000679620738857234E-1L,
 182       1.018203167219873573808450274314658434507E1L,
 183       1.862359362334248675526472871224778045594E2L,
 184       1.411622588180721285284945138667933330348E3L,
 185       5.088538459741511988784440103218342840478E3L,
 186       8.928251553922176506858267311750789273656E3L,
 187       7.264436000148052545243018622742770549982E3L,
 188       2.387492459664548651671894725748959751119E3L,
 189       2.220916652813908085449221282808458466556E2L,
 190     },
 191
 192     sa[] = {
 193       -1.382234625202480685182526402169222331847E1L,
 194       -3.315638835627950255832519203687435946482E2L,
 195       -2.949124863912936259747237164260785326692E3L,
 196       -1.246622099070875940506391433635999693661E4L,
 197       -2.673079795851665428695842853070996219632E4L,
 198       -2.880269786660559337358397106518918220991E4L,
 199       -1.450600228493968044773354186390390823713E4L,
 200       -2.874539731125893533960680525192064277816E3L,
 201       -1.402241261419067750237395034116942296027E2L,
 202       /* 1.000000000000000000000000000000000000000E0 */
 203     },
 204 /*
 205  * Coefficients for approximation to  erfc in [1/.35,107]
 206  */
 207 /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + rb(x^2)/sb(x^2))
 208    1/6.6666259765625 < 1/x < 1/2.85711669921875
 209    Peak relative error 4.2e-22  */
 210     rb[] = {
 211       -4.869587348270494309550558460786501252369E-5L,
 212       -4.030199390527997378549161722412466959403E-3L,
 213       -9.434425866377037610206443566288917589122E-2L,
 214       -9.319032754357658601200655161585539404155E-1L,
 215       -4.273788174307459947350256581445442062291E0L,
 216       -8.842289940696150508373541814064198259278E0L,
 217       -7.069215249419887403187988144752613025255E0L,
 218       -1.401228723639514787920274427443330704764E0L,
 219     },
 220
 221     sb[] = {
 222       4.936254964107175160157544545879293019085E-3L,
 223       1.583457624037795744377163924895349412015E-1L,
 224       1.850647991850328356622940552450636420484E0L,
 225       9.927611557279019463768050710008450625415E0L,
 226       2.531667257649436709617165336779212114570E1L,
 227       2.869752886406743386458304052862814690045E1L,
 228       1.182059497870819562441683560749192539345E1L,
 229       /* 1.000000000000000000000000000000000000000E0 */
 230     },
 231 /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + rc(x^2)/sc(x^2))
 232    1/107 <= 1/x <= 1/6.6666259765625
 233    Peak relative error 1.1e-21  */
 234     rc[] = {
 235       -8.299617545269701963973537248996670806850E-5L,
 236       -6.243845685115818513578933902532056244108E-3L,
 237       -1.141667210620380223113693474478394397230E-1L,
 238       -7.521343797212024245375240432734425789409E-1L,
 239       -1.765321928311155824664963633786967602934E0L,
 240       -1.029403473103215800456761180695263439188E0L,
 241     },
 242
 243     sc[] = {
 244       8.413244363014929493035952542677768808601E-3L,
 245       2.065114333816877479753334599639158060979E-1L,
 246       1.639064941530797583766364412782135680148E0L,
 247       4.936788463787115555582319302981666347450E0L,
 248       5.005177727208955487404729933261347679090E0L,
 249       /* 1.000000000000000000000000000000000000000E0 */
 250     };
 251
 252 long double
 253 __erfl (long double x)
 254 {
 255   long double R, S, P, Q, s, y, z, r;
 256   int32_t ix, i;
 257   u_int32_t se, i0, i1;
 258
 259   GET_LDOUBLE_WORDS (se, i0, i1, x);
 260   ix = se & 0x7fff;
 261
 262   if (ix >= 0x7fff)
 263     {                           /* erf(nan)=nan */
 264       i = ((se & 0xffff) >> 15) << 1;
 265       return (long double) (1 - i) + one / x;   /* erf(+-inf)=+-1 */
 266     }
 267
 268   ix = (ix << 16) | (i0 >> 16);
 269   if (ix < 0x3ffed800) /* |x|<0.84375 */
 270     {
 271       if (ix < 0x3fde8000) /* |x|<2**-33 */
 272         {
 273           if (ix < 0x00080000)
 274             {
 275               /* Avoid spurious underflow.  */
 276               long double ret = 0.0625 * (16.0 * x + (16.0 * efx) * x);
 277               if (fabsl (ret) < LDBL_MIN)
 278                 {
 279                   long double force_underflow = ret * ret;
 280                   math_force_eval (force_underflow);
 281                 }
 282               return ret;
 283             }
 284           return x + efx * x;
 285         }
 286       z = x * x;
 287       r = pp[0] + z * (pp[1]
 288           + z * (pp[2] + z * (pp[3] + z * (pp[4] + z * pp[5]))));
 289       s = qq[0] + z * (qq[1]
 290           + z * (qq[2] + z * (qq[3] + z * (qq[4] + z * (qq[5] + z)))));
 291       y = r / s;
 292       return x + x * y;
 293     }
 294   if (ix < 0x3fffa000) /* 1.25 */
 295     {                           /* 0.84375 <= |x| < 1.25 */
 296       s = fabsl (x) - one;
 297       P = pa[0] + s * (pa[1] + s * (pa[2]
 298         + s * (pa[3] + s * (pa[4] + s * (pa[5] + s * (pa[6] + s * pa[7]))))));
 299       Q = qa[0] + s * (qa[1] + s * (qa[2]
 300         + s * (qa[3] + s * (qa[4] + s * (qa[5] + s * (qa[6] + s))))));
 301       if ((se & 0x8000) == 0)
 302         return erx + P / Q;
 303       else
 304         return -erx - P / Q;
 305     }
 306   if (ix >= 0x4001d555) /* 6.6666259765625 */
 307     {                           /* inf>|x|>=6.666 */
 308       if ((se & 0x8000) == 0)
 309         return one - tiny;
 310       else
 311         return tiny - one;
 312     }
 313   x = fabsl (x);
 314   s = one / (x * x);
 315   if (ix < 0x4000b6db) /* 2.85711669921875 */
 316     {
 317       R = ra[0] + s * (ra[1] + s * (ra[2] + s * (ra[3] + s * (ra[4] +
 318           s * (ra[5] + s * (ra[6] + s * (ra[7] + s * ra[8])))))));
 319       S = sa[0] + s * (sa[1] + s * (sa[2] + s * (sa[3] + s * (sa[4] +
 320           s * (sa[5] + s * (sa[6] + s * (sa[7] + s * (sa[8] + s))))))));
 321     }
 322   else
 323     {                           /* |x| >= 1/0.35 */
 324       R = rb[0] + s * (rb[1] + s * (rb[2] + s * (rb[3] + s * (rb[4] +
 325          s * (rb[5] + s * (rb[6] + s * rb[7]))))));
 326       S = sb[0] + s * (sb[1] + s * (sb[2] + s * (sb[3] + s * (sb[4] +
 327          s * (sb[5] + s * (sb[6] + s))))));
 328     }
 329   z = x;
 330   GET_LDOUBLE_WORDS (i, i0, i1, z);
 331   i1 = 0;
 332   SET_LDOUBLE_WORDS (z, i, i0, i1);
 333   r =
 334     __ieee754_expl (-z * z - 0.5625) * __ieee754_expl ((z - x) * (z + x) +
 335                                                      R / S);
 336   if ((se & 0x8000) == 0)
 337     return one - r / x;
 338   else
 339     return r / x - one;
 340 }
 341
 342 weak_alias (__erfl, erfl)
 343 long double
 344 __erfcl (long double x)
 345 {
 346   int32_t hx, ix;
 347   long double R, S, P, Q, s, y, z, r;
 348   u_int32_t se, i0, i1;
 349
 350   GET_LDOUBLE_WORDS (se, i0, i1, x);
 351   ix = se & 0x7fff;
 352   if (ix >= 0x7fff)
 353     {                           /* erfc(nan)=nan */
 354       /* erfc(+-inf)=0,2 */
 355       return (long double) (((se & 0xffff) >> 15) << 1) + one / x;
 356     }
 357
 358   ix = (ix << 16) | (i0 >> 16);
 359   if (ix < 0x3ffed800) /* |x|<0.84375 */
 360     {
 361       if (ix < 0x3fbe0000) /* |x|<2**-65 */
 362         return one - x;
 363       z = x * x;
 364       r = pp[0] + z * (pp[1]
 365           + z * (pp[2] + z * (pp[3] + z * (pp[4] + z * pp[5]))));
 366       s = qq[0] + z * (qq[1]
 367           + z * (qq[2] + z * (qq[3] + z * (qq[4] + z * (qq[5] + z)))));
 368       y = r / s;
 369       if (ix < 0x3ffd8000) /* x<1/4 */
 370         {
 371           return one - (x + x * y);
 372         }
 373       else
 374         {
 375           r = x * y;
 376           r += (x - half);
 377           return half - r;
 378         }
 379     }
 380   if (ix < 0x3fffa000) /* 1.25 */
 381     {                           /* 0.84375 <= |x| < 1.25 */
 382       s = fabsl (x) - one;
 383       P = pa[0] + s * (pa[1] + s * (pa[2]
 384         + s * (pa[3] + s * (pa[4] + s * (pa[5] + s * (pa[6] + s * pa[7]))))));
 385       Q = qa[0] + s * (qa[1] + s * (qa[2]
 386         + s * (qa[3] + s * (qa[4] + s * (qa[5] + s * (qa[6] + s))))));
 387       if ((se & 0x8000) == 0)
 388         {
 389           z = one - erx;
 390           return z - P / Q;
 391         }
 392       else
 393         {
 394           z = erx + P / Q;
 395           return one + z;
 396         }
 397     }
 398   if (ix < 0x4005d600) /* 107 */
 399     {                           /* |x|<107 */
 400       x = fabsl (x);
 401       s = one / (x * x);
 402       if (ix < 0x4000b6db) /* 2.85711669921875 */
 403         {                       /* |x| < 1/.35 ~ 2.857143 */
 404           R = ra[0] + s * (ra[1] + s * (ra[2] + s * (ra[3] + s * (ra[4] +
 405               s * (ra[5] + s * (ra[6] + s * (ra[7] + s * ra[8])))))));
 406           S = sa[0] + s * (sa[1] + s * (sa[2] + s * (sa[3] + s * (sa[4] +
 407               s * (sa[5] + s * (sa[6] + s * (sa[7] + s * (sa[8] + s))))))));
 408         }
 409       else if (ix < 0x4001d555) /* 6.6666259765625 */
 410         {                       /* 6.666 > |x| >= 1/.35 ~ 2.857143 */
 411           R = rb[0] + s * (rb[1] + s * (rb[2] + s * (rb[3] + s * (rb[4] +
 412               s * (rb[5] + s * (rb[6] + s * rb[7]))))));
 413           S = sb[0] + s * (sb[1] + s * (sb[2] + s * (sb[3] + s * (sb[4] +
 414               s * (sb[5] + s * (sb[6] + s))))));
 415         }
 416       else
 417         {                       /* |x| >= 6.666 */
 418           if (se & 0x8000)
 419             return two - tiny;  /* x < -6.666 */
 420
 421           R = rc[0] + s * (rc[1] + s * (rc[2] + s * (rc[3] +
 422                                                     s * (rc[4] + s * rc[5]))));
 423           S = sc[0] + s * (sc[1] + s * (sc[2] + s * (sc[3] +
 424                                                     s * (sc[4] + s))));
 425         }
 426       z = x;
 427       GET_LDOUBLE_WORDS (hx, i0, i1, z);
 428       i1 = 0;
 429       i0 &= 0xffffff00;
 430       SET_LDOUBLE_WORDS (z, hx, i0, i1);
 431       r = __ieee754_expl (-z * z - 0.5625) *
 432         __ieee754_expl ((z - x) * (z + x) + R / S);
 433       if ((se & 0x8000) == 0)
 434         {
 435           long double ret = r / x;
 436           if (ret == 0)
 437             __set_errno (ERANGE);
 438           return ret;
 439         }
 440       else
 441         return two - r / x;
 442     }
 443   else
 444     {
 445       if ((se & 0x8000) == 0)
 446         {
 447           __set_errno (ERANGE);
 448           return tiny * tiny;
 449         }
 450       else
 451         return two - tiny;
 452     }
 453 }
 454
 455 weak_alias (__erfcl, erfcl)