sysdeps/ieee754/ldbl-96/s_erfl.c

   1 /*
   2  * ====================================================
   3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   4  *
   5  * Developed at SunPro, a Sun Microsystems, Inc. business.
   6  * Permission to use, copy, modify, and distribute this
   7  * software is freely granted, provided that this notice
   8  * is preserved.
   9  * ====================================================
  10  */
  11
  12 /* Long double expansions are
  13   Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
  14   and are incorporated herein by permission of the author.  The author
  15   reserves the right to distribute this material elsewhere under different
  16   copying permissions.  These modifications are distributed here under
  17   the following terms:
  18
  19     This library is free software; you can redistribute it and/or
  20     modify it under the terms of the GNU Lesser General Public
  21     License as published by the Free Software Foundation; either
  22     version 2.1 of the License, or (at your option) any later version.
  23
  24     This library is distributed in the hope that it will be useful,
  25     but WITHOUT ANY WARRANTY; without even the implied warranty of
  26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27     Lesser General Public License for more details.
  28
  29     You should have received a copy of the GNU Lesser General Public
  30     License along with this library; if not, see
  31     <http://www.gnu.org/licenses/>.  */
  32
  33 /* double erf(double x)
  34  * double erfc(double x)
  35  *                           x
  36  *                    2      |\
  37  *     erf(x)  =  ---------  | exp(-t*t)dt
  38  *                 sqrt(pi) \|
  39  *                           0
  40  *
  41  *     erfc(x) =  1-erf(x)
  42  *  Note that
  43  *              erf(-x) = -erf(x)
  44  *              erfc(-x) = 2 - erfc(x)
  45  *
  46  * Method:
  47  *      1. For |x| in [0, 0.84375]
  48  *          erf(x)  = x + x*R(x^2)
  49  *          erfc(x) = 1 - erf(x)           if x in [-.84375,0.25]
  50  *                  = 0.5 + ((0.5-x)-x*R)  if x in [0.25,0.84375]
  51  *         Remark. The formula is derived by noting
  52  *          erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
  53  *         and that
  54  *          2/sqrt(pi) = 1.128379167095512573896158903121545171688
  55  *         is close to one. The interval is chosen because the fix
  56  *         point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
  57  *         near 0.6174), and by some experiment, 0.84375 is chosen to
  58  *         guarantee the error is less than one ulp for erf.
  59  *
  60  *      2. For |x| in [0.84375,1.25], let s = |x| - 1, and
  61  *         c = 0.84506291151 rounded to single (24 bits)
  62  *      erf(x)  = sign(x) * (c  + P1(s)/Q1(s))
  63  *      erfc(x) = (1-c)  - P1(s)/Q1(s) if x > 0
  64  *                        1+(c+P1(s)/Q1(s))    if x < 0
  65  *         Remark: here we use the taylor series expansion at x=1.
  66  *              erf(1+s) = erf(1) + s*Poly(s)
  67  *                       = 0.845.. + P1(s)/Q1(s)
  68  *         Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
  69  *
  70  *      3. For x in [1.25,1/0.35(~2.857143)],
  71  *      erfc(x) = (1/x)*exp(-x*x-0.5625+R1(z)/S1(z))
  72  *              z=1/x^2
  73  *      erf(x)  = 1 - erfc(x)
  74  *
  75  *      4. For x in [1/0.35,107]
  76  *      erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
  77  *                      = 2.0 - (1/x)*exp(-x*x-0.5625+R2(z)/S2(z))
  78  *                             if -6.666<x<0
  79  *                      = 2.0 - tiny            (if x <= -6.666)
  80  *              z=1/x^2
  81  *      erf(x)  = sign(x)*(1.0 - erfc(x)) if x < 6.666, else
  82  *      erf(x)  = sign(x)*(1.0 - tiny)
  83  *      Note1:
  84  *         To compute exp(-x*x-0.5625+R/S), let s be a single
  85  *         precision number and s := x; then
  86  *              -x*x = -s*s + (s-x)*(s+x)
  87  *              exp(-x*x-0.5626+R/S) =
  88  *                      exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
  89  *      Note2:
  90  *         Here 4 and 5 make use of the asymptotic series
  91  *                        exp(-x*x)
  92  *              erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
  93  *                        x*sqrt(pi)
  94  *
  95  *      5. For inf > x >= 107
  96  *      erf(x)  = sign(x) *(1 - tiny)  (raise inexact)
  97  *      erfc(x) = tiny*tiny (raise underflow) if x > 0
  98  *                      = 2 - tiny if x<0
  99  *
 100  *      7. Special case:
 101  *      erf(0)  = 0, erf(inf)  = 1, erf(-inf) = -1,
 102  *      erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
 103  *              erfc/erf(NaN) is NaN
 104  */
 105
 106
 107 #include <errno.h>
 108 #include <float.h>
 109 #include <math.h>
 110 #include <math_private.h>
 111
 112 static const long double
 113 tiny = 1e-4931L,
 114   half = 0.5L,
 115   one = 1.0L,
 116   two = 2.0L,
 117         /* c = (float)0.84506291151 */
 118   erx = 0.845062911510467529296875L,
 119 /*
 120  * Coefficients for approximation to  erf on [0,0.84375]
 121  */
 122   /* 2/sqrt(pi) - 1 */
 123   efx = 1.2837916709551257389615890312154517168810E-1L,
 124
 125   pp[6] = {
 126     1.122751350964552113068262337278335028553E6L,
 127     -2.808533301997696164408397079650699163276E6L,
 128     -3.314325479115357458197119660818768924100E5L,
 129     -6.848684465326256109712135497895525446398E4L,
 130     -2.657817695110739185591505062971929859314E3L,
 131     -1.655310302737837556654146291646499062882E2L,
 132   },
 133
 134   qq[6] = {
 135     8.745588372054466262548908189000448124232E6L,
 136     3.746038264792471129367533128637019611485E6L,
 137     7.066358783162407559861156173539693900031E5L,
 138     7.448928604824620999413120955705448117056E4L,
 139     4.511583986730994111992253980546131408924E3L,
 140     1.368902937933296323345610240009071254014E2L,
 141     /* 1.000000000000000000000000000000000000000E0 */
 142   },
 143
 144 /*
 145  * Coefficients for approximation to  erf  in [0.84375,1.25]
 146  */
 147 /* erf(x+1) = 0.845062911510467529296875 + pa(x)/qa(x)
 148    -0.15625 <= x <= +.25
 149    Peak relative error 8.5e-22  */
 150
 151   pa[8] = {
 152     -1.076952146179812072156734957705102256059E0L,
 153      1.884814957770385593365179835059971587220E2L,
 154     -5.339153975012804282890066622962070115606E1L,
 155      4.435910679869176625928504532109635632618E1L,
 156      1.683219516032328828278557309642929135179E1L,
 157     -2.360236618396952560064259585299045804293E0L,
 158      1.852230047861891953244413872297940938041E0L,
 159      9.394994446747752308256773044667843200719E-2L,
 160   },
 161
 162   qa[7] =  {
 163     4.559263722294508998149925774781887811255E2L,
 164     3.289248982200800575749795055149780689738E2L,
 165     2.846070965875643009598627918383314457912E2L,
 166     1.398715859064535039433275722017479994465E2L,
 167     6.060190733759793706299079050985358190726E1L,
 168     2.078695677795422351040502569964299664233E1L,
 169     4.641271134150895940966798357442234498546E0L,
 170     /* 1.000000000000000000000000000000000000000E0 */
 171   },
 172
 173 /*
 174  * Coefficients for approximation to  erfc in [1.25,1/0.35]
 175  */
 176 /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + ra(x^2)/sa(x^2))
 177    1/2.85711669921875 < 1/x < 1/1.25
 178    Peak relative error 3.1e-21  */
 179
 180     ra[] = {
 181       1.363566591833846324191000679620738857234E-1L,
 182       1.018203167219873573808450274314658434507E1L,
 183       1.862359362334248675526472871224778045594E2L,
 184       1.411622588180721285284945138667933330348E3L,
 185       5.088538459741511988784440103218342840478E3L,
 186       8.928251553922176506858267311750789273656E3L,
 187       7.264436000148052545243018622742770549982E3L,
 188       2.387492459664548651671894725748959751119E3L,
 189       2.220916652813908085449221282808458466556E2L,
 190     },
 191
 192     sa[] = {
 193       -1.382234625202480685182526402169222331847E1L,
 194       -3.315638835627950255832519203687435946482E2L,
 195       -2.949124863912936259747237164260785326692E3L,
 196       -1.246622099070875940506391433635999693661E4L,
 197       -2.673079795851665428695842853070996219632E4L,
 198       -2.880269786660559337358397106518918220991E4L,
 199       -1.450600228493968044773354186390390823713E4L,
 200       -2.874539731125893533960680525192064277816E3L,
 201       -1.402241261419067750237395034116942296027E2L,
 202       /* 1.000000000000000000000000000000000000000E0 */
 203     },
 204 /*
 205  * Coefficients for approximation to  erfc in [1/.35,107]
 206  */
 207 /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + rb(x^2)/sb(x^2))
 208    1/6.6666259765625 < 1/x < 1/2.85711669921875
 209    Peak relative error 4.2e-22  */
 210     rb[] = {
 211       -4.869587348270494309550558460786501252369E-5L,
 212       -4.030199390527997378549161722412466959403E-3L,
 213       -9.434425866377037610206443566288917589122E-2L,
 214       -9.319032754357658601200655161585539404155E-1L,
 215       -4.273788174307459947350256581445442062291E0L,
 216       -8.842289940696150508373541814064198259278E0L,
 217       -7.069215249419887403187988144752613025255E0L,
 218       -1.401228723639514787920274427443330704764E0L,
 219     },
 220
 221     sb[] = {
 222       4.936254964107175160157544545879293019085E-3L,
 223       1.583457624037795744377163924895349412015E-1L,
 224       1.850647991850328356622940552450636420484E0L,
 225       9.927611557279019463768050710008450625415E0L,
 226       2.531667257649436709617165336779212114570E1L,
 227       2.869752886406743386458304052862814690045E1L,
 228       1.182059497870819562441683560749192539345E1L,
 229       /* 1.000000000000000000000000000000000000000E0 */
 230     },
 231 /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + rc(x^2)/sc(x^2))
 232    1/107 <= 1/x <= 1/6.6666259765625
 233    Peak relative error 1.1e-21  */
 234     rc[] = {
 235       -8.299617545269701963973537248996670806850E-5L,
 236       -6.243845685115818513578933902532056244108E-3L,
 237       -1.141667210620380223113693474478394397230E-1L,
 238       -7.521343797212024245375240432734425789409E-1L,
 239       -1.765321928311155824664963633786967602934E0L,
 240       -1.029403473103215800456761180695263439188E0L,
 241     },
 242
 243     sc[] = {
 244       8.413244363014929493035952542677768808601E-3L,
 245       2.065114333816877479753334599639158060979E-1L,
 246       1.639064941530797583766364412782135680148E0L,
 247       4.936788463787115555582319302981666347450E0L,
 248       5.005177727208955487404729933261347679090E0L,
 249       /* 1.000000000000000000000000000000000000000E0 */
 250     };
 251
 252 long double
 253 __erfl (long double x)
 254 {
 255   long double R, S, P, Q, s, y, z, r;
 256   int32_t ix, i;
 257   u_int32_t se, i0, i1;
 258
 259   GET_LDOUBLE_WORDS (se, i0, i1, x);
 260   ix = se & 0x7fff;
 261
 262   if (ix >= 0x7fff)
 263     {                           /* erf(nan)=nan */
 264       i = ((se & 0xffff) >> 15) << 1;
 265       return (long double) (1 - i) + one / x;   /* erf(+-inf)=+-1 */
 266     }
 267
 268   ix = (ix << 16) | (i0 >> 16);
 269   if (ix < 0x3ffed800) /* |x|<0.84375 */
 270     {
 271       if (ix < 0x3fde8000) /* |x|<2**-33 */
 272         {
 273           if (ix < 0x00080000)
 274             {
 275               /* Avoid spurious underflow.  */
 276               long double ret = 0.0625 * (16.0 * x + (16.0 * efx) * x);
 277               math_check_force_underflow (ret);
 278               return ret;
 279             }
 280           return x + efx * x;
 281         }
 282       z = x * x;
 283       r = pp[0] + z * (pp[1]
 284           + z * (pp[2] + z * (pp[3] + z * (pp[4] + z * pp[5]))));
 285       s = qq[0] + z * (qq[1]
 286           + z * (qq[2] + z * (qq[3] + z * (qq[4] + z * (qq[5] + z)))));
 287       y = r / s;
 288       return x + x * y;
 289     }
 290   if (ix < 0x3fffa000) /* 1.25 */
 291     {                           /* 0.84375 <= |x| < 1.25 */
 292       s = fabsl (x) - one;
 293       P = pa[0] + s * (pa[1] + s * (pa[2]
 294         + s * (pa[3] + s * (pa[4] + s * (pa[5] + s * (pa[6] + s * pa[7]))))));
 295       Q = qa[0] + s * (qa[1] + s * (qa[2]
 296         + s * (qa[3] + s * (qa[4] + s * (qa[5] + s * (qa[6] + s))))));
 297       if ((se & 0x8000) == 0)
 298         return erx + P / Q;
 299       else
 300         return -erx - P / Q;
 301     }
 302   if (ix >= 0x4001d555) /* 6.6666259765625 */
 303     {                           /* inf>|x|>=6.666 */
 304       if ((se & 0x8000) == 0)
 305         return one - tiny;
 306       else
 307         return tiny - one;
 308     }
 309   x = fabsl (x);
 310   s = one / (x * x);
 311   if (ix < 0x4000b6db) /* 2.85711669921875 */
 312     {
 313       R = ra[0] + s * (ra[1] + s * (ra[2] + s * (ra[3] + s * (ra[4] +
 314           s * (ra[5] + s * (ra[6] + s * (ra[7] + s * ra[8])))))));
 315       S = sa[0] + s * (sa[1] + s * (sa[2] + s * (sa[3] + s * (sa[4] +
 316           s * (sa[5] + s * (sa[6] + s * (sa[7] + s * (sa[8] + s))))))));
 317     }
 318   else
 319     {                           /* |x| >= 1/0.35 */
 320       R = rb[0] + s * (rb[1] + s * (rb[2] + s * (rb[3] + s * (rb[4] +
 321          s * (rb[5] + s * (rb[6] + s * rb[7]))))));
 322       S = sb[0] + s * (sb[1] + s * (sb[2] + s * (sb[3] + s * (sb[4] +
 323          s * (sb[5] + s * (sb[6] + s))))));
 324     }
 325   z = x;
 326   GET_LDOUBLE_WORDS (i, i0, i1, z);
 327   i1 = 0;
 328   SET_LDOUBLE_WORDS (z, i, i0, i1);
 329   r =
 330     __ieee754_expl (-z * z - 0.5625) * __ieee754_expl ((z - x) * (z + x) +
 331                                                      R / S);
 332   if ((se & 0x8000) == 0)
 333     return one - r / x;
 334   else
 335     return r / x - one;
 336 }
 337
 338 weak_alias (__erfl, erfl)
 339 long double
 340 __erfcl (long double x)
 341 {
 342   int32_t hx, ix;
 343   long double R, S, P, Q, s, y, z, r;
 344   u_int32_t se, i0, i1;
 345
 346   GET_LDOUBLE_WORDS (se, i0, i1, x);
 347   ix = se & 0x7fff;
 348   if (ix >= 0x7fff)
 349     {                           /* erfc(nan)=nan */
 350       /* erfc(+-inf)=0,2 */
 351       return (long double) (((se & 0xffff) >> 15) << 1) + one / x;
 352     }
 353
 354   ix = (ix << 16) | (i0 >> 16);
 355   if (ix < 0x3ffed800) /* |x|<0.84375 */
 356     {
 357       if (ix < 0x3fbe0000) /* |x|<2**-65 */
 358         return one - x;
 359       z = x * x;
 360       r = pp[0] + z * (pp[1]
 361           + z * (pp[2] + z * (pp[3] + z * (pp[4] + z * pp[5]))));
 362       s = qq[0] + z * (qq[1]
 363           + z * (qq[2] + z * (qq[3] + z * (qq[4] + z * (qq[5] + z)))));
 364       y = r / s;
 365       if (ix < 0x3ffd8000) /* x<1/4 */
 366         {
 367           return one - (x + x * y);
 368         }
 369       else
 370         {
 371           r = x * y;
 372           r += (x - half);
 373           return half - r;
 374         }
 375     }
 376   if (ix < 0x3fffa000) /* 1.25 */
 377     {                           /* 0.84375 <= |x| < 1.25 */
 378       s = fabsl (x) - one;
 379       P = pa[0] + s * (pa[1] + s * (pa[2]
 380         + s * (pa[3] + s * (pa[4] + s * (pa[5] + s * (pa[6] + s * pa[7]))))));
 381       Q = qa[0] + s * (qa[1] + s * (qa[2]
 382         + s * (qa[3] + s * (qa[4] + s * (qa[5] + s * (qa[6] + s))))));
 383       if ((se & 0x8000) == 0)
 384         {
 385           z = one - erx;
 386           return z - P / Q;
 387         }
 388       else
 389         {
 390           z = erx + P / Q;
 391           return one + z;
 392         }
 393     }
 394   if (ix < 0x4005d600) /* 107 */
 395     {                           /* |x|<107 */
 396       x = fabsl (x);
 397       s = one / (x * x);
 398       if (ix < 0x4000b6db) /* 2.85711669921875 */
 399         {                       /* |x| < 1/.35 ~ 2.857143 */
 400           R = ra[0] + s * (ra[1] + s * (ra[2] + s * (ra[3] + s * (ra[4] +
 401               s * (ra[5] + s * (ra[6] + s * (ra[7] + s * ra[8])))))));
 402           S = sa[0] + s * (sa[1] + s * (sa[2] + s * (sa[3] + s * (sa[4] +
 403               s * (sa[5] + s * (sa[6] + s * (sa[7] + s * (sa[8] + s))))))));
 404         }
 405       else if (ix < 0x4001d555) /* 6.6666259765625 */
 406         {                       /* 6.666 > |x| >= 1/.35 ~ 2.857143 */
 407           R = rb[0] + s * (rb[1] + s * (rb[2] + s * (rb[3] + s * (rb[4] +
 408               s * (rb[5] + s * (rb[6] + s * rb[7]))))));
 409           S = sb[0] + s * (sb[1] + s * (sb[2] + s * (sb[3] + s * (sb[4] +
 410               s * (sb[5] + s * (sb[6] + s))))));
 411         }
 412       else
 413         {                       /* |x| >= 6.666 */
 414           if (se & 0x8000)
 415             return two - tiny;  /* x < -6.666 */
 416
 417           R = rc[0] + s * (rc[1] + s * (rc[2] + s * (rc[3] +
 418                                                     s * (rc[4] + s * rc[5]))));
 419           S = sc[0] + s * (sc[1] + s * (sc[2] + s * (sc[3] +
 420                                                     s * (sc[4] + s))));
 421         }
 422       z = x;
 423       GET_LDOUBLE_WORDS (hx, i0, i1, z);
 424       i1 = 0;
 425       i0 &= 0xffffff00;
 426       SET_LDOUBLE_WORDS (z, hx, i0, i1);
 427       r = __ieee754_expl (-z * z - 0.5625) *
 428         __ieee754_expl ((z - x) * (z + x) + R / S);
 429       if ((se & 0x8000) == 0)
 430         {
 431           long double ret = r / x;
 432           if (ret == 0)
 433             __set_errno (ERANGE);
 434           return ret;
 435         }
 436       else
 437         return two - r / x;
 438     }
 439   else
 440     {
 441       if ((se & 0x8000) == 0)
 442         {
 443           __set_errno (ERANGE);
 444           return tiny * tiny;
 445         }
 446       else
 447         return two - tiny;
 448     }
 449 }
 450
 451 weak_alias (__erfcl, erfcl)