sysdeps/ieee754/ldbl-96/s_erfl.c

   1 /*
   2  * ====================================================
   3  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   4  *
   5  * Developed at SunPro, a Sun Microsystems, Inc. business.
   6  * Permission to use, copy, modify, and distribute this
   7  * software is freely granted, provided that this notice
   8  * is preserved.
   9  * ====================================================
  10  */
  11
  12 /* Long double expansions are
  13   Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
  14   and are incorporated herein by permission of the author.  The author
  15   reserves the right to distribute this material elsewhere under different
  16   copying permissions.  These modifications are distributed here under
  17   the following terms:
  18
  19     This library is free software; you can redistribute it and/or
  20     modify it under the terms of the GNU Lesser General Public
  21     License as published by the Free Software Foundation; either
  22     version 2.1 of the License, or (at your option) any later version.
  23
  24     This library is distributed in the hope that it will be useful,
  25     but WITHOUT ANY WARRANTY; without even the implied warranty of
  26     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  27     Lesser General Public License for more details.
  28
  29     You should have received a copy of the GNU Lesser General Public
  30     License along with this library; if not, write to the Free Software
  31     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA */
  32
  33 /* double erf(double x)
  34  * double erfc(double x)
  35  *                           x
  36  *                    2      |\
  37  *     erf(x)  =  ---------  | exp(-t*t)dt
  38  *                 sqrt(pi) \|
  39  *                           0
  40  *
  41  *     erfc(x) =  1-erf(x)
  42  *  Note that
  43  *              erf(-x) = -erf(x)
  44  *              erfc(-x) = 2 - erfc(x)
  45  *
  46  * Method:
  47  *      1. For |x| in [0, 0.84375]
  48  *          erf(x)  = x + x*R(x^2)
  49  *          erfc(x) = 1 - erf(x)           if x in [-.84375,0.25]
  50  *                  = 0.5 + ((0.5-x)-x*R)  if x in [0.25,0.84375]
  51  *         Remark. The formula is derived by noting
  52  *          erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
  53  *         and that
  54  *          2/sqrt(pi) = 1.128379167095512573896158903121545171688
  55  *         is close to one. The interval is chosen because the fix
  56  *         point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
  57  *         near 0.6174), and by some experiment, 0.84375 is chosen to
  58  *         guarantee the error is less than one ulp for erf.
  59  *
  60  *      2. For |x| in [0.84375,1.25], let s = |x| - 1, and
  61  *         c = 0.84506291151 rounded to single (24 bits)
  62  *      erf(x)  = sign(x) * (c  + P1(s)/Q1(s))
  63  *      erfc(x) = (1-c)  - P1(s)/Q1(s) if x > 0
  64  *                        1+(c+P1(s)/Q1(s))    if x < 0
  65  *         Remark: here we use the taylor series expansion at x=1.
  66  *              erf(1+s) = erf(1) + s*Poly(s)
  67  *                       = 0.845.. + P1(s)/Q1(s)
  68  *         Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
  69  *
  70  *      3. For x in [1.25,1/0.35(~2.857143)],
  71  *      erfc(x) = (1/x)*exp(-x*x-0.5625+R1(z)/S1(z))
  72  *              z=1/x^2
  73  *      erf(x)  = 1 - erfc(x)
  74  *
  75  *      4. For x in [1/0.35,107]
  76  *      erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
  77  *                      = 2.0 - (1/x)*exp(-x*x-0.5625+R2(z)/S2(z))
  78  *                             if -6.666<x<0
  79  *                      = 2.0 - tiny            (if x <= -6.666)
  80  *              z=1/x^2
  81  *      erf(x)  = sign(x)*(1.0 - erfc(x)) if x < 6.666, else
  82  *      erf(x)  = sign(x)*(1.0 - tiny)
  83  *      Note1:
  84  *         To compute exp(-x*x-0.5625+R/S), let s be a single
  85  *         precision number and s := x; then
  86  *              -x*x = -s*s + (s-x)*(s+x)
  87  *              exp(-x*x-0.5626+R/S) =
  88  *                      exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
  89  *      Note2:
  90  *         Here 4 and 5 make use of the asymptotic series
  91  *                        exp(-x*x)
  92  *              erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
  93  *                        x*sqrt(pi)
  94  *
  95  *      5. For inf > x >= 107
  96  *      erf(x)  = sign(x) *(1 - tiny)  (raise inexact)
  97  *      erfc(x) = tiny*tiny (raise underflow) if x > 0
  98  *                      = 2 - tiny if x<0
  99  *
 100  *      7. Special case:
 101  *      erf(0)  = 0, erf(inf)  = 1, erf(-inf) = -1,
 102  *      erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
 103  *              erfc/erf(NaN) is NaN
 104  */
 105
 106
 107 #include "math.h"
 108 #include "math_private.h"
 109
 110 #ifdef __STDC__
 111 static const long double
 112 #else
 113 static long double
 114 #endif
 115 tiny = 1e-4931L,
 116   half = 0.5L,
 117   one = 1.0L,
 118   two = 2.0L,
 119         /* c = (float)0.84506291151 */
 120   erx = 0.845062911510467529296875L,
 121 /*
 122  * Coefficients for approximation to  erf on [0,0.84375]
 123  */
 124   /* 2/sqrt(pi) - 1 */
 125   efx = 1.2837916709551257389615890312154517168810E-1L,
 126   /* 8 * (2/sqrt(pi) - 1) */
 127   efx8 = 1.0270333367641005911692712249723613735048E0L,
 128
 129   pp[6] = {
 130     1.122751350964552113068262337278335028553E6L,
 131     -2.808533301997696164408397079650699163276E6L,
 132     -3.314325479115357458197119660818768924100E5L,
 133     -6.848684465326256109712135497895525446398E4L,
 134     -2.657817695110739185591505062971929859314E3L,
 135     -1.655310302737837556654146291646499062882E2L,
 136   },
 137
 138   qq[6] = {
 139     8.745588372054466262548908189000448124232E6L,
 140     3.746038264792471129367533128637019611485E6L,
 141     7.066358783162407559861156173539693900031E5L,
 142     7.448928604824620999413120955705448117056E4L,
 143     4.511583986730994111992253980546131408924E3L,
 144     1.368902937933296323345610240009071254014E2L,
 145     /* 1.000000000000000000000000000000000000000E0 */
 146   },
 147
 148 /*
 149  * Coefficients for approximation to  erf  in [0.84375,1.25]
 150  */
 151 /* erf(x+1) = 0.845062911510467529296875 + pa(x)/qa(x)
 152    -0.15625 <= x <= +.25
 153    Peak relative error 8.5e-22  */
 154
 155   pa[8] = {
 156     -1.076952146179812072156734957705102256059E0L,
 157      1.884814957770385593365179835059971587220E2L,
 158     -5.339153975012804282890066622962070115606E1L,
 159      4.435910679869176625928504532109635632618E1L,
 160      1.683219516032328828278557309642929135179E1L,
 161     -2.360236618396952560064259585299045804293E0L,
 162      1.852230047861891953244413872297940938041E0L,
 163      9.394994446747752308256773044667843200719E-2L,
 164   },
 165
 166   qa[7] =  {
 167     4.559263722294508998149925774781887811255E2L,
 168     3.289248982200800575749795055149780689738E2L,
 169     2.846070965875643009598627918383314457912E2L,
 170     1.398715859064535039433275722017479994465E2L,
 171     6.060190733759793706299079050985358190726E1L,
 172     2.078695677795422351040502569964299664233E1L,
 173     4.641271134150895940966798357442234498546E0L,
 174     /* 1.000000000000000000000000000000000000000E0 */
 175   },
 176
 177 /*
 178  * Coefficients for approximation to  erfc in [1.25,1/0.35]
 179  */
 180 /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + ra(x^2)/sa(x^2))
 181    1/2.85711669921875 < 1/x < 1/1.25
 182    Peak relative error 3.1e-21  */
 183
 184     ra[] = {
 185       1.363566591833846324191000679620738857234E-1L,
 186       1.018203167219873573808450274314658434507E1L,
 187       1.862359362334248675526472871224778045594E2L,
 188       1.411622588180721285284945138667933330348E3L,
 189       5.088538459741511988784440103218342840478E3L,
 190       8.928251553922176506858267311750789273656E3L,
 191       7.264436000148052545243018622742770549982E3L,
 192       2.387492459664548651671894725748959751119E3L,
 193       2.220916652813908085449221282808458466556E2L,
 194     },
 195
 196     sa[] = {
 197       -1.382234625202480685182526402169222331847E1L,
 198       -3.315638835627950255832519203687435946482E2L,
 199       -2.949124863912936259747237164260785326692E3L,
 200       -1.246622099070875940506391433635999693661E4L,
 201       -2.673079795851665428695842853070996219632E4L,
 202       -2.880269786660559337358397106518918220991E4L,
 203       -1.450600228493968044773354186390390823713E4L,
 204       -2.874539731125893533960680525192064277816E3L,
 205       -1.402241261419067750237395034116942296027E2L,
 206       /* 1.000000000000000000000000000000000000000E0 */
 207     },
 208 /*
 209  * Coefficients for approximation to  erfc in [1/.35,107]
 210  */
 211 /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + rb(x^2)/sb(x^2))
 212    1/6.6666259765625 < 1/x < 1/2.85711669921875
 213    Peak relative error 4.2e-22  */
 214     rb[] = {
 215       -4.869587348270494309550558460786501252369E-5L,
 216       -4.030199390527997378549161722412466959403E-3L,
 217       -9.434425866377037610206443566288917589122E-2L,
 218       -9.319032754357658601200655161585539404155E-1L,
 219       -4.273788174307459947350256581445442062291E0L,
 220       -8.842289940696150508373541814064198259278E0L,
 221       -7.069215249419887403187988144752613025255E0L,
 222       -1.401228723639514787920274427443330704764E0L,
 223     },
 224
 225     sb[] = {
 226       4.936254964107175160157544545879293019085E-3L,
 227       1.583457624037795744377163924895349412015E-1L,
 228       1.850647991850328356622940552450636420484E0L,
 229       9.927611557279019463768050710008450625415E0L,
 230       2.531667257649436709617165336779212114570E1L,
 231       2.869752886406743386458304052862814690045E1L,
 232       1.182059497870819562441683560749192539345E1L,
 233       /* 1.000000000000000000000000000000000000000E0 */
 234     },
 235 /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + rc(x^2)/sc(x^2))
 236    1/107 <= 1/x <= 1/6.6666259765625
 237    Peak relative error 1.1e-21  */
 238     rc[] = {
 239       -8.299617545269701963973537248996670806850E-5L,
 240       -6.243845685115818513578933902532056244108E-3L,
 241       -1.141667210620380223113693474478394397230E-1L,
 242       -7.521343797212024245375240432734425789409E-1L,
 243       -1.765321928311155824664963633786967602934E0L,
 244       -1.029403473103215800456761180695263439188E0L,
 245     },
 246
 247     sc[] = {
 248       8.413244363014929493035952542677768808601E-3L,
 249       2.065114333816877479753334599639158060979E-1L,
 250       1.639064941530797583766364412782135680148E0L,
 251       4.936788463787115555582319302981666347450E0L,
 252       5.005177727208955487404729933261347679090E0L,
 253       /* 1.000000000000000000000000000000000000000E0 */
 254     };
 255
 256 #ifdef __STDC__
 257 long double
 258 __erfl (long double x)
 259 #else
 260 long double
 261 __erfl (x)
 262      long double x;
 263 #endif
 264 {
 265   long double R, S, P, Q, s, y, z, r;
 266   int32_t ix, i;
 267   u_int32_t se, i0, i1;
 268
 269   GET_LDOUBLE_WORDS (se, i0, i1, x);
 270   ix = se & 0x7fff;
 271
 272   if (ix >= 0x7fff)
 273     {                           /* erf(nan)=nan */
 274       i = ((se & 0xffff) >> 15) << 1;
 275       return (long double) (1 - i) + one / x;   /* erf(+-inf)=+-1 */
 276     }
 277
 278   ix = (ix << 16) | (i0 >> 16);
 279   if (ix < 0x3ffed800) /* |x|<0.84375 */
 280     {
 281       if (ix < 0x3fde8000) /* |x|<2**-33 */
 282         {
 283           if (ix < 0x00080000)
 284             return 0.125 * (8.0 * x + efx8 * x);        /*avoid underflow */
 285           return x + efx * x;
 286         }
 287       z = x * x;
 288       r = pp[0] + z * (pp[1]
 289           + z * (pp[2] + z * (pp[3] + z * (pp[4] + z * pp[5]))));
 290       s = qq[0] + z * (qq[1]
 291           + z * (qq[2] + z * (qq[3] + z * (qq[4] + z * (qq[5] + z)))));
 292       y = r / s;
 293       return x + x * y;
 294     }
 295   if (ix < 0x3fffa000) /* 1.25 */
 296     {                           /* 0.84375 <= |x| < 1.25 */
 297       s = fabsl (x) - one;
 298       P = pa[0] + s * (pa[1] + s * (pa[2]
 299         + s * (pa[3] + s * (pa[4] + s * (pa[5] + s * (pa[6] + s * pa[7]))))));
 300       Q = qa[0] + s * (qa[1] + s * (qa[2]
 301         + s * (qa[3] + s * (qa[4] + s * (qa[5] + s * (qa[6] + s))))));
 302       if ((se & 0x8000) == 0)
 303         return erx + P / Q;
 304       else
 305         return -erx - P / Q;
 306     }
 307   if (ix >= 0x4001d555) /* 6.6666259765625 */
 308     {                           /* inf>|x|>=6.666 */
 309       if ((se & 0x8000) == 0)
 310         return one - tiny;
 311       else
 312         return tiny - one;
 313     }
 314   x = fabsl (x);
 315   s = one / (x * x);
 316   if (ix < 0x4000b6db) /* 2.85711669921875 */
 317     {
 318       R = ra[0] + s * (ra[1] + s * (ra[2] + s * (ra[3] + s * (ra[4] +
 319           s * (ra[5] + s * (ra[6] + s * (ra[7] + s * ra[8])))))));
 320       S = sa[0] + s * (sa[1] + s * (sa[2] + s * (sa[3] + s * (sa[4] +
 321           s * (sa[5] + s * (sa[6] + s * (sa[7] + s * (sa[8] + s))))))));
 322     }
 323   else
 324     {                           /* |x| >= 1/0.35 */
 325       R = rb[0] + s * (rb[1] + s * (rb[2] + s * (rb[3] + s * (rb[4] +
 326          s * (rb[5] + s * (rb[6] + s * rb[7]))))));
 327       S = sb[0] + s * (sb[1] + s * (sb[2] + s * (sb[3] + s * (sb[4] +
 328          s * (sb[5] + s * (sb[6] + s))))));
 329     }
 330   z = x;
 331   GET_LDOUBLE_WORDS (i, i0, i1, z);
 332   i1 = 0;
 333   SET_LDOUBLE_WORDS (z, i, i0, i1);
 334   r =
 335     __ieee754_expl (-z * z - 0.5625) * __ieee754_expl ((z - x) * (z + x) +
 336                                                      R / S);
 337   if ((se & 0x8000) == 0)
 338     return one - r / x;
 339   else
 340     return r / x - one;
 341 }
 342
 343 weak_alias (__erfl, erfl)
 344 #ifdef __STDC__
 345      long double
 346      __erfcl (long double x)
 347 #else
 348      long double
 349      __erfcl (x)
 350      long double x;
 351 #endif
 352 {
 353   int32_t hx, ix;
 354   long double R, S, P, Q, s, y, z, r;
 355   u_int32_t se, i0, i1;
 356
 357   GET_LDOUBLE_WORDS (se, i0, i1, x);
 358   ix = se & 0x7fff;
 359   if (ix >= 0x7fff)
 360     {                           /* erfc(nan)=nan */
 361       /* erfc(+-inf)=0,2 */
 362       return (long double) (((se & 0xffff) >> 15) << 1) + one / x;
 363     }
 364
 365   ix = (ix << 16) | (i0 >> 16);
 366   if (ix < 0x3ffed800) /* |x|<0.84375 */
 367     {
 368       if (ix < 0x3fbe0000) /* |x|<2**-65 */
 369         return one - x;
 370       z = x * x;
 371       r = pp[0] + z * (pp[1]
 372           + z * (pp[2] + z * (pp[3] + z * (pp[4] + z * pp[5]))));
 373       s = qq[0] + z * (qq[1]
 374           + z * (qq[2] + z * (qq[3] + z * (qq[4] + z * (qq[5] + z)))));
 375       y = r / s;
 376       if (ix < 0x3ffd8000) /* x<1/4 */
 377         {
 378           return one - (x + x * y);
 379         }
 380       else
 381         {
 382           r = x * y;
 383           r += (x - half);
 384           return half - r;
 385         }
 386     }
 387   if (ix < 0x3fffa000) /* 1.25 */
 388     {                           /* 0.84375 <= |x| < 1.25 */
 389       s = fabsl (x) - one;
 390       P = pa[0] + s * (pa[1] + s * (pa[2]
 391         + s * (pa[3] + s * (pa[4] + s * (pa[5] + s * (pa[6] + s * pa[7]))))));
 392       Q = qa[0] + s * (qa[1] + s * (qa[2]
 393         + s * (qa[3] + s * (qa[4] + s * (qa[5] + s * (qa[6] + s))))));
 394       if ((se & 0x8000) == 0)
 395         {
 396           z = one - erx;
 397           return z - P / Q;
 398         }
 399       else
 400         {
 401           z = erx + P / Q;
 402           return one + z;
 403         }
 404     }
 405   if (ix < 0x4005d600) /* 107 */
 406     {                           /* |x|<107 */
 407       x = fabsl (x);
 408       s = one / (x * x);
 409       if (ix < 0x4000b6db) /* 2.85711669921875 */
 410         {                       /* |x| < 1/.35 ~ 2.857143 */
 411           R = ra[0] + s * (ra[1] + s * (ra[2] + s * (ra[3] + s * (ra[4] +
 412               s * (ra[5] + s * (ra[6] + s * (ra[7] + s * ra[8])))))));
 413           S = sa[0] + s * (sa[1] + s * (sa[2] + s * (sa[3] + s * (sa[4] +
 414               s * (sa[5] + s * (sa[6] + s * (sa[7] + s * (sa[8] + s))))))));
 415         }
 416       else if (ix < 0x4001d555) /* 6.6666259765625 */
 417         {                       /* 6.666 > |x| >= 1/.35 ~ 2.857143 */
 418           R = rb[0] + s * (rb[1] + s * (rb[2] + s * (rb[3] + s * (rb[4] +
 419               s * (rb[5] + s * (rb[6] + s * rb[7]))))));
 420           S = sb[0] + s * (sb[1] + s * (sb[2] + s * (sb[3] + s * (sb[4] +
 421               s * (sb[5] + s * (sb[6] + s))))));
 422         }
 423       else
 424         {                       /* |x| >= 6.666 */
 425           if (se & 0x8000)
 426             return two - tiny;  /* x < -6.666 */
 427
 428           R = rc[0] + s * (rc[1] + s * (rc[2] + s * (rc[3] +
 429                                                     s * (rc[4] + s * rc[5]))));
 430           S = sc[0] + s * (sc[1] + s * (sc[2] + s * (sc[3] +
 431                                                     s * (sc[4] + s))));
 432         }
 433       z = x;
 434       GET_LDOUBLE_WORDS (hx, i0, i1, z);
 435       i1 = 0;
 436       i0 &= 0xffffff00;
 437       SET_LDOUBLE_WORDS (z, hx, i0, i1);
 438       r = __ieee754_expl (-z * z - 0.5625) *
 439         __ieee754_expl ((z - x) * (z + x) + R / S);
 440       if ((se & 0x8000) == 0)
 441         return r / x;
 442       else
 443         return two - r / x;
 444     }
 445   else
 446     {
 447       if ((se & 0x8000) == 0)
 448         return tiny * tiny;
 449       else
 450         return two - tiny;
 451     }
 452 }
 453
 454 weak_alias (__erfcl, erfcl)