More strict check of AVX512 support in assembler.
[glibc.git] / sysdeps / ieee754 / ldbl-96 / s_erfl.c
blobc27de812ca23a7f26aeb32cc5cc2b1116b7bf34c
1 /*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
12 /* Long double expansions are
13 Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14 and are incorporated herein by permission of the author. The author
15 reserves the right to distribute this material elsewhere under different
16 copying permissions. These modifications are distributed here under
17 the following terms:
19 This library is free software; you can redistribute it and/or
20 modify it under the terms of the GNU Lesser General Public
21 License as published by the Free Software Foundation; either
22 version 2.1 of the License, or (at your option) any later version.
24 This library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
29 You should have received a copy of the GNU Lesser General Public
30 License along with this library; if not, see
31 <http://www.gnu.org/licenses/>. */
33 /* double erf(double x)
34 * double erfc(double x)
35 * x
36 * 2 |\
37 * erf(x) = --------- | exp(-t*t)dt
38 * sqrt(pi) \|
39 * 0
41 * erfc(x) = 1-erf(x)
42 * Note that
43 * erf(-x) = -erf(x)
44 * erfc(-x) = 2 - erfc(x)
46 * Method:
47 * 1. For |x| in [0, 0.84375]
48 * erf(x) = x + x*R(x^2)
49 * erfc(x) = 1 - erf(x) if x in [-.84375,0.25]
50 * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375]
51 * Remark. The formula is derived by noting
52 * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....)
53 * and that
54 * 2/sqrt(pi) = 1.128379167095512573896158903121545171688
55 * is close to one. The interval is chosen because the fix
56 * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is
57 * near 0.6174), and by some experiment, 0.84375 is chosen to
58 * guarantee the error is less than one ulp for erf.
60 * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and
61 * c = 0.84506291151 rounded to single (24 bits)
62 * erf(x) = sign(x) * (c + P1(s)/Q1(s))
63 * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0
64 * 1+(c+P1(s)/Q1(s)) if x < 0
65 * Remark: here we use the taylor series expansion at x=1.
66 * erf(1+s) = erf(1) + s*Poly(s)
67 * = 0.845.. + P1(s)/Q1(s)
68 * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25]
70 * 3. For x in [1.25,1/0.35(~2.857143)],
71 * erfc(x) = (1/x)*exp(-x*x-0.5625+R1(z)/S1(z))
72 * z=1/x^2
73 * erf(x) = 1 - erfc(x)
75 * 4. For x in [1/0.35,107]
76 * erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0
77 * = 2.0 - (1/x)*exp(-x*x-0.5625+R2(z)/S2(z))
78 * if -6.666<x<0
79 * = 2.0 - tiny (if x <= -6.666)
80 * z=1/x^2
81 * erf(x) = sign(x)*(1.0 - erfc(x)) if x < 6.666, else
82 * erf(x) = sign(x)*(1.0 - tiny)
83 * Note1:
84 * To compute exp(-x*x-0.5625+R/S), let s be a single
85 * precision number and s := x; then
86 * -x*x = -s*s + (s-x)*(s+x)
87 * exp(-x*x-0.5626+R/S) =
88 * exp(-s*s-0.5625)*exp((s-x)*(s+x)+R/S);
89 * Note2:
90 * Here 4 and 5 make use of the asymptotic series
91 * exp(-x*x)
92 * erfc(x) ~ ---------- * ( 1 + Poly(1/x^2) )
93 * x*sqrt(pi)
95 * 5. For inf > x >= 107
96 * erf(x) = sign(x) *(1 - tiny) (raise inexact)
97 * erfc(x) = tiny*tiny (raise underflow) if x > 0
98 * = 2 - tiny if x<0
100 * 7. Special case:
101 * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1,
102 * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2,
103 * erfc/erf(NaN) is NaN
107 #include <errno.h>
108 #include <float.h>
109 #include <math.h>
110 #include <math_private.h>
112 static const long double
113 tiny = 1e-4931L,
114 half = 0.5L,
115 one = 1.0L,
116 two = 2.0L,
117 /* c = (float)0.84506291151 */
118 erx = 0.845062911510467529296875L,
120 * Coefficients for approximation to erf on [0,0.84375]
122 /* 2/sqrt(pi) - 1 */
123 efx = 1.2837916709551257389615890312154517168810E-1L,
125 pp[6] = {
126 1.122751350964552113068262337278335028553E6L,
127 -2.808533301997696164408397079650699163276E6L,
128 -3.314325479115357458197119660818768924100E5L,
129 -6.848684465326256109712135497895525446398E4L,
130 -2.657817695110739185591505062971929859314E3L,
131 -1.655310302737837556654146291646499062882E2L,
134 qq[6] = {
135 8.745588372054466262548908189000448124232E6L,
136 3.746038264792471129367533128637019611485E6L,
137 7.066358783162407559861156173539693900031E5L,
138 7.448928604824620999413120955705448117056E4L,
139 4.511583986730994111992253980546131408924E3L,
140 1.368902937933296323345610240009071254014E2L,
141 /* 1.000000000000000000000000000000000000000E0 */
145 * Coefficients for approximation to erf in [0.84375,1.25]
147 /* erf(x+1) = 0.845062911510467529296875 + pa(x)/qa(x)
148 -0.15625 <= x <= +.25
149 Peak relative error 8.5e-22 */
151 pa[8] = {
152 -1.076952146179812072156734957705102256059E0L,
153 1.884814957770385593365179835059971587220E2L,
154 -5.339153975012804282890066622962070115606E1L,
155 4.435910679869176625928504532109635632618E1L,
156 1.683219516032328828278557309642929135179E1L,
157 -2.360236618396952560064259585299045804293E0L,
158 1.852230047861891953244413872297940938041E0L,
159 9.394994446747752308256773044667843200719E-2L,
162 qa[7] = {
163 4.559263722294508998149925774781887811255E2L,
164 3.289248982200800575749795055149780689738E2L,
165 2.846070965875643009598627918383314457912E2L,
166 1.398715859064535039433275722017479994465E2L,
167 6.060190733759793706299079050985358190726E1L,
168 2.078695677795422351040502569964299664233E1L,
169 4.641271134150895940966798357442234498546E0L,
170 /* 1.000000000000000000000000000000000000000E0 */
174 * Coefficients for approximation to erfc in [1.25,1/0.35]
176 /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + ra(x^2)/sa(x^2))
177 1/2.85711669921875 < 1/x < 1/1.25
178 Peak relative error 3.1e-21 */
180 ra[] = {
181 1.363566591833846324191000679620738857234E-1L,
182 1.018203167219873573808450274314658434507E1L,
183 1.862359362334248675526472871224778045594E2L,
184 1.411622588180721285284945138667933330348E3L,
185 5.088538459741511988784440103218342840478E3L,
186 8.928251553922176506858267311750789273656E3L,
187 7.264436000148052545243018622742770549982E3L,
188 2.387492459664548651671894725748959751119E3L,
189 2.220916652813908085449221282808458466556E2L,
192 sa[] = {
193 -1.382234625202480685182526402169222331847E1L,
194 -3.315638835627950255832519203687435946482E2L,
195 -2.949124863912936259747237164260785326692E3L,
196 -1.246622099070875940506391433635999693661E4L,
197 -2.673079795851665428695842853070996219632E4L,
198 -2.880269786660559337358397106518918220991E4L,
199 -1.450600228493968044773354186390390823713E4L,
200 -2.874539731125893533960680525192064277816E3L,
201 -1.402241261419067750237395034116942296027E2L,
202 /* 1.000000000000000000000000000000000000000E0 */
205 * Coefficients for approximation to erfc in [1/.35,107]
207 /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + rb(x^2)/sb(x^2))
208 1/6.6666259765625 < 1/x < 1/2.85711669921875
209 Peak relative error 4.2e-22 */
210 rb[] = {
211 -4.869587348270494309550558460786501252369E-5L,
212 -4.030199390527997378549161722412466959403E-3L,
213 -9.434425866377037610206443566288917589122E-2L,
214 -9.319032754357658601200655161585539404155E-1L,
215 -4.273788174307459947350256581445442062291E0L,
216 -8.842289940696150508373541814064198259278E0L,
217 -7.069215249419887403187988144752613025255E0L,
218 -1.401228723639514787920274427443330704764E0L,
221 sb[] = {
222 4.936254964107175160157544545879293019085E-3L,
223 1.583457624037795744377163924895349412015E-1L,
224 1.850647991850328356622940552450636420484E0L,
225 9.927611557279019463768050710008450625415E0L,
226 2.531667257649436709617165336779212114570E1L,
227 2.869752886406743386458304052862814690045E1L,
228 1.182059497870819562441683560749192539345E1L,
229 /* 1.000000000000000000000000000000000000000E0 */
231 /* erfc(1/x) = x exp (-1/x^2 - 0.5625 + rc(x^2)/sc(x^2))
232 1/107 <= 1/x <= 1/6.6666259765625
233 Peak relative error 1.1e-21 */
234 rc[] = {
235 -8.299617545269701963973537248996670806850E-5L,
236 -6.243845685115818513578933902532056244108E-3L,
237 -1.141667210620380223113693474478394397230E-1L,
238 -7.521343797212024245375240432734425789409E-1L,
239 -1.765321928311155824664963633786967602934E0L,
240 -1.029403473103215800456761180695263439188E0L,
243 sc[] = {
244 8.413244363014929493035952542677768808601E-3L,
245 2.065114333816877479753334599639158060979E-1L,
246 1.639064941530797583766364412782135680148E0L,
247 4.936788463787115555582319302981666347450E0L,
248 5.005177727208955487404729933261347679090E0L,
249 /* 1.000000000000000000000000000000000000000E0 */
252 long double
253 __erfl (long double x)
255 long double R, S, P, Q, s, y, z, r;
256 int32_t ix, i;
257 u_int32_t se, i0, i1;
259 GET_LDOUBLE_WORDS (se, i0, i1, x);
260 ix = se & 0x7fff;
262 if (ix >= 0x7fff)
263 { /* erf(nan)=nan */
264 i = ((se & 0xffff) >> 15) << 1;
265 return (long double) (1 - i) + one / x; /* erf(+-inf)=+-1 */
268 ix = (ix << 16) | (i0 >> 16);
269 if (ix < 0x3ffed800) /* |x|<0.84375 */
271 if (ix < 0x3fde8000) /* |x|<2**-33 */
273 if (ix < 0x00080000)
275 /* Avoid spurious underflow. */
276 long double ret = 0.0625 * (16.0 * x + (16.0 * efx) * x);
277 if (fabsl (ret) < LDBL_MIN)
279 long double force_underflow = ret * ret;
280 math_force_eval (force_underflow);
282 return ret;
284 return x + efx * x;
286 z = x * x;
287 r = pp[0] + z * (pp[1]
288 + z * (pp[2] + z * (pp[3] + z * (pp[4] + z * pp[5]))));
289 s = qq[0] + z * (qq[1]
290 + z * (qq[2] + z * (qq[3] + z * (qq[4] + z * (qq[5] + z)))));
291 y = r / s;
292 return x + x * y;
294 if (ix < 0x3fffa000) /* 1.25 */
295 { /* 0.84375 <= |x| < 1.25 */
296 s = fabsl (x) - one;
297 P = pa[0] + s * (pa[1] + s * (pa[2]
298 + s * (pa[3] + s * (pa[4] + s * (pa[5] + s * (pa[6] + s * pa[7]))))));
299 Q = qa[0] + s * (qa[1] + s * (qa[2]
300 + s * (qa[3] + s * (qa[4] + s * (qa[5] + s * (qa[6] + s))))));
301 if ((se & 0x8000) == 0)
302 return erx + P / Q;
303 else
304 return -erx - P / Q;
306 if (ix >= 0x4001d555) /* 6.6666259765625 */
307 { /* inf>|x|>=6.666 */
308 if ((se & 0x8000) == 0)
309 return one - tiny;
310 else
311 return tiny - one;
313 x = fabsl (x);
314 s = one / (x * x);
315 if (ix < 0x4000b6db) /* 2.85711669921875 */
317 R = ra[0] + s * (ra[1] + s * (ra[2] + s * (ra[3] + s * (ra[4] +
318 s * (ra[5] + s * (ra[6] + s * (ra[7] + s * ra[8])))))));
319 S = sa[0] + s * (sa[1] + s * (sa[2] + s * (sa[3] + s * (sa[4] +
320 s * (sa[5] + s * (sa[6] + s * (sa[7] + s * (sa[8] + s))))))));
322 else
323 { /* |x| >= 1/0.35 */
324 R = rb[0] + s * (rb[1] + s * (rb[2] + s * (rb[3] + s * (rb[4] +
325 s * (rb[5] + s * (rb[6] + s * rb[7]))))));
326 S = sb[0] + s * (sb[1] + s * (sb[2] + s * (sb[3] + s * (sb[4] +
327 s * (sb[5] + s * (sb[6] + s))))));
329 z = x;
330 GET_LDOUBLE_WORDS (i, i0, i1, z);
331 i1 = 0;
332 SET_LDOUBLE_WORDS (z, i, i0, i1);
334 __ieee754_expl (-z * z - 0.5625) * __ieee754_expl ((z - x) * (z + x) +
335 R / S);
336 if ((se & 0x8000) == 0)
337 return one - r / x;
338 else
339 return r / x - one;
342 weak_alias (__erfl, erfl)
343 long double
344 __erfcl (long double x)
346 int32_t hx, ix;
347 long double R, S, P, Q, s, y, z, r;
348 u_int32_t se, i0, i1;
350 GET_LDOUBLE_WORDS (se, i0, i1, x);
351 ix = se & 0x7fff;
352 if (ix >= 0x7fff)
353 { /* erfc(nan)=nan */
354 /* erfc(+-inf)=0,2 */
355 return (long double) (((se & 0xffff) >> 15) << 1) + one / x;
358 ix = (ix << 16) | (i0 >> 16);
359 if (ix < 0x3ffed800) /* |x|<0.84375 */
361 if (ix < 0x3fbe0000) /* |x|<2**-65 */
362 return one - x;
363 z = x * x;
364 r = pp[0] + z * (pp[1]
365 + z * (pp[2] + z * (pp[3] + z * (pp[4] + z * pp[5]))));
366 s = qq[0] + z * (qq[1]
367 + z * (qq[2] + z * (qq[3] + z * (qq[4] + z * (qq[5] + z)))));
368 y = r / s;
369 if (ix < 0x3ffd8000) /* x<1/4 */
371 return one - (x + x * y);
373 else
375 r = x * y;
376 r += (x - half);
377 return half - r;
380 if (ix < 0x3fffa000) /* 1.25 */
381 { /* 0.84375 <= |x| < 1.25 */
382 s = fabsl (x) - one;
383 P = pa[0] + s * (pa[1] + s * (pa[2]
384 + s * (pa[3] + s * (pa[4] + s * (pa[5] + s * (pa[6] + s * pa[7]))))));
385 Q = qa[0] + s * (qa[1] + s * (qa[2]
386 + s * (qa[3] + s * (qa[4] + s * (qa[5] + s * (qa[6] + s))))));
387 if ((se & 0x8000) == 0)
389 z = one - erx;
390 return z - P / Q;
392 else
394 z = erx + P / Q;
395 return one + z;
398 if (ix < 0x4005d600) /* 107 */
399 { /* |x|<107 */
400 x = fabsl (x);
401 s = one / (x * x);
402 if (ix < 0x4000b6db) /* 2.85711669921875 */
403 { /* |x| < 1/.35 ~ 2.857143 */
404 R = ra[0] + s * (ra[1] + s * (ra[2] + s * (ra[3] + s * (ra[4] +
405 s * (ra[5] + s * (ra[6] + s * (ra[7] + s * ra[8])))))));
406 S = sa[0] + s * (sa[1] + s * (sa[2] + s * (sa[3] + s * (sa[4] +
407 s * (sa[5] + s * (sa[6] + s * (sa[7] + s * (sa[8] + s))))))));
409 else if (ix < 0x4001d555) /* 6.6666259765625 */
410 { /* 6.666 > |x| >= 1/.35 ~ 2.857143 */
411 R = rb[0] + s * (rb[1] + s * (rb[2] + s * (rb[3] + s * (rb[4] +
412 s * (rb[5] + s * (rb[6] + s * rb[7]))))));
413 S = sb[0] + s * (sb[1] + s * (sb[2] + s * (sb[3] + s * (sb[4] +
414 s * (sb[5] + s * (sb[6] + s))))));
416 else
417 { /* |x| >= 6.666 */
418 if (se & 0x8000)
419 return two - tiny; /* x < -6.666 */
421 R = rc[0] + s * (rc[1] + s * (rc[2] + s * (rc[3] +
422 s * (rc[4] + s * rc[5]))));
423 S = sc[0] + s * (sc[1] + s * (sc[2] + s * (sc[3] +
424 s * (sc[4] + s))));
426 z = x;
427 GET_LDOUBLE_WORDS (hx, i0, i1, z);
428 i1 = 0;
429 i0 &= 0xffffff00;
430 SET_LDOUBLE_WORDS (z, hx, i0, i1);
431 r = __ieee754_expl (-z * z - 0.5625) *
432 __ieee754_expl ((z - x) * (z + x) + R / S);
433 if ((se & 0x8000) == 0)
435 long double ret = r / x;
436 if (ret == 0)
437 __set_errno (ERANGE);
438 return ret;
440 else
441 return two - r / x;
443 else
445 if ((se & 0x8000) == 0)
447 __set_errno (ERANGE);
448 return tiny * tiny;
450 else
451 return two - tiny;
455 weak_alias (__erfcl, erfcl)