More strict check of AVX512 support in assembler.
[glibc.git] / sysdeps / ieee754 / ldbl-96 / e_lgammal_r.c
blob0cc35f925223985185551c7c379fa0f8e777726a
1 /*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
12 /* Long double expansions are
13 Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14 and are incorporated herein by permission of the author. The author
15 reserves the right to distribute this material elsewhere under different
16 copying permissions. These modifications are distributed here under
17 the following terms:
19 This library is free software; you can redistribute it and/or
20 modify it under the terms of the GNU Lesser General Public
21 License as published by the Free Software Foundation; either
22 version 2.1 of the License, or (at your option) any later version.
24 This library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
29 You should have received a copy of the GNU Lesser General Public
30 License along with this library; if not, see
31 <http://www.gnu.org/licenses/>. */
33 /* __ieee754_lgammal_r(x, signgamp)
34 * Reentrant version of the logarithm of the Gamma function
35 * with user provide pointer for the sign of Gamma(x).
37 * Method:
38 * 1. Argument Reduction for 0 < x <= 8
39 * Since gamma(1+s)=s*gamma(s), for x in [0,8], we may
40 * reduce x to a number in [1.5,2.5] by
41 * lgamma(1+s) = log(s) + lgamma(s)
42 * for example,
43 * lgamma(7.3) = log(6.3) + lgamma(6.3)
44 * = log(6.3*5.3) + lgamma(5.3)
45 * = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3)
46 * 2. Polynomial approximation of lgamma around its
47 * minimun ymin=1.461632144968362245 to maintain monotonicity.
48 * On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use
49 * Let z = x-ymin;
50 * lgamma(x) = -1.214862905358496078218 + z^2*poly(z)
51 * 2. Rational approximation in the primary interval [2,3]
52 * We use the following approximation:
53 * s = x-2.0;
54 * lgamma(x) = 0.5*s + s*P(s)/Q(s)
55 * Our algorithms are based on the following observation
57 * zeta(2)-1 2 zeta(3)-1 3
58 * lgamma(2+s) = s*(1-Euler) + --------- * s - --------- * s + ...
59 * 2 3
61 * where Euler = 0.5771... is the Euler constant, which is very
62 * close to 0.5.
64 * 3. For x>=8, we have
65 * lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+....
66 * (better formula:
67 * lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...)
68 * Let z = 1/x, then we approximation
69 * f(z) = lgamma(x) - (x-0.5)(log(x)-1)
70 * by
71 * 3 5 11
72 * w = w0 + w1*z + w2*z + w3*z + ... + w6*z
74 * 4. For negative x, since (G is gamma function)
75 * -x*G(-x)*G(x) = pi/sin(pi*x),
76 * we have
77 * G(x) = pi/(sin(pi*x)*(-x)*G(-x))
78 * since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
79 * Hence, for x<0, signgam = sign(sin(pi*x)) and
80 * lgamma(x) = log(|Gamma(x)|)
81 * = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
82 * Note: one should avoid compute pi*(-x) directly in the
83 * computation of sin(pi*(-x)).
85 * 5. Special Cases
86 * lgamma(2+s) ~ s*(1-Euler) for tiny s
87 * lgamma(1)=lgamma(2)=0
88 * lgamma(x) ~ -log(x) for tiny x
89 * lgamma(0) = lgamma(inf) = inf
90 * lgamma(-integer) = +-inf
94 #include <libc-internal.h>
95 #include <math.h>
96 #include <math_private.h>
98 static const long double
99 half = 0.5L,
100 one = 1.0L,
101 pi = 3.14159265358979323846264L,
102 two63 = 9.223372036854775808e18L,
104 /* lgam(1+x) = 0.5 x + x a(x)/b(x)
105 -0.268402099609375 <= x <= 0
106 peak relative error 6.6e-22 */
107 a0 = -6.343246574721079391729402781192128239938E2L,
108 a1 = 1.856560238672465796768677717168371401378E3L,
109 a2 = 2.404733102163746263689288466865843408429E3L,
110 a3 = 8.804188795790383497379532868917517596322E2L,
111 a4 = 1.135361354097447729740103745999661157426E2L,
112 a5 = 3.766956539107615557608581581190400021285E0L,
114 b0 = 8.214973713960928795704317259806842490498E3L,
115 b1 = 1.026343508841367384879065363925870888012E4L,
116 b2 = 4.553337477045763320522762343132210919277E3L,
117 b3 = 8.506975785032585797446253359230031874803E2L,
118 b4 = 6.042447899703295436820744186992189445813E1L,
119 /* b5 = 1.000000000000000000000000000000000000000E0 */
122 tc = 1.4616321449683623412626595423257213284682E0L,
123 tf = -1.2148629053584961146050602565082954242826E-1,/* double precision */
124 /* tt = (tail of tf), i.e. tf + tt has extended precision. */
125 tt = 3.3649914684731379602768989080467587736363E-18L,
126 /* lgam ( 1.4616321449683623412626595423257213284682E0 ) =
127 -1.2148629053584960809551455717769158215135617312999903886372437313313530E-1 */
129 /* lgam (x + tc) = tf + tt + x g(x)/h(x)
130 - 0.230003726999612341262659542325721328468 <= x
131 <= 0.2699962730003876587373404576742786715318
132 peak relative error 2.1e-21 */
133 g0 = 3.645529916721223331888305293534095553827E-18L,
134 g1 = 5.126654642791082497002594216163574795690E3L,
135 g2 = 8.828603575854624811911631336122070070327E3L,
136 g3 = 5.464186426932117031234820886525701595203E3L,
137 g4 = 1.455427403530884193180776558102868592293E3L,
138 g5 = 1.541735456969245924860307497029155838446E2L,
139 g6 = 4.335498275274822298341872707453445815118E0L,
141 h0 = 1.059584930106085509696730443974495979641E4L,
142 h1 = 2.147921653490043010629481226937850618860E4L,
143 h2 = 1.643014770044524804175197151958100656728E4L,
144 h3 = 5.869021995186925517228323497501767586078E3L,
145 h4 = 9.764244777714344488787381271643502742293E2L,
146 h5 = 6.442485441570592541741092969581997002349E1L,
147 /* h6 = 1.000000000000000000000000000000000000000E0 */
150 /* lgam (x+1) = -0.5 x + x u(x)/v(x)
151 -0.100006103515625 <= x <= 0.231639862060546875
152 peak relative error 1.3e-21 */
153 u0 = -8.886217500092090678492242071879342025627E1L,
154 u1 = 6.840109978129177639438792958320783599310E2L,
155 u2 = 2.042626104514127267855588786511809932433E3L,
156 u3 = 1.911723903442667422201651063009856064275E3L,
157 u4 = 7.447065275665887457628865263491667767695E2L,
158 u5 = 1.132256494121790736268471016493103952637E2L,
159 u6 = 4.484398885516614191003094714505960972894E0L,
161 v0 = 1.150830924194461522996462401210374632929E3L,
162 v1 = 3.399692260848747447377972081399737098610E3L,
163 v2 = 3.786631705644460255229513563657226008015E3L,
164 v3 = 1.966450123004478374557778781564114347876E3L,
165 v4 = 4.741359068914069299837355438370682773122E2L,
166 v5 = 4.508989649747184050907206782117647852364E1L,
167 /* v6 = 1.000000000000000000000000000000000000000E0 */
170 /* lgam (x+2) = .5 x + x s(x)/r(x)
171 0 <= x <= 1
172 peak relative error 7.2e-22 */
173 s0 = 1.454726263410661942989109455292824853344E6L,
174 s1 = -3.901428390086348447890408306153378922752E6L,
175 s2 = -6.573568698209374121847873064292963089438E6L,
176 s3 = -3.319055881485044417245964508099095984643E6L,
177 s4 = -7.094891568758439227560184618114707107977E5L,
178 s5 = -6.263426646464505837422314539808112478303E4L,
179 s6 = -1.684926520999477529949915657519454051529E3L,
181 r0 = -1.883978160734303518163008696712983134698E7L,
182 r1 = -2.815206082812062064902202753264922306830E7L,
183 r2 = -1.600245495251915899081846093343626358398E7L,
184 r3 = -4.310526301881305003489257052083370058799E6L,
185 r4 = -5.563807682263923279438235987186184968542E5L,
186 r5 = -3.027734654434169996032905158145259713083E4L,
187 r6 = -4.501995652861105629217250715790764371267E2L,
188 /* r6 = 1.000000000000000000000000000000000000000E0 */
191 /* lgam(x) = ( x - 0.5 ) * log(x) - x + LS2PI + 1/x w(1/x^2)
192 x >= 8
193 Peak relative error 1.51e-21
194 w0 = LS2PI - 0.5 */
195 w0 = 4.189385332046727417803e-1L,
196 w1 = 8.333333333333331447505E-2L,
197 w2 = -2.777777777750349603440E-3L,
198 w3 = 7.936507795855070755671E-4L,
199 w4 = -5.952345851765688514613E-4L,
200 w5 = 8.412723297322498080632E-4L,
201 w6 = -1.880801938119376907179E-3L,
202 w7 = 4.885026142432270781165E-3L;
204 static const long double zero = 0.0L;
206 static long double
207 sin_pi (long double x)
209 long double y, z;
210 int n, ix;
211 u_int32_t se, i0, i1;
213 GET_LDOUBLE_WORDS (se, i0, i1, x);
214 ix = se & 0x7fff;
215 ix = (ix << 16) | (i0 >> 16);
216 if (ix < 0x3ffd8000) /* 0.25 */
217 return __sinl (pi * x);
218 y = -x; /* x is assume negative */
221 * argument reduction, make sure inexact flag not raised if input
222 * is an integer
224 z = __floorl (y);
225 if (z != y)
226 { /* inexact anyway */
227 y *= 0.5;
228 y = 2.0*(y - __floorl(y)); /* y = |x| mod 2.0 */
229 n = (int) (y*4.0);
231 else
233 if (ix >= 0x403f8000) /* 2^64 */
235 y = zero; n = 0; /* y must be even */
237 else
239 if (ix < 0x403e8000) /* 2^63 */
240 z = y + two63; /* exact */
241 GET_LDOUBLE_WORDS (se, i0, i1, z);
242 n = i1 & 1;
243 y = n;
244 n <<= 2;
248 switch (n)
250 case 0:
251 y = __sinl (pi * y);
252 break;
253 case 1:
254 case 2:
255 y = __cosl (pi * (half - y));
256 break;
257 case 3:
258 case 4:
259 y = __sinl (pi * (one - y));
260 break;
261 case 5:
262 case 6:
263 y = -__cosl (pi * (y - 1.5));
264 break;
265 default:
266 y = __sinl (pi * (y - 2.0));
267 break;
269 return -y;
273 long double
274 __ieee754_lgammal_r (long double x, int *signgamp)
276 long double t, y, z, nadj, p, p1, p2, q, r, w;
277 int i, ix;
278 u_int32_t se, i0, i1;
280 *signgamp = 1;
281 GET_LDOUBLE_WORDS (se, i0, i1, x);
282 ix = se & 0x7fff;
284 if (__builtin_expect((ix | i0 | i1) == 0, 0))
286 if (se & 0x8000)
287 *signgamp = -1;
288 return one / fabsl (x);
291 ix = (ix << 16) | (i0 >> 16);
293 /* purge off +-inf, NaN, +-0, and negative arguments */
294 if (__builtin_expect(ix >= 0x7fff0000, 0))
295 return x * x;
297 if (__builtin_expect(ix < 0x3fc08000, 0)) /* 2^-63 */
298 { /* |x|<2**-63, return -log(|x|) */
299 if (se & 0x8000)
301 *signgamp = -1;
302 return -__ieee754_logl (-x);
304 else
305 return -__ieee754_logl (x);
307 if (se & 0x8000)
309 t = sin_pi (x);
310 if (t == zero)
311 return one / fabsl (t); /* -integer */
312 nadj = __ieee754_logl (pi / fabsl (t * x));
313 if (t < zero)
314 *signgamp = -1;
315 x = -x;
318 /* purge off 1 and 2 */
319 if ((((ix - 0x3fff8000) | i0 | i1) == 0)
320 || (((ix - 0x40008000) | i0 | i1) == 0))
321 r = 0;
322 else if (ix < 0x40008000) /* 2.0 */
324 /* x < 2.0 */
325 if (ix <= 0x3ffee666) /* 8.99993896484375e-1 */
327 /* lgamma(x) = lgamma(x+1) - log(x) */
328 r = -__ieee754_logl (x);
329 if (ix >= 0x3ffebb4a) /* 7.31597900390625e-1 */
331 y = x - one;
332 i = 0;
334 else if (ix >= 0x3ffced33)/* 2.31639862060546875e-1 */
336 y = x - (tc - one);
337 i = 1;
339 else
341 /* x < 0.23 */
342 y = x;
343 i = 2;
346 else
348 r = zero;
349 if (ix >= 0x3fffdda6) /* 1.73162841796875 */
351 /* [1.7316,2] */
352 y = x - 2.0;
353 i = 0;
355 else if (ix >= 0x3fff9da6)/* 1.23162841796875 */
357 /* [1.23,1.73] */
358 y = x - tc;
359 i = 1;
361 else
363 /* [0.9, 1.23] */
364 y = x - one;
365 i = 2;
368 switch (i)
370 case 0:
371 p1 = a0 + y * (a1 + y * (a2 + y * (a3 + y * (a4 + y * a5))));
372 p2 = b0 + y * (b1 + y * (b2 + y * (b3 + y * (b4 + y))));
373 r += half * y + y * p1/p2;
374 break;
375 case 1:
376 p1 = g0 + y * (g1 + y * (g2 + y * (g3 + y * (g4 + y * (g5 + y * g6)))));
377 p2 = h0 + y * (h1 + y * (h2 + y * (h3 + y * (h4 + y * (h5 + y)))));
378 p = tt + y * p1/p2;
379 r += (tf + p);
380 break;
381 case 2:
382 p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * (u5 + y * u6))))));
383 p2 = v0 + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * (v5 + y)))));
384 r += (-half * y + p1 / p2);
387 else if (ix < 0x40028000) /* 8.0 */
389 /* x < 8.0 */
390 i = (int) x;
391 t = zero;
392 y = x - (double) i;
393 p = y *
394 (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
395 q = r0 + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * (r6 + y))))));
396 r = half * y + p / q;
397 z = one; /* lgamma(1+s) = log(s) + lgamma(s) */
398 switch (i)
400 case 7:
401 z *= (y + 6.0); /* FALLTHRU */
402 case 6:
403 z *= (y + 5.0); /* FALLTHRU */
404 case 5:
405 z *= (y + 4.0); /* FALLTHRU */
406 case 4:
407 z *= (y + 3.0); /* FALLTHRU */
408 case 3:
409 z *= (y + 2.0); /* FALLTHRU */
410 r += __ieee754_logl (z);
411 break;
414 else if (ix < 0x40418000) /* 2^66 */
416 /* 8.0 <= x < 2**66 */
417 t = __ieee754_logl (x);
418 z = one / x;
419 y = z * z;
420 w = w0 + z * (w1
421 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * (w6 + y * w7))))));
422 r = (x - half) * (t - one) + w;
424 else
425 /* 2**66 <= x <= inf */
426 r = x * (__ieee754_logl (x) - one);
427 /* NADJ is set for negative arguments but not otherwise, resulting
428 in warnings that it may be used uninitialized although in the
429 cases where it is used it has always been set. */
430 DIAG_PUSH_NEEDS_COMMENT;
431 #if __GNUC_PREREQ (4, 7)
432 DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Wmaybe-uninitialized");
433 #else
434 DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Wuninitialized");
435 #endif
436 if (se & 0x8000)
437 r = nadj - r;
438 DIAG_POP_NEEDS_COMMENT;
439 return r;
441 strong_alias (__ieee754_lgammal_r, __lgammal_r_finite)