More strict check of AVX512 support in assembler.
[glibc.git] / sysdeps / ieee754 / ldbl-96 / e_j1l.c
blob1bd54995a88863ff2a0d765f1930c990a4df7d2f
1 /*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
12 /* Long double expansions are
13 Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov>
14 and are incorporated herein by permission of the author. The author
15 reserves the right to distribute this material elsewhere under different
16 copying permissions. These modifications are distributed here under
17 the following terms:
19 This library is free software; you can redistribute it and/or
20 modify it under the terms of the GNU Lesser General Public
21 License as published by the Free Software Foundation; either
22 version 2.1 of the License, or (at your option) any later version.
24 This library is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 Lesser General Public License for more details.
29 You should have received a copy of the GNU Lesser General Public
30 License along with this library; if not, see
31 <http://www.gnu.org/licenses/>. */
33 /* __ieee754_j1(x), __ieee754_y1(x)
34 * Bessel function of the first and second kinds of order zero.
35 * Method -- j1(x):
36 * 1. For tiny x, we use j1(x) = x/2 - x^3/16 + x^5/384 - ...
37 * 2. Reduce x to |x| since j1(x)=-j1(-x), and
38 * for x in (0,2)
39 * j1(x) = x/2 + x*z*R0/S0, where z = x*x;
40 * for x in (2,inf)
41 * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x1)-q1(x)*sin(x1))
42 * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1))
43 * where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1)
44 * as follow:
45 * cos(x1) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4)
46 * = 1/sqrt(2) * (sin(x) - cos(x))
47 * sin(x1) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
48 * = -1/sqrt(2) * (sin(x) + cos(x))
49 * (To avoid cancellation, use
50 * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
51 * to compute the worse one.)
53 * 3 Special cases
54 * j1(nan)= nan
55 * j1(0) = 0
56 * j1(inf) = 0
58 * Method -- y1(x):
59 * 1. screen out x<=0 cases: y1(0)=-inf, y1(x<0)=NaN
60 * 2. For x<2.
61 * Since
62 * y1(x) = 2/pi*(j1(x)*(ln(x/2)+Euler)-1/x-x/2+5/64*x^3-...)
63 * therefore y1(x)-2/pi*j1(x)*ln(x)-1/x is an odd function.
64 * We use the following function to approximate y1,
65 * y1(x) = x*U(z)/V(z) + (2/pi)*(j1(x)*ln(x)-1/x), z= x^2
66 * Note: For tiny x, 1/x dominate y1 and hence
67 * y1(tiny) = -2/pi/tiny
68 * 3. For x>=2.
69 * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1))
70 * where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1)
71 * by method mentioned above.
74 #include <errno.h>
75 #include <math.h>
76 #include <math_private.h>
78 static long double pone (long double), qone (long double);
80 static const long double
81 huge = 1e4930L,
82 one = 1.0L,
83 invsqrtpi = 5.6418958354775628694807945156077258584405e-1L,
84 tpi = 6.3661977236758134307553505349005744813784e-1L,
86 /* J1(x) = .5 x + x x^2 R(x^2) / S(x^2)
87 0 <= x <= 2
88 Peak relative error 4.5e-21 */
89 R[5] = {
90 -9.647406112428107954753770469290757756814E7L,
91 2.686288565865230690166454005558203955564E6L,
92 -3.689682683905671185891885948692283776081E4L,
93 2.195031194229176602851429567792676658146E2L,
94 -5.124499848728030297902028238597308971319E-1L,
97 S[4] =
99 1.543584977988497274437410333029029035089E9L,
100 2.133542369567701244002565983150952549520E7L,
101 1.394077011298227346483732156167414670520E5L,
102 5.252401789085732428842871556112108446506E2L,
103 /* 1.000000000000000000000000000000000000000E0L, */
106 static const long double zero = 0.0;
109 long double
110 __ieee754_j1l (long double x)
112 long double z, c, r, s, ss, cc, u, v, y;
113 int32_t ix;
114 u_int32_t se;
116 GET_LDOUBLE_EXP (se, x);
117 ix = se & 0x7fff;
118 if (__glibc_unlikely (ix >= 0x7fff))
119 return one / x;
120 y = fabsl (x);
121 if (ix >= 0x4000)
122 { /* |x| >= 2.0 */
123 __sincosl (y, &s, &c);
124 ss = -s - c;
125 cc = s - c;
126 if (ix < 0x7ffe)
127 { /* make sure y+y not overflow */
128 z = __cosl (y + y);
129 if ((s * c) > zero)
130 cc = z / ss;
131 else
132 ss = z / cc;
135 * j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x)
136 * y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x)
138 if (__glibc_unlikely (ix > 0x4080))
139 z = (invsqrtpi * cc) / __ieee754_sqrtl (y);
140 else
142 u = pone (y);
143 v = qone (y);
144 z = invsqrtpi * (u * cc - v * ss) / __ieee754_sqrtl (y);
146 if (se & 0x8000)
147 return -z;
148 else
149 return z;
151 if (__glibc_unlikely (ix < 0x3fde)) /* |x| < 2^-33 */
153 if (huge + x > one)
154 return 0.5 * x; /* inexact if x!=0 necessary */
156 z = x * x;
157 r = z * (R[0] + z * (R[1]+ z * (R[2] + z * (R[3] + z * R[4]))));
158 s = S[0] + z * (S[1] + z * (S[2] + z * (S[3] + z)));
159 r *= x;
160 return (x * 0.5 + r / s);
162 strong_alias (__ieee754_j1l, __j1l_finite)
165 /* Y1(x) = 2/pi * (log(x) * j1(x) - 1/x) + x R(x^2)
166 0 <= x <= 2
167 Peak relative error 2.3e-23 */
168 static const long double U0[6] = {
169 -5.908077186259914699178903164682444848615E10L,
170 1.546219327181478013495975514375773435962E10L,
171 -6.438303331169223128870035584107053228235E8L,
172 9.708540045657182600665968063824819371216E6L,
173 -6.138043997084355564619377183564196265471E4L,
174 1.418503228220927321096904291501161800215E2L,
176 static const long double V0[5] = {
177 3.013447341682896694781964795373783679861E11L,
178 4.669546565705981649470005402243136124523E9L,
179 3.595056091631351184676890179233695857260E7L,
180 1.761554028569108722903944659933744317994E5L,
181 5.668480419646516568875555062047234534863E2L,
182 /* 1.000000000000000000000000000000000000000E0L, */
186 long double
187 __ieee754_y1l (long double x)
189 long double z, s, c, ss, cc, u, v;
190 int32_t ix;
191 u_int32_t se, i0, i1;
193 GET_LDOUBLE_WORDS (se, i0, i1, x);
194 ix = se & 0x7fff;
195 /* if Y1(NaN) is NaN, Y1(-inf) is NaN, Y1(inf) is 0 */
196 if (__glibc_unlikely (se & 0x8000))
197 return zero / (zero * x);
198 if (__glibc_unlikely (ix >= 0x7fff))
199 return one / (x + x * x);
200 if (__glibc_unlikely ((i0 | i1) == 0))
201 return -HUGE_VALL + x; /* -inf and overflow exception. */
202 if (ix >= 0x4000)
203 { /* |x| >= 2.0 */
204 __sincosl (x, &s, &c);
205 ss = -s - c;
206 cc = s - c;
207 if (ix < 0x7ffe)
208 { /* make sure x+x not overflow */
209 z = __cosl (x + x);
210 if ((s * c) > zero)
211 cc = z / ss;
212 else
213 ss = z / cc;
215 /* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x0)+q1(x)*cos(x0))
216 * where x0 = x-3pi/4
217 * Better formula:
218 * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4)
219 * = 1/sqrt(2) * (sin(x) - cos(x))
220 * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
221 * = -1/sqrt(2) * (cos(x) + sin(x))
222 * To avoid cancellation, use
223 * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
224 * to compute the worse one.
226 if (__glibc_unlikely (ix > 0x4080))
227 z = (invsqrtpi * ss) / __ieee754_sqrtl (x);
228 else
230 u = pone (x);
231 v = qone (x);
232 z = invsqrtpi * (u * ss + v * cc) / __ieee754_sqrtl (x);
234 return z;
236 if (__glibc_unlikely (ix <= 0x3fbe))
237 { /* x < 2**-65 */
238 z = -tpi / x;
239 if (isinf (z))
240 __set_errno (ERANGE);
241 return z;
243 z = x * x;
244 u = U0[0] + z * (U0[1] + z * (U0[2] + z * (U0[3] + z * (U0[4] + z * U0[5]))));
245 v = V0[0] + z * (V0[1] + z * (V0[2] + z * (V0[3] + z * (V0[4] + z))));
246 return (x * (u / v) +
247 tpi * (__ieee754_j1l (x) * __ieee754_logl (x) - one / x));
249 strong_alias (__ieee754_y1l, __y1l_finite)
252 /* For x >= 8, the asymptotic expansions of pone is
253 * 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x.
254 * We approximate pone by
255 * pone(x) = 1 + (R/S)
258 /* J1(x) cosX + Y1(x) sinX = sqrt( 2/(pi x)) P1(x)
259 P1(x) = 1 + z^2 R(z^2), z=1/x
260 8 <= x <= inf (0 <= z <= 0.125)
261 Peak relative error 5.2e-22 */
263 static const long double pr8[7] = {
264 8.402048819032978959298664869941375143163E-9L,
265 1.813743245316438056192649247507255996036E-6L,
266 1.260704554112906152344932388588243836276E-4L,
267 3.439294839869103014614229832700986965110E-3L,
268 3.576910849712074184504430254290179501209E-2L,
269 1.131111483254318243139953003461511308672E-1L,
270 4.480715825681029711521286449131671880953E-2L,
272 static const long double ps8[6] = {
273 7.169748325574809484893888315707824924354E-8L,
274 1.556549720596672576431813934184403614817E-5L,
275 1.094540125521337139209062035774174565882E-3L,
276 3.060978962596642798560894375281428805840E-2L,
277 3.374146536087205506032643098619414507024E-1L,
278 1.253830208588979001991901126393231302559E0L,
279 /* 1.000000000000000000000000000000000000000E0L, */
282 /* J1(x) cosX + Y1(x) sinX = sqrt( 2/(pi x)) P1(x)
283 P1(x) = 1 + z^2 R(z^2), z=1/x
284 4.54541015625 <= x <= 8
285 Peak relative error 7.7e-22 */
286 static const long double pr5[7] = {
287 4.318486887948814529950980396300969247900E-7L,
288 4.715341880798817230333360497524173929315E-5L,
289 1.642719430496086618401091544113220340094E-3L,
290 2.228688005300803935928733750456396149104E-2L,
291 1.142773760804150921573259605730018327162E-1L,
292 1.755576530055079253910829652698703791957E-1L,
293 3.218803858282095929559165965353784980613E-2L,
295 static const long double ps5[6] = {
296 3.685108812227721334719884358034713967557E-6L,
297 4.069102509511177498808856515005792027639E-4L,
298 1.449728676496155025507893322405597039816E-2L,
299 2.058869213229520086582695850441194363103E-1L,
300 1.164890985918737148968424972072751066553E0L,
301 2.274776933457009446573027260373361586841E0L,
302 /* 1.000000000000000000000000000000000000000E0L,*/
305 /* J1(x) cosX + Y1(x) sinX = sqrt( 2/(pi x)) P1(x)
306 P1(x) = 1 + z^2 R(z^2), z=1/x
307 2.85711669921875 <= x <= 4.54541015625
308 Peak relative error 6.5e-21 */
309 static const long double pr3[7] = {
310 1.265251153957366716825382654273326407972E-5L,
311 8.031057269201324914127680782288352574567E-4L,
312 1.581648121115028333661412169396282881035E-2L,
313 1.179534658087796321928362981518645033967E-1L,
314 3.227936912780465219246440724502790727866E-1L,
315 2.559223765418386621748404398017602935764E-1L,
316 2.277136933287817911091370397134882441046E-2L,
318 static const long double ps3[6] = {
319 1.079681071833391818661952793568345057548E-4L,
320 6.986017817100477138417481463810841529026E-3L,
321 1.429403701146942509913198539100230540503E-1L,
322 1.148392024337075609460312658938700765074E0L,
323 3.643663015091248720208251490291968840882E0L,
324 3.990702269032018282145100741746633960737E0L,
325 /* 1.000000000000000000000000000000000000000E0L, */
328 /* J1(x) cosX + Y1(x) sinX = sqrt( 2/(pi x)) P1(x)
329 P1(x) = 1 + z^2 R(z^2), z=1/x
330 2 <= x <= 2.85711669921875
331 Peak relative error 3.5e-21 */
332 static const long double pr2[7] = {
333 2.795623248568412225239401141338714516445E-4L,
334 1.092578168441856711925254839815430061135E-2L,
335 1.278024620468953761154963591853679640560E-1L,
336 5.469680473691500673112904286228351988583E-1L,
337 8.313769490922351300461498619045639016059E-1L,
338 3.544176317308370086415403567097130611468E-1L,
339 1.604142674802373041247957048801599740644E-2L,
341 static const long double ps2[6] = {
342 2.385605161555183386205027000675875235980E-3L,
343 9.616778294482695283928617708206967248579E-2L,
344 1.195215570959693572089824415393951258510E0L,
345 5.718412857897054829999458736064922974662E0L,
346 1.065626298505499086386584642761602177568E1L,
347 6.809140730053382188468983548092322151791E0L,
348 /* 1.000000000000000000000000000000000000000E0L, */
352 static long double
353 pone (long double x)
355 const long double *p, *q;
356 long double z, r, s;
357 int32_t ix;
358 u_int32_t se, i0, i1;
360 GET_LDOUBLE_WORDS (se, i0, i1, x);
361 ix = se & 0x7fff;
362 /* ix >= 0x4000 for all calls to this function. */
363 if (ix >= 0x4002) /* x >= 8 */
365 p = pr8;
366 q = ps8;
368 else
370 i1 = (ix << 16) | (i0 >> 16);
371 if (i1 >= 0x40019174) /* x >= 4.54541015625 */
373 p = pr5;
374 q = ps5;
376 else if (i1 >= 0x4000b6db) /* x >= 2.85711669921875 */
378 p = pr3;
379 q = ps3;
381 else /* x >= 2 */
383 p = pr2;
384 q = ps2;
387 z = one / (x * x);
388 r = p[0] + z * (p[1] +
389 z * (p[2] + z * (p[3] + z * (p[4] + z * (p[5] + z * p[6])))));
390 s = q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * (q[5] + z)))));
391 return one + z * r / s;
395 /* For x >= 8, the asymptotic expansions of qone is
396 * 3/8 s - 105/1024 s^3 - ..., where s = 1/x.
397 * We approximate pone by
398 * qone(x) = s*(0.375 + (R/S))
401 /* Y1(x)cosX - J1(x)sinX = sqrt( 2/(pi x)) Q1(x),
402 Q1(x) = z(.375 + z^2 R(z^2)), z=1/x
403 8 <= x <= inf
404 Peak relative error 8.3e-22 */
406 static const long double qr8[7] = {
407 -5.691925079044209246015366919809404457380E-10L,
408 -1.632587664706999307871963065396218379137E-7L,
409 -1.577424682764651970003637263552027114600E-5L,
410 -6.377627959241053914770158336842725291713E-4L,
411 -1.087408516779972735197277149494929568768E-2L,
412 -6.854943629378084419631926076882330494217E-2L,
413 -1.055448290469180032312893377152490183203E-1L,
415 static const long double qs8[7] = {
416 5.550982172325019811119223916998393907513E-9L,
417 1.607188366646736068460131091130644192244E-6L,
418 1.580792530091386496626494138334505893599E-4L,
419 6.617859900815747303032860443855006056595E-3L,
420 1.212840547336984859952597488863037659161E-1L,
421 9.017885953937234900458186716154005541075E-1L,
422 2.201114489712243262000939120146436167178E0L,
423 /* 1.000000000000000000000000000000000000000E0L, */
426 /* Y1(x)cosX - J1(x)sinX = sqrt( 2/(pi x)) Q1(x),
427 Q1(x) = z(.375 + z^2 R(z^2)), z=1/x
428 4.54541015625 <= x <= 8
429 Peak relative error 4.1e-22 */
430 static const long double qr5[7] = {
431 -6.719134139179190546324213696633564965983E-8L,
432 -9.467871458774950479909851595678622044140E-6L,
433 -4.429341875348286176950914275723051452838E-4L,
434 -8.539898021757342531563866270278505014487E-3L,
435 -6.818691805848737010422337101409276287170E-2L,
436 -1.964432669771684034858848142418228214855E-1L,
437 -1.333896496989238600119596538299938520726E-1L,
439 static const long double qs5[7] = {
440 6.552755584474634766937589285426911075101E-7L,
441 9.410814032118155978663509073200494000589E-5L,
442 4.561677087286518359461609153655021253238E-3L,
443 9.397742096177905170800336715661091535805E-2L,
444 8.518538116671013902180962914473967738771E-1L,
445 3.177729183645800174212539541058292579009E0L,
446 4.006745668510308096259753538973038902990E0L,
447 /* 1.000000000000000000000000000000000000000E0L, */
450 /* Y1(x)cosX - J1(x)sinX = sqrt( 2/(pi x)) Q1(x),
451 Q1(x) = z(.375 + z^2 R(z^2)), z=1/x
452 2.85711669921875 <= x <= 4.54541015625
453 Peak relative error 2.2e-21 */
454 static const long double qr3[7] = {
455 -3.618746299358445926506719188614570588404E-6L,
456 -2.951146018465419674063882650970344502798E-4L,
457 -7.728518171262562194043409753656506795258E-3L,
458 -8.058010968753999435006488158237984014883E-2L,
459 -3.356232856677966691703904770937143483472E-1L,
460 -4.858192581793118040782557808823460276452E-1L,
461 -1.592399251246473643510898335746432479373E-1L,
463 static const long double qs3[7] = {
464 3.529139957987837084554591421329876744262E-5L,
465 2.973602667215766676998703687065066180115E-3L,
466 8.273534546240864308494062287908662592100E-2L,
467 9.613359842126507198241321110649974032726E-1L,
468 4.853923697093974370118387947065402707519E0L,
469 1.002671608961669247462020977417828796933E1L,
470 7.028927383922483728931327850683151410267E0L,
471 /* 1.000000000000000000000000000000000000000E0L, */
474 /* Y1(x)cosX - J1(x)sinX = sqrt( 2/(pi x)) Q1(x),
475 Q1(x) = z(.375 + z^2 R(z^2)), z=1/x
476 2 <= x <= 2.85711669921875
477 Peak relative error 6.9e-22 */
478 static const long double qr2[7] = {
479 -1.372751603025230017220666013816502528318E-4L,
480 -6.879190253347766576229143006767218972834E-3L,
481 -1.061253572090925414598304855316280077828E-1L,
482 -6.262164224345471241219408329354943337214E-1L,
483 -1.423149636514768476376254324731437473915E0L,
484 -1.087955310491078933531734062917489870754E0L,
485 -1.826821119773182847861406108689273719137E-1L,
487 static const long double qs2[7] = {
488 1.338768933634451601814048220627185324007E-3L,
489 7.071099998918497559736318523932241901810E-2L,
490 1.200511429784048632105295629933382142221E0L,
491 8.327301713640367079030141077172031825276E0L,
492 2.468479301872299311658145549931764426840E1L,
493 2.961179686096262083509383820557051621644E1L,
494 1.201402313144305153005639494661767354977E1L,
495 /* 1.000000000000000000000000000000000000000E0L, */
499 static long double
500 qone (long double x)
502 const long double *p, *q;
503 static long double s, r, z;
504 int32_t ix;
505 u_int32_t se, i0, i1;
507 GET_LDOUBLE_WORDS (se, i0, i1, x);
508 ix = se & 0x7fff;
509 /* ix >= 0x4000 for all calls to this function. */
510 if (ix >= 0x4002) /* x >= 8 */
512 p = qr8;
513 q = qs8;
515 else
517 i1 = (ix << 16) | (i0 >> 16);
518 if (i1 >= 0x40019174) /* x >= 4.54541015625 */
520 p = qr5;
521 q = qs5;
523 else if (i1 >= 0x4000b6db) /* x >= 2.85711669921875 */
525 p = qr3;
526 q = qs3;
528 else /* x >= 2 */
530 p = qr2;
531 q = qs2;
534 z = one / (x * x);
536 p[0] + z * (p[1] +
537 z * (p[2] + z * (p[3] + z * (p[4] + z * (p[5] + z * p[6])))));
539 q[0] + z * (q[1] +
540 z * (q[2] +
541 z * (q[3] + z * (q[4] + z * (q[5] + z * (q[6] + z))))));
542 return (.375 + z * r / s) / x;