Fix atan / atan2 missing underflows (bug 15319).
[glibc.git] / sysdeps / ieee754 / dbl-64 / s_atan.c
blob7b598f14a9f6b6c6357116c5d1c6e4ded8e37b85
1 /*
2 * IBM Accurate Mathematical Library
3 * written by International Business Machines Corp.
4 * Copyright (C) 2001-2015 Free Software Foundation, Inc.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
8 * the Free Software Foundation; either version 2.1 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 /************************************************************************/
20 /* MODULE_NAME: atnat.c */
21 /* */
22 /* FUNCTIONS: uatan */
23 /* atanMp */
24 /* signArctan */
25 /* */
26 /* */
27 /* FILES NEEDED: dla.h endian.h mpa.h mydefs.h atnat.h */
28 /* mpatan.c mpatan2.c mpsqrt.c */
29 /* uatan.tbl */
30 /* */
31 /* An ultimate atan() routine. Given an IEEE double machine number x */
32 /* it computes the correctly rounded (to nearest) value of atan(x). */
33 /* */
34 /* Assumption: Machine arithmetic operations are performed in */
35 /* round to nearest mode of IEEE 754 standard. */
36 /* */
37 /************************************************************************/
39 #include <dla.h>
40 #include "mpa.h"
41 #include "MathLib.h"
42 #include "uatan.tbl"
43 #include "atnat.h"
44 #include <float.h>
45 #include <math.h>
46 #include <math_private.h>
47 #include <stap-probe.h>
49 void __mpatan (mp_no *, mp_no *, int); /* see definition in mpatan.c */
50 static double atanMp (double, const int[]);
52 /* Fix the sign of y and return */
53 static double
54 __signArctan (double x, double y)
56 return __copysign (y, x);
60 /* An ultimate atan() routine. Given an IEEE double machine number x, */
61 /* routine computes the correctly rounded (to nearest) value of atan(x). */
62 double
63 atan (double x)
65 double cor, s1, ss1, s2, ss2, t1, t2, t3, t7, t8, t9, t10, u, u2, u3,
66 v, vv, w, ww, y, yy, z, zz;
67 #ifndef DLA_FMS
68 double t4, t5, t6;
69 #endif
70 int i, ux, dx;
71 static const int pr[M] = { 6, 8, 10, 32 };
72 number num;
74 num.d = x;
75 ux = num.i[HIGH_HALF];
76 dx = num.i[LOW_HALF];
78 /* x=NaN */
79 if (((ux & 0x7ff00000) == 0x7ff00000)
80 && (((ux & 0x000fffff) | dx) != 0x00000000))
81 return x + x;
83 /* Regular values of x, including denormals +-0 and +-INF */
84 u = (x < 0) ? -x : x;
85 if (u < C)
87 if (u < B)
89 if (u < A)
91 if (u < DBL_MIN)
93 double force_underflow = x * x;
94 math_force_eval (force_underflow);
96 return x;
98 else
99 { /* A <= u < B */
100 v = x * x;
101 yy = d11.d + v * d13.d;
102 yy = d9.d + v * yy;
103 yy = d7.d + v * yy;
104 yy = d5.d + v * yy;
105 yy = d3.d + v * yy;
106 yy *= x * v;
108 if ((y = x + (yy - U1 * x)) == x + (yy + U1 * x))
109 return y;
111 EMULV (x, x, v, vv, t1, t2, t3, t4, t5); /* v+vv=x^2 */
113 s1 = f17.d + v * f19.d;
114 s1 = f15.d + v * s1;
115 s1 = f13.d + v * s1;
116 s1 = f11.d + v * s1;
117 s1 *= v;
119 ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
120 MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
121 ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
122 MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
123 ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
124 MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
125 ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
126 MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
127 MUL2 (x, 0, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7,
128 t8);
129 ADD2 (x, 0, s2, ss2, s1, ss1, t1, t2);
130 if ((y = s1 + (ss1 - U5 * s1)) == s1 + (ss1 + U5 * s1))
131 return y;
133 return atanMp (x, pr);
136 else
137 { /* B <= u < C */
138 i = (TWO52 + TWO8 * u) - TWO52;
139 i -= 16;
140 z = u - cij[i][0].d;
141 yy = cij[i][5].d + z * cij[i][6].d;
142 yy = cij[i][4].d + z * yy;
143 yy = cij[i][3].d + z * yy;
144 yy = cij[i][2].d + z * yy;
145 yy *= z;
147 t1 = cij[i][1].d;
148 if (i < 112)
150 if (i < 48)
151 u2 = U21; /* u < 1/4 */
152 else
153 u2 = U22;
154 } /* 1/4 <= u < 1/2 */
155 else
157 if (i < 176)
158 u2 = U23; /* 1/2 <= u < 3/4 */
159 else
160 u2 = U24;
161 } /* 3/4 <= u <= 1 */
162 if ((y = t1 + (yy - u2 * t1)) == t1 + (yy + u2 * t1))
163 return __signArctan (x, y);
165 z = u - hij[i][0].d;
167 s1 = hij[i][14].d + z * hij[i][15].d;
168 s1 = hij[i][13].d + z * s1;
169 s1 = hij[i][12].d + z * s1;
170 s1 = hij[i][11].d + z * s1;
171 s1 *= z;
173 ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
174 MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
175 ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
176 MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
177 ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
178 MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
179 ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
180 MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
181 ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
182 if ((y = s2 + (ss2 - U6 * s2)) == s2 + (ss2 + U6 * s2))
183 return __signArctan (x, y);
185 return atanMp (x, pr);
188 else
190 if (u < D)
191 { /* C <= u < D */
192 w = 1 / u;
193 EMULV (w, u, t1, t2, t3, t4, t5, t6, t7);
194 ww = w * ((1 - t1) - t2);
195 i = (TWO52 + TWO8 * w) - TWO52;
196 i -= 16;
197 z = (w - cij[i][0].d) + ww;
199 yy = cij[i][5].d + z * cij[i][6].d;
200 yy = cij[i][4].d + z * yy;
201 yy = cij[i][3].d + z * yy;
202 yy = cij[i][2].d + z * yy;
203 yy = HPI1 - z * yy;
205 t1 = HPI - cij[i][1].d;
206 if (i < 112)
207 u3 = U31; /* w < 1/2 */
208 else
209 u3 = U32; /* w >= 1/2 */
210 if ((y = t1 + (yy - u3)) == t1 + (yy + u3))
211 return __signArctan (x, y);
213 DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4, t5, t6, t7, t8, t9,
214 t10);
215 t1 = w - hij[i][0].d;
216 EADD (t1, ww, z, zz);
218 s1 = hij[i][14].d + z * hij[i][15].d;
219 s1 = hij[i][13].d + z * s1;
220 s1 = hij[i][12].d + z * s1;
221 s1 = hij[i][11].d + z * s1;
222 s1 *= z;
224 ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
225 MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
226 ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
227 MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
228 ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
229 MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
230 ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
231 MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
232 ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
233 SUB2 (HPI, HPI1, s2, ss2, s1, ss1, t1, t2);
234 if ((y = s1 + (ss1 - U7)) == s1 + (ss1 + U7))
235 return __signArctan (x, y);
237 return atanMp (x, pr);
239 else
241 if (u < E)
242 { /* D <= u < E */
243 w = 1 / u;
244 v = w * w;
245 EMULV (w, u, t1, t2, t3, t4, t5, t6, t7);
247 yy = d11.d + v * d13.d;
248 yy = d9.d + v * yy;
249 yy = d7.d + v * yy;
250 yy = d5.d + v * yy;
251 yy = d3.d + v * yy;
252 yy *= w * v;
254 ww = w * ((1 - t1) - t2);
255 ESUB (HPI, w, t3, cor);
256 yy = ((HPI1 + cor) - ww) - yy;
257 if ((y = t3 + (yy - U4)) == t3 + (yy + U4))
258 return __signArctan (x, y);
260 DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4, t5, t6, t7, t8,
261 t9, t10);
262 MUL2 (w, ww, w, ww, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);
264 s1 = f17.d + v * f19.d;
265 s1 = f15.d + v * s1;
266 s1 = f13.d + v * s1;
267 s1 = f11.d + v * s1;
268 s1 *= v;
270 ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
271 MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
272 ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
273 MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
274 ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
275 MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
276 ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
277 MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
278 MUL2 (w, ww, s1, ss1, s2, ss2, t1, t2, t3, t4, t5, t6, t7, t8);
279 ADD2 (w, ww, s2, ss2, s1, ss1, t1, t2);
280 SUB2 (HPI, HPI1, s1, ss1, s2, ss2, t1, t2);
282 if ((y = s2 + (ss2 - U8)) == s2 + (ss2 + U8))
283 return __signArctan (x, y);
285 return atanMp (x, pr);
287 else
289 /* u >= E */
290 if (x > 0)
291 return HPI;
292 else
293 return MHPI;
299 /* Final stages. Compute atan(x) by multiple precision arithmetic */
300 static double
301 atanMp (double x, const int pr[])
303 mp_no mpx, mpy, mpy2, mperr, mpt1, mpy1;
304 double y1, y2;
305 int i, p;
307 for (i = 0; i < M; i++)
309 p = pr[i];
310 __dbl_mp (x, &mpx, p);
311 __mpatan (&mpx, &mpy, p);
312 __dbl_mp (u9[i].d, &mpt1, p);
313 __mul (&mpy, &mpt1, &mperr, p);
314 __add (&mpy, &mperr, &mpy1, p);
315 __sub (&mpy, &mperr, &mpy2, p);
316 __mp_dbl (&mpy1, &y1, p);
317 __mp_dbl (&mpy2, &y2, p);
318 if (y1 == y2)
320 LIBC_PROBE (slowatan, 3, &p, &x, &y1);
321 return y1;
324 LIBC_PROBE (slowatan_inexact, 3, &p, &x, &y1);
325 return y1; /*if impossible to do exact computing */
328 #ifdef NO_LONG_DOUBLE
329 weak_alias (atan, atanl)
330 #endif