kernel - namecache - fix deadlock
[dragonfly.git] / contrib / mpfr / atan.c
blob17b65aa1c1d90b8aa632c65f37d049cb44549995
1 /* mpfr_atan -- arc-tangent of a floating-point number
3 Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
4 Contributed by the Arenaire and Cacao projects, INRIA.
6 This file is part of the GNU MPFR Library, and was contributed by Mathieu Dutour.
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or (at your
11 option) any later version.
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LIB. If not, write to
20 the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
21 MA 02110-1301, USA. */
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
26 /* If x = p/2^r, put in y an approximation of atan(x)/x using 2^m terms
27 for the series expansion, with an error of at most 1 ulp.
28 Assumes |x| < 1.
30 If X=x^2, we want 1 - X/3 + X^2/5 - ... + (-1)^k*X^k/(2k+1) + ...
32 static void
33 mpfr_atan_aux (mpfr_ptr y, mpz_ptr p, long r, int m, mpz_t *tab)
35 mpz_t *S, *Q, *ptoj;
36 unsigned long n, i, k, j, l;
37 mp_exp_t diff, expo;
38 int im, done;
39 mp_prec_t mult, *accu, *log2_nb_terms;
40 mp_prec_t precy = MPFR_PREC(y);
42 if (mpz_cmp_ui (p, 0) == 0)
44 mpfr_set_ui (y, 1, GMP_RNDN); /* limit(atan(x)/x, x=0) */
45 return;
48 accu = (mp_prec_t*) (*__gmp_allocate_func) ((2 * m + 2) * sizeof (mp_prec_t));
49 log2_nb_terms = accu + m + 1;
51 /* Set Tables */
52 S = tab; /* S */
53 ptoj = S + 1*(m+1); /* p^2^j Precomputed table */
54 Q = S + 2*(m+1); /* Product of Odd integer table */
56 /* From p to p^2, and r to 2r */
57 mpz_mul (p, p, p);
58 MPFR_ASSERTD (2 * r > r);
59 r = 2 * r;
61 /* Normalize p */
62 n = mpz_scan1 (p, 0);
63 mpz_tdiv_q_2exp (p, p, n); /* exact */
64 MPFR_ASSERTD (r > n);
65 r -= n;
66 /* since |p/2^r| < 1, and p is a non-zero integer, necessarily r > 0 */
68 MPFR_ASSERTD (mpz_sgn (p) > 0);
69 MPFR_ASSERTD (m > 0);
71 /* check if p=1 (special case) */
72 l = 0;
74 We compute by binary splitting, with X = x^2 = p/2^r:
75 P(a,b) = p if a+1=b, P(a,c)*P(c,b) otherwise
76 Q(a,b) = (2a+1)*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
77 S(a,b) = p*(2a+1) if a+1=b, Q(c,b)*S(a,c)+Q(a,c)*P(a,c)*S(c,b) otherwise
78 Then atan(x)/x ~ S(0,i)/Q(0,i) for i so that (p/2^r)^i/i is small enough.
79 The factor 2^(r*(b-a)) in Q(a,b) is implicit, thus we have to take it
80 into account when we compute with Q.
82 accu[0] = 0; /* accu[k] = Mult[0] + ... + Mult[k], where Mult[j] is the
83 number of bits of the corresponding term S[j]/Q[j] */
84 if (mpz_cmp_ui (p, 1) != 0)
86 /* p <> 1: precompute ptoj table */
87 mpz_set (ptoj[0], p);
88 for (im = 1 ; im <= m ; im ++)
89 mpz_mul (ptoj[im], ptoj[im - 1], ptoj[im - 1]);
90 /* main loop */
91 n = 1UL << m;
92 /* the ith term being X^i/(2i+1) with X=p/2^r, we can stop when
93 p^i/2^(r*i) < 2^(-precy), i.e. r*i > precy + log2(p^i) */
94 for (i = k = done = 0; (i < n) && (done == 0); i += 2, k ++)
96 /* initialize both S[k],Q[k] and S[k+1],Q[k+1] */
97 mpz_set_ui (Q[k+1], 2 * i + 3); /* Q(i+1,i+2) */
98 mpz_mul_ui (S[k+1], p, 2 * i + 1); /* S(i+1,i+2) */
99 mpz_mul_2exp (S[k], Q[k+1], r);
100 mpz_sub (S[k], S[k], S[k+1]); /* S(i,i+2) */
101 mpz_mul_ui (Q[k], Q[k+1], 2 * i + 1); /* Q(i,i+2) */
102 log2_nb_terms[k] = 1; /* S[k]/Q[k] corresponds to 2 terms */
103 for (j = (i + 2) >> 1, l = 1; (j & 1) == 0; l ++, j >>= 1, k --)
105 /* invariant: S[k-1]/Q[k-1] and S[k]/Q[k] correspond
106 to 2^l terms each. We combine them into S[k-1]/Q[k-1] */
107 MPFR_ASSERTD (k > 0);
108 mpz_mul (S[k], S[k], Q[k-1]);
109 mpz_mul (S[k], S[k], ptoj[l]);
110 mpz_mul (S[k-1], S[k-1], Q[k]);
111 mpz_mul_2exp (S[k-1], S[k-1], r << l);
112 mpz_add (S[k-1], S[k-1], S[k]);
113 mpz_mul (Q[k-1], Q[k-1], Q[k]);
114 log2_nb_terms[k-1] = l + 1;
115 /* now S[k-1]/Q[k-1] corresponds to 2^(l+1) terms */
116 MPFR_MPZ_SIZEINBASE2(mult, ptoj[l+1]);
117 /* FIXME: precompute bits(ptoj[l+1]) outside the loop? */
118 mult = (r << (l + 1)) - mult - 1;
119 accu[k-1] = (k == 1) ? mult : accu[k-2] + mult;
120 if (accu[k-1] > precy)
121 done = 1;
125 else /* special case p=1: the ith term being X^i/(2i+1) with X=1/2^r,
126 we can stop when r*i > precy i.e. i > precy/r */
128 n = 1UL << m;
129 for (i = k = 0; (i < n) && (i <= precy / r); i += 2, k ++)
131 mpz_set_ui (Q[k + 1], 2 * i + 3);
132 mpz_mul_2exp (S[k], Q[k+1], r);
133 mpz_sub_ui (S[k], S[k], 1 + 2 * i);
134 mpz_mul_ui (Q[k], Q[k + 1], 1 + 2 * i);
135 log2_nb_terms[k] = 1; /* S[k]/Q[k] corresponds to 2 terms */
136 for (j = (i + 2) >> 1, l = 1; (j & 1) == 0; l++, j >>= 1, k --)
138 MPFR_ASSERTD (k > 0);
139 mpz_mul (S[k], S[k], Q[k-1]);
140 mpz_mul (S[k-1], S[k-1], Q[k]);
141 mpz_mul_2exp (S[k-1], S[k-1], r << l);
142 mpz_add (S[k-1], S[k-1], S[k]);
143 mpz_mul (Q[k-1], Q[k-1], Q[k]);
144 log2_nb_terms[k-1] = l + 1;
149 /* we need to combine S[0]/Q[0]...S[k-1]/Q[k-1] */
150 l = 0; /* number of terms accumulated in S[k]/Q[k] */
151 while (k > 1)
153 k --;
154 /* combine S[k-1]/Q[k-1] and S[k]/Q[k] */
155 j = log2_nb_terms[k-1];
156 mpz_mul (S[k], S[k], Q[k-1]);
157 if (mpz_cmp_ui (p, 1) != 0)
158 mpz_mul (S[k], S[k], ptoj[j]);
159 mpz_mul (S[k-1], S[k-1], Q[k]);
160 l += 1 << log2_nb_terms[k];
161 mpz_mul_2exp (S[k-1], S[k-1], r * l);
162 mpz_add (S[k-1], S[k-1], S[k]);
163 mpz_mul (Q[k-1], Q[k-1], Q[k]);
165 (*__gmp_free_func) (accu, (2 * m + 2) * sizeof (mp_prec_t));
167 MPFR_MPZ_SIZEINBASE2 (diff, S[0]);
168 diff -= 2 * precy;
169 expo = diff;
170 if (diff >= 0)
171 mpz_tdiv_q_2exp (S[0], S[0], diff);
172 else
173 mpz_mul_2exp (S[0], S[0], -diff);
175 MPFR_MPZ_SIZEINBASE2 (diff, Q[0]);
176 diff -= precy;
177 expo -= diff;
178 if (diff >= 0)
179 mpz_tdiv_q_2exp (Q[0], Q[0], diff);
180 else
181 mpz_mul_2exp (Q[0], Q[0], -diff);
183 mpz_tdiv_q (S[0], S[0], Q[0]);
184 mpfr_set_z (y, S[0], GMP_RNDD);
185 MPFR_SET_EXP (y, MPFR_EXP(y) + expo - r * (i - 1));
189 mpfr_atan (mpfr_ptr atan, mpfr_srcptr x, mp_rnd_t rnd_mode)
191 mpfr_t xp, arctgt, sk, tmp, tmp2;
192 mpz_t ukz;
193 mpz_t *tabz;
194 mp_exp_t exptol;
195 mp_prec_t prec, realprec;
196 unsigned long twopoweri;
197 int comparaison, inexact, inexact2;
198 int i, n0, oldn0;
199 MPFR_GROUP_DECL (group);
200 MPFR_SAVE_EXPO_DECL (expo);
201 MPFR_ZIV_DECL (loop);
203 MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", x, x, rnd_mode),
204 ("atan[%#R]=%R inexact=%d", atan, atan, inexact));
206 /* Singular cases */
207 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
209 if (MPFR_IS_NAN (x))
211 MPFR_SET_NAN (atan);
212 MPFR_RET_NAN;
214 else if (MPFR_IS_INF (x))
216 if (MPFR_IS_POS (x)) /* arctan(+inf) = Pi/2 */
217 inexact = mpfr_const_pi (atan, rnd_mode);
218 else /* arctan(-inf) = -Pi/2 */
220 inexact = -mpfr_const_pi (atan,
221 MPFR_INVERT_RND (rnd_mode));
222 MPFR_CHANGE_SIGN (atan);
224 inexact2 = mpfr_div_2ui (atan, atan, 1, rnd_mode);
225 if (MPFR_UNLIKELY (inexact2))
226 inexact = inexact2; /* An underflow occurs */
227 MPFR_RET (inexact);
229 else /* x is necessarily 0 */
231 MPFR_ASSERTD (MPFR_IS_ZERO (x));
232 MPFR_SET_ZERO (atan);
233 MPFR_SET_SAME_SIGN (atan, x);
234 MPFR_RET (0);
238 /* atan(x) = x - x^3/3 + x^5/5...
239 so the error is < 2^(3*EXP(x)-1)
240 so `EXP(x)-(3*EXP(x)-1)` = -2*EXP(x)+1 */
241 MPFR_FAST_COMPUTE_IF_SMALL_INPUT (atan, x, -2 * MPFR_GET_EXP (x), 1, 0,
242 rnd_mode, {});
244 /* Set x_p=|x| */
245 MPFR_TMP_INIT_ABS (xp, x);
247 /* Other simple case arctan(-+1)=-+pi/4 */
248 comparaison = mpfr_cmp_ui (xp, 1);
249 if (MPFR_UNLIKELY (comparaison == 0))
251 int neg = MPFR_IS_NEG (x);
252 inexact = mpfr_const_pi (atan, MPFR_IS_POS (x) ? rnd_mode
253 : MPFR_INVERT_RND (rnd_mode));
254 if (neg)
256 inexact = -inexact;
257 MPFR_CHANGE_SIGN (atan);
259 inexact2 = mpfr_div_2ui (atan, atan, 2, rnd_mode);
260 if (MPFR_UNLIKELY (inexact2))
261 inexact = inexact2; /* an underflow occurs */
262 return inexact;
265 realprec = MPFR_PREC (atan) + MPFR_INT_CEIL_LOG2 (MPFR_PREC (atan)) + 4;
266 prec = realprec + BITS_PER_MP_LIMB;
268 MPFR_SAVE_EXPO_MARK (expo);
270 /* Initialisation */
271 mpz_init (ukz);
272 MPFR_GROUP_INIT_4 (group, prec, sk, tmp, tmp2, arctgt);
273 oldn0 = 0;
274 tabz = (mpz_t *) 0;
276 MPFR_ZIV_INIT (loop, prec);
277 for (;;)
279 /* First, if |x| < 1, we need to have more prec to be able to round (sup)
280 n0 = ceil(log(prec_requested + 2 + 1+ln(2.4)/ln(2))/log(2)) */
281 mp_prec_t sup;
282 #if 0
283 sup = 1;
284 if (MPFR_GET_EXP (xp) < 0
285 && (mpfr_uexp_t) (2-MPFR_GET_EXP (xp)) > realprec)
286 sup = (mpfr_uexp_t) (2-MPFR_GET_EXP (xp)) - realprec;
287 #else
288 sup = MPFR_GET_EXP (xp) < 0 ? 2-MPFR_GET_EXP (xp) : 1;
289 #endif
290 n0 = MPFR_INT_CEIL_LOG2 ((realprec + sup) + 3);
291 MPFR_ASSERTD (3*n0 > 2);
292 prec = (realprec + sup) + 1 + MPFR_INT_CEIL_LOG2 (3*n0-2);
294 /* Initialisation */
295 MPFR_GROUP_REPREC_4 (group, prec, sk, tmp, tmp2, arctgt);
296 if (MPFR_LIKELY (oldn0 == 0))
298 oldn0 = 3*(n0+1);
299 tabz = (mpz_t *) (*__gmp_allocate_func) (oldn0*sizeof (mpz_t));
300 for (i = 0; i < oldn0; i++)
301 mpz_init (tabz[i]);
303 else if (MPFR_UNLIKELY (oldn0 < 3*n0+1))
305 tabz = (mpz_t *) (*__gmp_reallocate_func)
306 (tabz, oldn0*sizeof (mpz_t), 3*(n0+1)*sizeof (mpz_t));
307 for (i = oldn0; i < 3*(n0+1); i++)
308 mpz_init (tabz[i]);
309 oldn0 = 3*(n0+1);
312 if (comparaison > 0)
313 mpfr_ui_div (sk, 1, xp, GMP_RNDN);
314 else
315 mpfr_set (sk, xp, GMP_RNDN);
317 /* sk is 1/|x| if |x| > 1, and |x| otherwise, i.e. min(|x|, 1/|x|) */
319 /* Assignation */
320 MPFR_SET_ZERO (arctgt);
321 twopoweri = 1<<0;
322 MPFR_ASSERTD (n0 >= 4);
323 /* FIXME: further reduce the argument so that it is less than
324 1/n where n is the output precision. In such a way, the
325 first calls to mpfr_atan_aux will not be too expensive,
326 since the number of needed terms will be n/log(n), so the
327 factorial contribution will be O(n). */
328 for (i = 0 ; i < n0; i++)
330 if (MPFR_UNLIKELY (MPFR_IS_ZERO (sk)))
331 break;
332 /* Calculation of trunc(tmp) --> mpz */
333 mpfr_mul_2ui (tmp, sk, twopoweri, GMP_RNDN);
334 mpfr_trunc (tmp, tmp);
335 if (!MPFR_IS_ZERO (tmp))
337 exptol = mpfr_get_z_exp (ukz, tmp);
338 /* since the s_k are decreasing (see algorithms.tex),
339 and s_0 = min(|x|, 1/|x|) < 1, we have sk < 1,
340 thus exptol < 0 */
341 MPFR_ASSERTD (exptol < 0);
342 mpz_tdiv_q_2exp (ukz, ukz, (unsigned long int) (-exptol));
343 /* Calculation of arctan(Ak) */
344 mpfr_set_z (tmp, ukz, GMP_RNDN);
345 mpfr_div_2ui (tmp, tmp, twopoweri, GMP_RNDN);
346 mpfr_atan_aux (tmp2, ukz, twopoweri, n0 - i, tabz);
347 mpfr_mul (tmp2, tmp2, tmp, GMP_RNDN);
348 /* Addition */
349 mpfr_add (arctgt, arctgt, tmp2, GMP_RNDN);
350 /* Next iteration */
351 mpfr_sub (tmp2, sk, tmp, GMP_RNDN);
352 mpfr_mul (sk, sk, tmp, GMP_RNDN);
353 mpfr_add_ui (sk, sk, 1, GMP_RNDN);
354 mpfr_div (sk, tmp2, sk, GMP_RNDN);
356 twopoweri <<= 1;
358 /* Add last step (Arctan(sk) ~= sk */
359 mpfr_add (arctgt, arctgt, sk, GMP_RNDN);
360 if (comparaison > 0)
362 mpfr_const_pi (tmp, GMP_RNDN);
363 mpfr_div_2ui (tmp, tmp, 1, GMP_RNDN);
364 mpfr_sub (arctgt, tmp, arctgt, GMP_RNDN);
366 MPFR_SET_POS (arctgt);
368 if (MPFR_LIKELY (MPFR_CAN_ROUND (arctgt, realprec, MPFR_PREC (atan),
369 rnd_mode)))
370 break;
371 MPFR_ZIV_NEXT (loop, realprec);
373 MPFR_ZIV_FREE (loop);
375 inexact = mpfr_set4 (atan, arctgt, rnd_mode, MPFR_SIGN (x));
377 for (i = 0 ; i < oldn0 ; i++)
378 mpz_clear (tabz[i]);
379 mpz_clear (ukz);
380 (*__gmp_free_func) (tabz, oldn0*sizeof (mpz_t));
381 MPFR_GROUP_CLEAR (group);
383 MPFR_SAVE_EXPO_FREE (expo);
384 return mpfr_check_range (arctgt, inexact, rnd_mode);