Actually hook powernow.4 into the build.
[dragonfly.git] / contrib / mpfr / exp3.c
blob845f78ce4db2d412e78d4668ef0aa1408c6b3556
1 /* mpfr_exp -- exponential of a floating-point number
3 Copyright 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
4 Contributed by the Arenaire and Cacao projects, INRIA.
6 This file is part of the GNU MPFR Library.
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or (at your
11 option) any later version.
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LIB. If not, write to
20 the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
21 MA 02110-1301, USA. */
23 #define MPFR_NEED_LONGLONG_H /* for MPFR_MPZ_SIZEINBASE2 */
24 #include "mpfr-impl.h"
26 /* y <- exp(p/2^r) within 1 ulp, using 2^m terms from the series
27 Assume |p/2^r| < 1.
28 We use the following binary splitting formula:
29 P(a,b) = p if a+1=b, P(a,c)*P(c,b) otherwise
30 Q(a,b) = a*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
31 T(a,b) = P(a,b) if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise
32 Then exp(p/2^r) ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough.
34 Since P(a,b) = p^(b-a), and we consider only values of b-a of the form 2^j,
35 we don't need to compute P(), we only precompute p^(2^j) in the ptoj[] array
36 below.
38 Since Q(a,b) is divisible by 2^(r*(b-a-1)), we don't compute the power of
39 two part.
41 static void
42 mpfr_exp_rational (mpfr_ptr y, mpz_ptr p, long r, int m,
43 mpz_t *Q, mp_prec_t *mult)
45 unsigned long n, i, j;
46 mpz_t *S, *ptoj;
47 mp_prec_t *log2_nb_terms;
48 mp_exp_t diff, expo;
49 mp_prec_t precy = MPFR_PREC(y), prec_i_have, prec_ptoj;
50 int k, l;
52 MPFR_ASSERTN ((size_t) m < sizeof (long) * CHAR_BIT - 1);
54 S = Q + (m+1);
55 ptoj = Q + 2*(m+1); /* ptoj[i] = mantissa^(2^i) */
56 log2_nb_terms = mult + (m+1);
58 /* Normalize p */
59 MPFR_ASSERTD (mpz_cmp_ui (p, 0) != 0);
60 n = mpz_scan1 (p, 0); /* number of trailing zeros in p */
61 mpz_tdiv_q_2exp (p, p, n);
62 r -= n; /* since |p/2^r| < 1 and p >= 1, r >= 1 */
64 /* Set initial var */
65 mpz_set (ptoj[0], p);
66 for (k = 1; k < m; k++)
67 mpz_mul (ptoj[k], ptoj[k-1], ptoj[k-1]); /* ptoj[k] = p^(2^k) */
68 mpz_set_ui (Q[0], 1);
69 mpz_set_ui (S[0], 1);
70 k = 0;
71 mult[0] = 0; /* the multiplier P[k]/Q[k] for the remaining terms
72 satisfies P[k]/Q[k] <= 2^(-mult[k]) */
73 log2_nb_terms[0] = 0; /* log2(#terms) [exact in 1st loop where 2^k] */
74 prec_i_have = 0;
76 /* Main Loop */
77 n = 1UL << m;
78 for (i = 1; (prec_i_have < precy) && (i < n); i++)
80 /* invariant: Q[0]*Q[1]*...*Q[k] equals i! */
81 k++;
82 log2_nb_terms[k] = 0; /* 1 term */
83 mpz_set_ui (Q[k], i + 1);
84 mpz_set_ui (S[k], i + 1);
85 j = i + 1; /* we have computed j = i+1 terms so far */
86 l = 0;
87 while ((j & 1) == 0) /* combine and reduce */
89 /* invariant: S[k] corresponds to 2^l consecutive terms */
90 mpz_mul (S[k], S[k], ptoj[l]);
91 mpz_mul (S[k-1], S[k-1], Q[k]);
92 /* Q[k] corresponds to 2^l consecutive terms too.
93 Since it does not contains the factor 2^(r*2^l),
94 when going from l to l+1 we need to multiply
95 by 2^(r*2^(l+1))/2^(r*2^l) = 2^(r*2^l) */
96 mpz_mul_2exp (S[k-1], S[k-1], r << l);
97 mpz_add (S[k-1], S[k-1], S[k]);
98 mpz_mul (Q[k-1], Q[k-1], Q[k]);
99 log2_nb_terms[k-1] ++; /* number of terms in S[k-1]
100 is a power of 2 by construction */
101 MPFR_MPZ_SIZEINBASE2 (prec_i_have, Q[k]);
102 MPFR_MPZ_SIZEINBASE2 (prec_ptoj, ptoj[l]);
103 mult[k-1] += prec_i_have + (r << l) - prec_ptoj - 1;
104 prec_i_have = mult[k] = mult[k-1];
105 /* since mult[k] >= mult[k-1] + nbits(Q[k]),
106 we have Q[0]*...*Q[k] <= 2^mult[k] = 2^prec_i_have */
107 l ++;
108 j >>= 1;
109 k --;
113 /* accumulate all products in S[0] and Q[0]. Warning: contrary to above,
114 here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */
115 l = 0; /* number of accumulated terms in the right part S[k]/Q[k] */
116 while (k > 0)
118 j = log2_nb_terms[k-1];
119 mpz_mul (S[k], S[k], ptoj[j]);
120 mpz_mul (S[k-1], S[k-1], Q[k]);
121 l += 1 << log2_nb_terms[k];
122 mpz_mul_2exp (S[k-1], S[k-1], r * l);
123 mpz_add (S[k-1], S[k-1], S[k]);
124 mpz_mul (Q[k-1], Q[k-1], Q[k]);
125 k--;
128 /* Q[0] now equals i! */
129 MPFR_MPZ_SIZEINBASE2 (prec_i_have, S[0]);
130 diff = (mp_exp_t) prec_i_have - 2 * (mp_exp_t) precy;
131 expo = diff;
132 if (diff >= 0)
133 mpz_div_2exp (S[0], S[0], diff);
134 else
135 mpz_mul_2exp (S[0], S[0], -diff);
137 MPFR_MPZ_SIZEINBASE2 (prec_i_have, Q[0]);
138 diff = (mp_exp_t) prec_i_have - (mp_prec_t) precy;
139 expo -= diff;
140 if (diff > 0)
141 mpz_div_2exp (Q[0], Q[0], diff);
142 else
143 mpz_mul_2exp (Q[0], Q[0], -diff);
145 mpz_tdiv_q (S[0], S[0], Q[0]);
146 mpfr_set_z (y, S[0], GMP_RNDD);
147 MPFR_SET_EXP (y, MPFR_GET_EXP (y) + expo - r * (i - 1) );
150 #define shift (BITS_PER_MP_LIMB/2)
153 mpfr_exp_3 (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode)
155 mpfr_t t, x_copy, tmp;
156 mpz_t uk;
157 mp_exp_t ttt, shift_x;
158 unsigned long twopoweri;
159 mpz_t *P;
160 mp_prec_t *mult;
161 int i, k, loop;
162 int prec_x;
163 mp_prec_t realprec, Prec;
164 int iter;
165 int inexact = 0;
166 MPFR_SAVE_EXPO_DECL (expo);
167 MPFR_ZIV_DECL (ziv_loop);
169 MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", x, x, rnd_mode),
170 ("y[%#R]=%R inexact=%d", y, y, inexact));
172 MPFR_SAVE_EXPO_MARK (expo);
174 /* decompose x */
175 /* we first write x = 1.xxxxxxxxxxxxx
176 ----- k bits -- */
177 prec_x = MPFR_INT_CEIL_LOG2 (MPFR_PREC (x)) - MPFR_LOG2_BITS_PER_MP_LIMB;
178 if (prec_x < 0)
179 prec_x = 0;
181 ttt = MPFR_GET_EXP (x);
182 mpfr_init2 (x_copy, MPFR_PREC(x));
183 mpfr_set (x_copy, x, GMP_RNDD);
185 /* we shift to get a number less than 1 */
186 if (ttt > 0)
188 shift_x = ttt;
189 mpfr_div_2ui (x_copy, x, ttt, GMP_RNDN);
190 ttt = MPFR_GET_EXP (x_copy);
192 else
193 shift_x = 0;
194 MPFR_ASSERTD (ttt <= 0);
196 /* Init prec and vars */
197 realprec = MPFR_PREC (y) + MPFR_INT_CEIL_LOG2 (prec_x + MPFR_PREC (y));
198 Prec = realprec + shift + 2 + shift_x;
199 mpfr_init2 (t, Prec);
200 mpfr_init2 (tmp, Prec);
201 mpz_init (uk);
203 /* Main loop */
204 MPFR_ZIV_INIT (ziv_loop, realprec);
205 for (;;)
207 int scaled = 0;
208 MPFR_BLOCK_DECL (flags);
210 k = MPFR_INT_CEIL_LOG2 (Prec) - MPFR_LOG2_BITS_PER_MP_LIMB;
212 /* now we have to extract */
213 twopoweri = BITS_PER_MP_LIMB;
215 /* Allocate tables */
216 P = (mpz_t*) (*__gmp_allocate_func) (3*(k+2)*sizeof(mpz_t));
217 for (i = 0; i < 3*(k+2); i++)
218 mpz_init (P[i]);
219 mult = (mp_prec_t*) (*__gmp_allocate_func) (2*(k+2)*sizeof(mp_prec_t));
221 /* Particular case for i==0 */
222 mpfr_extract (uk, x_copy, 0);
223 MPFR_ASSERTD (mpz_cmp_ui (uk, 0) != 0);
224 mpfr_exp_rational (tmp, uk, shift + twopoweri - ttt, k + 1, P, mult);
225 for (loop = 0; loop < shift; loop++)
226 mpfr_sqr (tmp, tmp, GMP_RNDD);
227 twopoweri *= 2;
229 /* General case */
230 iter = (k <= prec_x) ? k : prec_x;
231 for (i = 1; i <= iter; i++)
233 mpfr_extract (uk, x_copy, i);
234 if (MPFR_LIKELY (mpz_cmp_ui (uk, 0) != 0))
236 mpfr_exp_rational (t, uk, twopoweri - ttt, k - i + 1, P, mult);
237 mpfr_mul (tmp, tmp, t, GMP_RNDD);
239 MPFR_ASSERTN (twopoweri <= LONG_MAX/2);
240 twopoweri *=2;
243 /* Clear tables */
244 for (i = 0; i < 3*(k+2); i++)
245 mpz_clear (P[i]);
246 (*__gmp_free_func) (P, 3*(k+2)*sizeof(mpz_t));
247 (*__gmp_free_func) (mult, 2*(k+2)*sizeof(mp_prec_t));
249 if (shift_x > 0)
251 MPFR_BLOCK (flags, {
252 for (loop = 0; loop < shift_x - 1; loop++)
253 mpfr_sqr (tmp, tmp, GMP_RNDD);
254 mpfr_sqr (t, tmp, GMP_RNDD);
255 } );
257 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags)))
259 /* tmp <= exact result, so that it is a real overflow. */
260 inexact = mpfr_overflow (y, rnd_mode, 1);
261 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_OVERFLOW);
262 break;
265 if (MPFR_UNLIKELY (MPFR_UNDERFLOW (flags)))
267 /* This may be a spurious underflow. So, let's scale
268 the result. */
269 mpfr_mul_2ui (tmp, tmp, 1, GMP_RNDD); /* no overflow, exact */
270 mpfr_sqr (t, tmp, GMP_RNDD);
271 if (MPFR_IS_ZERO (t))
273 /* approximate result < 2^(emin - 3), thus
274 exact result < 2^(emin - 2). */
275 inexact = mpfr_underflow (y, (rnd_mode == GMP_RNDN) ?
276 GMP_RNDZ : rnd_mode, 1);
277 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW);
278 break;
280 scaled = 1;
284 if (mpfr_can_round (shift_x > 0 ? t : tmp, realprec, GMP_RNDD, GMP_RNDZ,
285 MPFR_PREC(y) + (rnd_mode == GMP_RNDN)))
287 inexact = mpfr_set (y, shift_x > 0 ? t : tmp, rnd_mode);
288 if (MPFR_UNLIKELY (scaled && MPFR_IS_PURE_FP (y)))
290 int inex2;
291 mp_exp_t ey;
293 /* The result has been scaled and needs to be corrected. */
294 ey = MPFR_GET_EXP (y);
295 inex2 = mpfr_mul_2si (y, y, -2, rnd_mode);
296 if (inex2) /* underflow */
298 if (rnd_mode == GMP_RNDN && inexact < 0 &&
299 MPFR_IS_ZERO (y) && ey == __gmpfr_emin + 1)
301 /* Double rounding case: in GMP_RNDN, the scaled
302 result has been rounded downward to 2^emin.
303 As the exact result is > 2^(emin - 2), correct
304 rounding must be done upward. */
305 /* TODO: make sure in coverage tests that this line
306 is reached. */
307 inexact = mpfr_underflow (y, GMP_RNDU, 1);
309 else
311 /* No double rounding. */
312 inexact = inex2;
314 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW);
317 break;
320 MPFR_ZIV_NEXT (ziv_loop, realprec);
321 Prec = realprec + shift + 2 + shift_x;
322 mpfr_set_prec (t, Prec);
323 mpfr_set_prec (tmp, Prec);
325 MPFR_ZIV_FREE (ziv_loop);
327 mpz_clear (uk);
328 mpfr_clear (tmp);
329 mpfr_clear (t);
330 mpfr_clear (x_copy);
331 MPFR_SAVE_EXPO_FREE (expo);
332 return inexact;