beta-0.89.2
[luatex.git] / source / libs / mpfr / mpfr-3.1.3 / src / exp3.c
blob2cd201197f9432ca09c5c01e334557ae3578259b
1 /* mpfr_exp -- exponential of a floating-point number
3 Copyright 1999, 2001-2015 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramel projects, INRIA.
6 This file is part of the GNU MPFR Library.
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
23 #define MPFR_NEED_LONGLONG_H /* for MPFR_MPZ_SIZEINBASE2 */
24 #include "mpfr-impl.h"
26 /* y <- exp(p/2^r) within 1 ulp, using 2^m terms from the series
27 Assume |p/2^r| < 1.
28 We use the following binary splitting formula:
29 P(a,b) = p if a+1=b, P(a,c)*P(c,b) otherwise
30 Q(a,b) = a*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
31 T(a,b) = P(a,b) if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise
32 Then exp(p/2^r) ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough.
34 Since P(a,b) = p^(b-a), and we consider only values of b-a of the form 2^j,
35 we don't need to compute P(), we only precompute p^(2^j) in the ptoj[] array
36 below.
38 Since Q(a,b) is divisible by 2^(r*(b-a-1)), we don't compute the power of
39 two part.
41 static void
42 mpfr_exp_rational (mpfr_ptr y, mpz_ptr p, long r, int m,
43 mpz_t *Q, mpfr_prec_t *mult)
45 unsigned long n, i, j;
46 mpz_t *S, *ptoj;
47 mpfr_prec_t *log2_nb_terms;
48 mpfr_exp_t diff, expo;
49 mpfr_prec_t precy = MPFR_PREC(y), prec_i_have, prec_ptoj;
50 int k, l;
52 MPFR_ASSERTN ((size_t) m < sizeof (long) * CHAR_BIT - 1);
54 S = Q + (m+1);
55 ptoj = Q + 2*(m+1); /* ptoj[i] = mantissa^(2^i) */
56 log2_nb_terms = mult + (m+1);
58 /* Normalize p */
59 MPFR_ASSERTD (mpz_cmp_ui (p, 0) != 0);
60 n = mpz_scan1 (p, 0); /* number of trailing zeros in p */
61 mpz_tdiv_q_2exp (p, p, n);
62 r -= n; /* since |p/2^r| < 1 and p >= 1, r >= 1 */
64 /* Set initial var */
65 mpz_set (ptoj[0], p);
66 for (k = 1; k < m; k++)
67 mpz_mul (ptoj[k], ptoj[k-1], ptoj[k-1]); /* ptoj[k] = p^(2^k) */
68 mpz_set_ui (Q[0], 1);
69 mpz_set_ui (S[0], 1);
70 k = 0;
71 mult[0] = 0; /* the multiplier P[k]/Q[k] for the remaining terms
72 satisfies P[k]/Q[k] <= 2^(-mult[k]) */
73 log2_nb_terms[0] = 0; /* log2(#terms) [exact in 1st loop where 2^k] */
74 prec_i_have = 0;
76 /* Main Loop */
77 n = 1UL << m;
78 for (i = 1; (prec_i_have < precy) && (i < n); i++)
80 /* invariant: Q[0]*Q[1]*...*Q[k] equals i! */
81 k++;
82 log2_nb_terms[k] = 0; /* 1 term */
83 mpz_set_ui (Q[k], i + 1);
84 mpz_set_ui (S[k], i + 1);
85 j = i + 1; /* we have computed j = i+1 terms so far */
86 l = 0;
87 while ((j & 1) == 0) /* combine and reduce */
89 /* invariant: S[k] corresponds to 2^l consecutive terms */
90 mpz_mul (S[k], S[k], ptoj[l]);
91 mpz_mul (S[k-1], S[k-1], Q[k]);
92 /* Q[k] corresponds to 2^l consecutive terms too.
93 Since it does not contains the factor 2^(r*2^l),
94 when going from l to l+1 we need to multiply
95 by 2^(r*2^(l+1))/2^(r*2^l) = 2^(r*2^l) */
96 mpz_mul_2exp (S[k-1], S[k-1], r << l);
97 mpz_add (S[k-1], S[k-1], S[k]);
98 mpz_mul (Q[k-1], Q[k-1], Q[k]);
99 log2_nb_terms[k-1] ++; /* number of terms in S[k-1]
100 is a power of 2 by construction */
101 MPFR_MPZ_SIZEINBASE2 (prec_i_have, Q[k]);
102 MPFR_MPZ_SIZEINBASE2 (prec_ptoj, ptoj[l]);
103 mult[k-1] += prec_i_have + (r << l) - prec_ptoj - 1;
104 prec_i_have = mult[k] = mult[k-1];
105 /* since mult[k] >= mult[k-1] + nbits(Q[k]),
106 we have Q[0]*...*Q[k] <= 2^mult[k] = 2^prec_i_have */
107 l ++;
108 j >>= 1;
109 k --;
113 /* accumulate all products in S[0] and Q[0]. Warning: contrary to above,
114 here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */
115 l = 0; /* number of accumulated terms in the right part S[k]/Q[k] */
116 while (k > 0)
118 j = log2_nb_terms[k-1];
119 mpz_mul (S[k], S[k], ptoj[j]);
120 mpz_mul (S[k-1], S[k-1], Q[k]);
121 l += 1 << log2_nb_terms[k];
122 mpz_mul_2exp (S[k-1], S[k-1], r * l);
123 mpz_add (S[k-1], S[k-1], S[k]);
124 mpz_mul (Q[k-1], Q[k-1], Q[k]);
125 k--;
128 /* Q[0] now equals i! */
129 MPFR_MPZ_SIZEINBASE2 (prec_i_have, S[0]);
130 diff = (mpfr_exp_t) prec_i_have - 2 * (mpfr_exp_t) precy;
131 expo = diff;
132 if (diff >= 0)
133 mpz_fdiv_q_2exp (S[0], S[0], diff);
134 else
135 mpz_mul_2exp (S[0], S[0], -diff);
137 MPFR_MPZ_SIZEINBASE2 (prec_i_have, Q[0]);
138 diff = (mpfr_exp_t) prec_i_have - (mpfr_prec_t) precy;
139 expo -= diff;
140 if (diff > 0)
141 mpz_fdiv_q_2exp (Q[0], Q[0], diff);
142 else
143 mpz_mul_2exp (Q[0], Q[0], -diff);
145 mpz_tdiv_q (S[0], S[0], Q[0]);
146 mpfr_set_z (y, S[0], MPFR_RNDD);
147 MPFR_SET_EXP (y, MPFR_GET_EXP (y) + expo - r * (i - 1) );
150 #define shift (GMP_NUMB_BITS/2)
153 mpfr_exp_3 (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
155 mpfr_t t, x_copy, tmp;
156 mpz_t uk;
157 mpfr_exp_t ttt, shift_x;
158 unsigned long twopoweri;
159 mpz_t *P;
160 mpfr_prec_t *mult;
161 int i, k, loop;
162 int prec_x;
163 mpfr_prec_t realprec, Prec;
164 int iter;
165 int inexact = 0;
166 MPFR_SAVE_EXPO_DECL (expo);
167 MPFR_ZIV_DECL (ziv_loop);
169 MPFR_LOG_FUNC
170 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec(x), mpfr_log_prec, x, rnd_mode),
171 ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec(y), mpfr_log_prec, y,
172 inexact));
174 MPFR_SAVE_EXPO_MARK (expo);
176 /* decompose x */
177 /* we first write x = 1.xxxxxxxxxxxxx
178 ----- k bits -- */
179 prec_x = MPFR_INT_CEIL_LOG2 (MPFR_PREC (x)) - MPFR_LOG2_GMP_NUMB_BITS;
180 if (prec_x < 0)
181 prec_x = 0;
183 ttt = MPFR_GET_EXP (x);
184 mpfr_init2 (x_copy, MPFR_PREC(x));
185 mpfr_set (x_copy, x, MPFR_RNDD);
187 /* we shift to get a number less than 1 */
188 if (ttt > 0)
190 shift_x = ttt;
191 mpfr_div_2ui (x_copy, x, ttt, MPFR_RNDN);
192 ttt = MPFR_GET_EXP (x_copy);
194 else
195 shift_x = 0;
196 MPFR_ASSERTD (ttt <= 0);
198 /* Init prec and vars */
199 realprec = MPFR_PREC (y) + MPFR_INT_CEIL_LOG2 (prec_x + MPFR_PREC (y));
200 Prec = realprec + shift + 2 + shift_x;
201 mpfr_init2 (t, Prec);
202 mpfr_init2 (tmp, Prec);
203 mpz_init (uk);
205 /* Main loop */
206 MPFR_ZIV_INIT (ziv_loop, realprec);
207 for (;;)
209 int scaled = 0;
210 MPFR_BLOCK_DECL (flags);
212 k = MPFR_INT_CEIL_LOG2 (Prec) - MPFR_LOG2_GMP_NUMB_BITS;
214 /* now we have to extract */
215 twopoweri = GMP_NUMB_BITS;
217 /* Allocate tables */
218 P = (mpz_t*) (*__gmp_allocate_func) (3*(k+2)*sizeof(mpz_t));
219 for (i = 0; i < 3*(k+2); i++)
220 mpz_init (P[i]);
221 mult = (mpfr_prec_t*) (*__gmp_allocate_func) (2*(k+2)*sizeof(mpfr_prec_t));
223 /* Particular case for i==0 */
224 mpfr_extract (uk, x_copy, 0);
225 MPFR_ASSERTD (mpz_cmp_ui (uk, 0) != 0);
226 mpfr_exp_rational (tmp, uk, shift + twopoweri - ttt, k + 1, P, mult);
227 for (loop = 0; loop < shift; loop++)
228 mpfr_sqr (tmp, tmp, MPFR_RNDD);
229 twopoweri *= 2;
231 /* General case */
232 iter = (k <= prec_x) ? k : prec_x;
233 for (i = 1; i <= iter; i++)
235 mpfr_extract (uk, x_copy, i);
236 if (MPFR_LIKELY (mpz_cmp_ui (uk, 0) != 0))
238 mpfr_exp_rational (t, uk, twopoweri - ttt, k - i + 1, P, mult);
239 mpfr_mul (tmp, tmp, t, MPFR_RNDD);
241 MPFR_ASSERTN (twopoweri <= LONG_MAX/2);
242 twopoweri *=2;
245 /* Clear tables */
246 for (i = 0; i < 3*(k+2); i++)
247 mpz_clear (P[i]);
248 (*__gmp_free_func) (P, 3*(k+2)*sizeof(mpz_t));
249 (*__gmp_free_func) (mult, 2*(k+2)*sizeof(mpfr_prec_t));
251 if (shift_x > 0)
253 MPFR_BLOCK (flags, {
254 for (loop = 0; loop < shift_x - 1; loop++)
255 mpfr_sqr (tmp, tmp, MPFR_RNDD);
256 mpfr_sqr (t, tmp, MPFR_RNDD);
257 } );
259 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags)))
261 /* tmp <= exact result, so that it is a real overflow. */
262 inexact = mpfr_overflow (y, rnd_mode, 1);
263 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_OVERFLOW);
264 break;
267 if (MPFR_UNLIKELY (MPFR_UNDERFLOW (flags)))
269 /* This may be a spurious underflow. So, let's scale
270 the result. */
271 mpfr_mul_2ui (tmp, tmp, 1, MPFR_RNDD); /* no overflow, exact */
272 mpfr_sqr (t, tmp, MPFR_RNDD);
273 if (MPFR_IS_ZERO (t))
275 /* approximate result < 2^(emin - 3), thus
276 exact result < 2^(emin - 2). */
277 inexact = mpfr_underflow (y, (rnd_mode == MPFR_RNDN) ?
278 MPFR_RNDZ : rnd_mode, 1);
279 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW);
280 break;
282 scaled = 1;
286 if (mpfr_can_round (shift_x > 0 ? t : tmp, realprec, MPFR_RNDN, MPFR_RNDZ,
287 MPFR_PREC(y) + (rnd_mode == MPFR_RNDN)))
289 inexact = mpfr_set (y, shift_x > 0 ? t : tmp, rnd_mode);
290 if (MPFR_UNLIKELY (scaled && MPFR_IS_PURE_FP (y)))
292 int inex2;
293 mpfr_exp_t ey;
295 /* The result has been scaled and needs to be corrected. */
296 ey = MPFR_GET_EXP (y);
297 inex2 = mpfr_mul_2si (y, y, -2, rnd_mode);
298 if (inex2) /* underflow */
300 if (rnd_mode == MPFR_RNDN && inexact < 0 &&
301 MPFR_IS_ZERO (y) && ey == __gmpfr_emin + 1)
303 /* Double rounding case: in MPFR_RNDN, the scaled
304 result has been rounded downward to 2^emin.
305 As the exact result is > 2^(emin - 2), correct
306 rounding must be done upward. */
307 /* TODO: make sure in coverage tests that this line
308 is reached. */
309 inexact = mpfr_underflow (y, MPFR_RNDU, 1);
311 else
313 /* No double rounding. */
314 inexact = inex2;
316 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW);
319 break;
322 MPFR_ZIV_NEXT (ziv_loop, realprec);
323 Prec = realprec + shift + 2 + shift_x;
324 mpfr_set_prec (t, Prec);
325 mpfr_set_prec (tmp, Prec);
327 MPFR_ZIV_FREE (ziv_loop);
329 mpz_clear (uk);
330 mpfr_clear (tmp);
331 mpfr_clear (t);
332 mpfr_clear (x_copy);
333 MPFR_SAVE_EXPO_FREE (expo);
334 return inexact;