1 /* mpfr_exp -- exponential of a floating-point number
3 Copyright 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
4 Contributed by the Arenaire and Cacao projects, INRIA.
6 This file is part of the GNU MPFR Library.
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or (at your
11 option) any later version.
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LIB. If not, write to
20 the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
21 MA 02110-1301, USA. */
23 #define MPFR_NEED_LONGLONG_H /* for MPFR_MPZ_SIZEINBASE2 */
24 #include "mpfr-impl.h"
26 /* y <- exp(p/2^r) within 1 ulp, using 2^m terms from the series
28 We use the following binary splitting formula:
29 P(a,b) = p if a+1=b, P(a,c)*P(c,b) otherwise
30 Q(a,b) = a*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
31 T(a,b) = P(a,b) if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise
32 Then exp(p/2^r) ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough.
34 Since P(a,b) = p^(b-a), and we consider only values of b-a of the form 2^j,
35 we don't need to compute P(), we only precompute p^(2^j) in the ptoj[] array
38 Since Q(a,b) is divisible by 2^(r*(b-a-1)), we don't compute the power of
42 mpfr_exp_rational (mpfr_ptr y
, mpz_ptr p
, long r
, int m
,
43 mpz_t
*Q
, mp_prec_t
*mult
)
45 unsigned long n
, i
, j
;
47 mp_prec_t
*log2_nb_terms
;
49 mp_prec_t precy
= MPFR_PREC(y
), prec_i_have
, prec_ptoj
;
52 MPFR_ASSERTN ((size_t) m
< sizeof (long) * CHAR_BIT
- 1);
55 ptoj
= Q
+ 2*(m
+1); /* ptoj[i] = mantissa^(2^i) */
56 log2_nb_terms
= mult
+ (m
+1);
59 MPFR_ASSERTD (mpz_cmp_ui (p
, 0) != 0);
60 n
= mpz_scan1 (p
, 0); /* number of trailing zeros in p */
61 mpz_tdiv_q_2exp (p
, p
, n
);
62 r
-= n
; /* since |p/2^r| < 1 and p >= 1, r >= 1 */
66 for (k
= 1; k
< m
; k
++)
67 mpz_mul (ptoj
[k
], ptoj
[k
-1], ptoj
[k
-1]); /* ptoj[k] = p^(2^k) */
71 mult
[0] = 0; /* the multiplier P[k]/Q[k] for the remaining terms
72 satisfies P[k]/Q[k] <= 2^(-mult[k]) */
73 log2_nb_terms
[0] = 0; /* log2(#terms) [exact in 1st loop where 2^k] */
78 for (i
= 1; (prec_i_have
< precy
) && (i
< n
); i
++)
80 /* invariant: Q[0]*Q[1]*...*Q[k] equals i! */
82 log2_nb_terms
[k
] = 0; /* 1 term */
83 mpz_set_ui (Q
[k
], i
+ 1);
84 mpz_set_ui (S
[k
], i
+ 1);
85 j
= i
+ 1; /* we have computed j = i+1 terms so far */
87 while ((j
& 1) == 0) /* combine and reduce */
89 /* invariant: S[k] corresponds to 2^l consecutive terms */
90 mpz_mul (S
[k
], S
[k
], ptoj
[l
]);
91 mpz_mul (S
[k
-1], S
[k
-1], Q
[k
]);
92 /* Q[k] corresponds to 2^l consecutive terms too.
93 Since it does not contains the factor 2^(r*2^l),
94 when going from l to l+1 we need to multiply
95 by 2^(r*2^(l+1))/2^(r*2^l) = 2^(r*2^l) */
96 mpz_mul_2exp (S
[k
-1], S
[k
-1], r
<< l
);
97 mpz_add (S
[k
-1], S
[k
-1], S
[k
]);
98 mpz_mul (Q
[k
-1], Q
[k
-1], Q
[k
]);
99 log2_nb_terms
[k
-1] ++; /* number of terms in S[k-1]
100 is a power of 2 by construction */
101 MPFR_MPZ_SIZEINBASE2 (prec_i_have
, Q
[k
]);
102 MPFR_MPZ_SIZEINBASE2 (prec_ptoj
, ptoj
[l
]);
103 mult
[k
-1] += prec_i_have
+ (r
<< l
) - prec_ptoj
- 1;
104 prec_i_have
= mult
[k
] = mult
[k
-1];
105 /* since mult[k] >= mult[k-1] + nbits(Q[k]),
106 we have Q[0]*...*Q[k] <= 2^mult[k] = 2^prec_i_have */
113 /* accumulate all products in S[0] and Q[0]. Warning: contrary to above,
114 here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */
115 l
= 0; /* number of accumulated terms in the right part S[k]/Q[k] */
118 j
= log2_nb_terms
[k
-1];
119 mpz_mul (S
[k
], S
[k
], ptoj
[j
]);
120 mpz_mul (S
[k
-1], S
[k
-1], Q
[k
]);
121 l
+= 1 << log2_nb_terms
[k
];
122 mpz_mul_2exp (S
[k
-1], S
[k
-1], r
* l
);
123 mpz_add (S
[k
-1], S
[k
-1], S
[k
]);
124 mpz_mul (Q
[k
-1], Q
[k
-1], Q
[k
]);
128 /* Q[0] now equals i! */
129 MPFR_MPZ_SIZEINBASE2 (prec_i_have
, S
[0]);
130 diff
= (mp_exp_t
) prec_i_have
- 2 * (mp_exp_t
) precy
;
133 mpz_div_2exp (S
[0], S
[0], diff
);
135 mpz_mul_2exp (S
[0], S
[0], -diff
);
137 MPFR_MPZ_SIZEINBASE2 (prec_i_have
, Q
[0]);
138 diff
= (mp_exp_t
) prec_i_have
- (mp_prec_t
) precy
;
141 mpz_div_2exp (Q
[0], Q
[0], diff
);
143 mpz_mul_2exp (Q
[0], Q
[0], -diff
);
145 mpz_tdiv_q (S
[0], S
[0], Q
[0]);
146 mpfr_set_z (y
, S
[0], GMP_RNDD
);
147 MPFR_SET_EXP (y
, MPFR_GET_EXP (y
) + expo
- r
* (i
- 1) );
150 #define shift (BITS_PER_MP_LIMB/2)
153 mpfr_exp_3 (mpfr_ptr y
, mpfr_srcptr x
, mp_rnd_t rnd_mode
)
155 mpfr_t t
, x_copy
, tmp
;
157 mp_exp_t ttt
, shift_x
;
158 unsigned long twopoweri
;
163 mp_prec_t realprec
, Prec
;
166 MPFR_SAVE_EXPO_DECL (expo
);
167 MPFR_ZIV_DECL (ziv_loop
);
169 MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", x
, x
, rnd_mode
),
170 ("y[%#R]=%R inexact=%d", y
, y
, inexact
));
172 MPFR_SAVE_EXPO_MARK (expo
);
175 /* we first write x = 1.xxxxxxxxxxxxx
177 prec_x
= MPFR_INT_CEIL_LOG2 (MPFR_PREC (x
)) - MPFR_LOG2_BITS_PER_MP_LIMB
;
181 ttt
= MPFR_GET_EXP (x
);
182 mpfr_init2 (x_copy
, MPFR_PREC(x
));
183 mpfr_set (x_copy
, x
, GMP_RNDD
);
185 /* we shift to get a number less than 1 */
189 mpfr_div_2ui (x_copy
, x
, ttt
, GMP_RNDN
);
190 ttt
= MPFR_GET_EXP (x_copy
);
194 MPFR_ASSERTD (ttt
<= 0);
196 /* Init prec and vars */
197 realprec
= MPFR_PREC (y
) + MPFR_INT_CEIL_LOG2 (prec_x
+ MPFR_PREC (y
));
198 Prec
= realprec
+ shift
+ 2 + shift_x
;
199 mpfr_init2 (t
, Prec
);
200 mpfr_init2 (tmp
, Prec
);
204 MPFR_ZIV_INIT (ziv_loop
, realprec
);
208 MPFR_BLOCK_DECL (flags
);
210 k
= MPFR_INT_CEIL_LOG2 (Prec
) - MPFR_LOG2_BITS_PER_MP_LIMB
;
212 /* now we have to extract */
213 twopoweri
= BITS_PER_MP_LIMB
;
215 /* Allocate tables */
216 P
= (mpz_t
*) (*__gmp_allocate_func
) (3*(k
+2)*sizeof(mpz_t
));
217 for (i
= 0; i
< 3*(k
+2); i
++)
219 mult
= (mp_prec_t
*) (*__gmp_allocate_func
) (2*(k
+2)*sizeof(mp_prec_t
));
221 /* Particular case for i==0 */
222 mpfr_extract (uk
, x_copy
, 0);
223 MPFR_ASSERTD (mpz_cmp_ui (uk
, 0) != 0);
224 mpfr_exp_rational (tmp
, uk
, shift
+ twopoweri
- ttt
, k
+ 1, P
, mult
);
225 for (loop
= 0; loop
< shift
; loop
++)
226 mpfr_sqr (tmp
, tmp
, GMP_RNDD
);
230 iter
= (k
<= prec_x
) ? k
: prec_x
;
231 for (i
= 1; i
<= iter
; i
++)
233 mpfr_extract (uk
, x_copy
, i
);
234 if (MPFR_LIKELY (mpz_cmp_ui (uk
, 0) != 0))
236 mpfr_exp_rational (t
, uk
, twopoweri
- ttt
, k
- i
+ 1, P
, mult
);
237 mpfr_mul (tmp
, tmp
, t
, GMP_RNDD
);
239 MPFR_ASSERTN (twopoweri
<= LONG_MAX
/2);
244 for (i
= 0; i
< 3*(k
+2); i
++)
246 (*__gmp_free_func
) (P
, 3*(k
+2)*sizeof(mpz_t
));
247 (*__gmp_free_func
) (mult
, 2*(k
+2)*sizeof(mp_prec_t
));
252 for (loop
= 0; loop
< shift_x
- 1; loop
++)
253 mpfr_sqr (tmp
, tmp
, GMP_RNDD
);
254 mpfr_sqr (t
, tmp
, GMP_RNDD
);
257 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags
)))
259 /* tmp <= exact result, so that it is a real overflow. */
260 inexact
= mpfr_overflow (y
, rnd_mode
, 1);
261 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo
, MPFR_FLAGS_OVERFLOW
);
265 if (MPFR_UNLIKELY (MPFR_UNDERFLOW (flags
)))
267 /* This may be a spurious underflow. So, let's scale
269 mpfr_mul_2ui (tmp
, tmp
, 1, GMP_RNDD
); /* no overflow, exact */
270 mpfr_sqr (t
, tmp
, GMP_RNDD
);
271 if (MPFR_IS_ZERO (t
))
273 /* approximate result < 2^(emin - 3), thus
274 exact result < 2^(emin - 2). */
275 inexact
= mpfr_underflow (y
, (rnd_mode
== GMP_RNDN
) ?
276 GMP_RNDZ
: rnd_mode
, 1);
277 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo
, MPFR_FLAGS_UNDERFLOW
);
284 if (mpfr_can_round (shift_x
> 0 ? t
: tmp
, realprec
, GMP_RNDD
, GMP_RNDZ
,
285 MPFR_PREC(y
) + (rnd_mode
== GMP_RNDN
)))
287 inexact
= mpfr_set (y
, shift_x
> 0 ? t
: tmp
, rnd_mode
);
288 if (MPFR_UNLIKELY (scaled
&& MPFR_IS_PURE_FP (y
)))
293 /* The result has been scaled and needs to be corrected. */
294 ey
= MPFR_GET_EXP (y
);
295 inex2
= mpfr_mul_2si (y
, y
, -2, rnd_mode
);
296 if (inex2
) /* underflow */
298 if (rnd_mode
== GMP_RNDN
&& inexact
< 0 &&
299 MPFR_IS_ZERO (y
) && ey
== __gmpfr_emin
+ 1)
301 /* Double rounding case: in GMP_RNDN, the scaled
302 result has been rounded downward to 2^emin.
303 As the exact result is > 2^(emin - 2), correct
304 rounding must be done upward. */
305 /* TODO: make sure in coverage tests that this line
307 inexact
= mpfr_underflow (y
, GMP_RNDU
, 1);
311 /* No double rounding. */
314 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo
, MPFR_FLAGS_UNDERFLOW
);
320 MPFR_ZIV_NEXT (ziv_loop
, realprec
);
321 Prec
= realprec
+ shift
+ 2 + shift_x
;
322 mpfr_set_prec (t
, Prec
);
323 mpfr_set_prec (tmp
, Prec
);
325 MPFR_ZIV_FREE (ziv_loop
);
331 MPFR_SAVE_EXPO_FREE (expo
);