1 /* mpfr_exp -- exponential of a floating-point number
3 Copyright 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramel projects, INRIA.
6 This file is part of the GNU MPFR Library.
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
23 #define MPFR_NEED_LONGLONG_H /* for MPFR_MPZ_SIZEINBASE2 */
24 #include "mpfr-impl.h"
26 /* y <- exp(p/2^r) within 1 ulp, using 2^m terms from the series
28 We use the following binary splitting formula:
29 P(a,b) = p if a+1=b, P(a,c)*P(c,b) otherwise
30 Q(a,b) = a*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
31 T(a,b) = P(a,b) if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise
32 Then exp(p/2^r) ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough.
34 Since P(a,b) = p^(b-a), and we consider only values of b-a of the form 2^j,
35 we don't need to compute P(), we only precompute p^(2^j) in the ptoj[] array
38 Since Q(a,b) is divisible by 2^(r*(b-a-1)), we don't compute the power of
42 mpfr_exp_rational (mpfr_ptr y
, mpz_ptr p
, long r
, int m
,
43 mpz_t
*Q
, mpfr_prec_t
*mult
)
45 unsigned long n
, i
, j
;
47 mpfr_prec_t
*log2_nb_terms
;
48 mpfr_exp_t diff
, expo
;
49 mpfr_prec_t precy
= MPFR_PREC(y
), prec_i_have
, prec_ptoj
;
52 MPFR_ASSERTN ((size_t) m
< sizeof (long) * CHAR_BIT
- 1);
55 ptoj
= Q
+ 2*(m
+1); /* ptoj[i] = mantissa^(2^i) */
56 log2_nb_terms
= mult
+ (m
+1);
59 MPFR_ASSERTD (mpz_cmp_ui (p
, 0) != 0);
60 n
= mpz_scan1 (p
, 0); /* number of trailing zeros in p */
61 mpz_tdiv_q_2exp (p
, p
, n
);
62 r
-= n
; /* since |p/2^r| < 1 and p >= 1, r >= 1 */
66 for (k
= 1; k
< m
; k
++)
67 mpz_mul (ptoj
[k
], ptoj
[k
-1], ptoj
[k
-1]); /* ptoj[k] = p^(2^k) */
71 mult
[0] = 0; /* the multiplier P[k]/Q[k] for the remaining terms
72 satisfies P[k]/Q[k] <= 2^(-mult[k]) */
73 log2_nb_terms
[0] = 0; /* log2(#terms) [exact in 1st loop where 2^k] */
78 for (i
= 1; (prec_i_have
< precy
) && (i
< n
); i
++)
80 /* invariant: Q[0]*Q[1]*...*Q[k] equals i! */
82 log2_nb_terms
[k
] = 0; /* 1 term */
83 mpz_set_ui (Q
[k
], i
+ 1);
84 mpz_set_ui (S
[k
], i
+ 1);
85 j
= i
+ 1; /* we have computed j = i+1 terms so far */
87 while ((j
& 1) == 0) /* combine and reduce */
89 /* invariant: S[k] corresponds to 2^l consecutive terms */
90 mpz_mul (S
[k
], S
[k
], ptoj
[l
]);
91 mpz_mul (S
[k
-1], S
[k
-1], Q
[k
]);
92 /* Q[k] corresponds to 2^l consecutive terms too.
93 Since it does not contains the factor 2^(r*2^l),
94 when going from l to l+1 we need to multiply
95 by 2^(r*2^(l+1))/2^(r*2^l) = 2^(r*2^l) */
96 mpz_mul_2exp (S
[k
-1], S
[k
-1], r
<< l
);
97 mpz_add (S
[k
-1], S
[k
-1], S
[k
]);
98 mpz_mul (Q
[k
-1], Q
[k
-1], Q
[k
]);
99 log2_nb_terms
[k
-1] ++; /* number of terms in S[k-1]
100 is a power of 2 by construction */
101 MPFR_MPZ_SIZEINBASE2 (prec_i_have
, Q
[k
]);
102 MPFR_MPZ_SIZEINBASE2 (prec_ptoj
, ptoj
[l
]);
103 mult
[k
-1] += prec_i_have
+ (r
<< l
) - prec_ptoj
- 1;
104 prec_i_have
= mult
[k
] = mult
[k
-1];
105 /* since mult[k] >= mult[k-1] + nbits(Q[k]),
106 we have Q[0]*...*Q[k] <= 2^mult[k] = 2^prec_i_have */
113 /* accumulate all products in S[0] and Q[0]. Warning: contrary to above,
114 here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */
115 l
= 0; /* number of accumulated terms in the right part S[k]/Q[k] */
118 j
= log2_nb_terms
[k
-1];
119 mpz_mul (S
[k
], S
[k
], ptoj
[j
]);
120 mpz_mul (S
[k
-1], S
[k
-1], Q
[k
]);
121 l
+= 1 << log2_nb_terms
[k
];
122 mpz_mul_2exp (S
[k
-1], S
[k
-1], r
* l
);
123 mpz_add (S
[k
-1], S
[k
-1], S
[k
]);
124 mpz_mul (Q
[k
-1], Q
[k
-1], Q
[k
]);
128 /* Q[0] now equals i! */
129 MPFR_MPZ_SIZEINBASE2 (prec_i_have
, S
[0]);
130 diff
= (mpfr_exp_t
) prec_i_have
- 2 * (mpfr_exp_t
) precy
;
133 mpz_fdiv_q_2exp (S
[0], S
[0], diff
);
135 mpz_mul_2exp (S
[0], S
[0], -diff
);
137 MPFR_MPZ_SIZEINBASE2 (prec_i_have
, Q
[0]);
138 diff
= (mpfr_exp_t
) prec_i_have
- (mpfr_prec_t
) precy
;
141 mpz_fdiv_q_2exp (Q
[0], Q
[0], diff
);
143 mpz_mul_2exp (Q
[0], Q
[0], -diff
);
145 mpz_tdiv_q (S
[0], S
[0], Q
[0]);
146 mpfr_set_z (y
, S
[0], MPFR_RNDD
);
147 MPFR_SET_EXP (y
, MPFR_GET_EXP (y
) + expo
- r
* (i
- 1) );
150 #define shift (GMP_NUMB_BITS/2)
153 mpfr_exp_3 (mpfr_ptr y
, mpfr_srcptr x
, mpfr_rnd_t rnd_mode
)
155 mpfr_t t
, x_copy
, tmp
;
157 mpfr_exp_t ttt
, shift_x
;
158 unsigned long twopoweri
;
163 mpfr_prec_t realprec
, Prec
;
166 MPFR_SAVE_EXPO_DECL (expo
);
167 MPFR_ZIV_DECL (ziv_loop
);
170 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec(x
), mpfr_log_prec
, x
, rnd_mode
),
171 ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec(y
), mpfr_log_prec
, y
,
174 MPFR_SAVE_EXPO_MARK (expo
);
177 /* we first write x = 1.xxxxxxxxxxxxx
179 prec_x
= MPFR_INT_CEIL_LOG2 (MPFR_PREC (x
)) - MPFR_LOG2_GMP_NUMB_BITS
;
183 ttt
= MPFR_GET_EXP (x
);
184 mpfr_init2 (x_copy
, MPFR_PREC(x
));
185 mpfr_set (x_copy
, x
, MPFR_RNDD
);
187 /* we shift to get a number less than 1 */
191 mpfr_div_2ui (x_copy
, x
, ttt
, MPFR_RNDN
);
192 ttt
= MPFR_GET_EXP (x_copy
);
196 MPFR_ASSERTD (ttt
<= 0);
198 /* Init prec and vars */
199 realprec
= MPFR_PREC (y
) + MPFR_INT_CEIL_LOG2 (prec_x
+ MPFR_PREC (y
));
200 Prec
= realprec
+ shift
+ 2 + shift_x
;
201 mpfr_init2 (t
, Prec
);
202 mpfr_init2 (tmp
, Prec
);
206 MPFR_ZIV_INIT (ziv_loop
, realprec
);
210 MPFR_BLOCK_DECL (flags
);
212 k
= MPFR_INT_CEIL_LOG2 (Prec
) - MPFR_LOG2_GMP_NUMB_BITS
;
214 /* now we have to extract */
215 twopoweri
= GMP_NUMB_BITS
;
217 /* Allocate tables */
218 P
= (mpz_t
*) (*__gmp_allocate_func
) (3*(k
+2)*sizeof(mpz_t
));
219 for (i
= 0; i
< 3*(k
+2); i
++)
221 mult
= (mpfr_prec_t
*) (*__gmp_allocate_func
) (2*(k
+2)*sizeof(mpfr_prec_t
));
223 /* Particular case for i==0 */
224 mpfr_extract (uk
, x_copy
, 0);
225 MPFR_ASSERTD (mpz_cmp_ui (uk
, 0) != 0);
226 mpfr_exp_rational (tmp
, uk
, shift
+ twopoweri
- ttt
, k
+ 1, P
, mult
);
227 for (loop
= 0; loop
< shift
; loop
++)
228 mpfr_sqr (tmp
, tmp
, MPFR_RNDD
);
232 iter
= (k
<= prec_x
) ? k
: prec_x
;
233 for (i
= 1; i
<= iter
; i
++)
235 mpfr_extract (uk
, x_copy
, i
);
236 if (MPFR_LIKELY (mpz_cmp_ui (uk
, 0) != 0))
238 mpfr_exp_rational (t
, uk
, twopoweri
- ttt
, k
- i
+ 1, P
, mult
);
239 mpfr_mul (tmp
, tmp
, t
, MPFR_RNDD
);
241 MPFR_ASSERTN (twopoweri
<= LONG_MAX
/2);
246 for (i
= 0; i
< 3*(k
+2); i
++)
248 (*__gmp_free_func
) (P
, 3*(k
+2)*sizeof(mpz_t
));
249 (*__gmp_free_func
) (mult
, 2*(k
+2)*sizeof(mpfr_prec_t
));
254 for (loop
= 0; loop
< shift_x
- 1; loop
++)
255 mpfr_sqr (tmp
, tmp
, MPFR_RNDD
);
256 mpfr_sqr (t
, tmp
, MPFR_RNDD
);
259 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags
)))
261 /* tmp <= exact result, so that it is a real overflow. */
262 inexact
= mpfr_overflow (y
, rnd_mode
, 1);
263 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo
, MPFR_FLAGS_OVERFLOW
);
267 if (MPFR_UNLIKELY (MPFR_UNDERFLOW (flags
)))
269 /* This may be a spurious underflow. So, let's scale
271 mpfr_mul_2ui (tmp
, tmp
, 1, MPFR_RNDD
); /* no overflow, exact */
272 mpfr_sqr (t
, tmp
, MPFR_RNDD
);
273 if (MPFR_IS_ZERO (t
))
275 /* approximate result < 2^(emin - 3), thus
276 exact result < 2^(emin - 2). */
277 inexact
= mpfr_underflow (y
, (rnd_mode
== MPFR_RNDN
) ?
278 MPFR_RNDZ
: rnd_mode
, 1);
279 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo
, MPFR_FLAGS_UNDERFLOW
);
286 if (mpfr_can_round (shift_x
> 0 ? t
: tmp
, realprec
, MPFR_RNDD
, MPFR_RNDZ
,
287 MPFR_PREC(y
) + (rnd_mode
== MPFR_RNDN
)))
289 inexact
= mpfr_set (y
, shift_x
> 0 ? t
: tmp
, rnd_mode
);
290 if (MPFR_UNLIKELY (scaled
&& MPFR_IS_PURE_FP (y
)))
295 /* The result has been scaled and needs to be corrected. */
296 ey
= MPFR_GET_EXP (y
);
297 inex2
= mpfr_mul_2si (y
, y
, -2, rnd_mode
);
298 if (inex2
) /* underflow */
300 if (rnd_mode
== MPFR_RNDN
&& inexact
< 0 &&
301 MPFR_IS_ZERO (y
) && ey
== __gmpfr_emin
+ 1)
303 /* Double rounding case: in MPFR_RNDN, the scaled
304 result has been rounded downward to 2^emin.
305 As the exact result is > 2^(emin - 2), correct
306 rounding must be done upward. */
307 /* TODO: make sure in coverage tests that this line
309 inexact
= mpfr_underflow (y
, MPFR_RNDU
, 1);
313 /* No double rounding. */
316 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo
, MPFR_FLAGS_UNDERFLOW
);
322 MPFR_ZIV_NEXT (ziv_loop
, realprec
);
323 Prec
= realprec
+ shift
+ 2 + shift_x
;
324 mpfr_set_prec (t
, Prec
);
325 mpfr_set_prec (tmp
, Prec
);
327 MPFR_ZIV_FREE (ziv_loop
);
333 MPFR_SAVE_EXPO_FREE (expo
);