1 /* mpfr_exp_2 -- exponential of a floating-point number
2 using algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n))
4 Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
5 Contributed by the Arenaire and Cacao projects, INRIA.
7 This file is part of the GNU MPFR Library.
9 The GNU MPFR Library is free software; you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License as published by
11 the Free Software Foundation; either version 2.1 of the License, or (at your
12 option) any later version.
14 The GNU MPFR Library is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
17 License for more details.
19 You should have received a copy of the GNU Lesser General Public License
20 along with the GNU MPFR Library; see the file COPYING.LIB. If not, write to
21 the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
22 MA 02110-1301, USA. */
25 #define MPFR_NEED_LONGLONG_H /* for count_leading_zeros */
26 #include "mpfr-impl.h"
29 mpfr_exp2_aux (mpz_t
, mpfr_srcptr
, mp_prec_t
, mp_exp_t
*);
31 mpfr_exp2_aux2 (mpz_t
, mpfr_srcptr
, mp_prec_t
, mp_exp_t
*);
33 mpz_normalize (mpz_t
, mpz_t
, mp_exp_t
);
35 mpz_normalize2 (mpz_t
, mpz_t
, mp_exp_t
, mp_exp_t
);
37 #define MY_INIT_MPZ(x, s) { \
38 (x)->_mp_alloc = (s); \
39 PTR(x) = (mp_ptr) MPFR_TMP_ALLOC((s)*BYTES_PER_MP_LIMB); \
42 /* if k = the number of bits of z > q, divides z by 2^(k-q) and returns k-q.
43 Otherwise do nothing and return 0.
46 mpz_normalize (mpz_t rop
, mpz_t z
, mp_exp_t q
)
50 MPFR_MPZ_SIZEINBASE2 (k
, z
);
51 MPFR_ASSERTD (k
== (mpfr_uexp_t
) k
);
52 if (q
< 0 || (mpfr_uexp_t
) k
> (mpfr_uexp_t
) q
)
54 mpz_div_2exp(rop
, z
, (unsigned long) ((mpfr_uexp_t
) k
- q
));
55 return (mp_exp_t
) k
- q
;
57 if (MPFR_UNLIKELY(rop
!= z
))
62 /* if expz > target, shift z by (expz-target) bits to the left.
63 if expz < target, shift z by (target-expz) bits to the right.
67 mpz_normalize2 (mpz_t rop
, mpz_t z
, mp_exp_t expz
, mp_exp_t target
)
70 mpz_div_2exp(rop
, z
, target
-expz
);
72 mpz_mul_2exp(rop
, z
, expz
-target
);
76 /* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n
77 where x = n*log(2)+(2^K)*r
78 together with the Paterson-Stockmeyer O(t^(1/2)) algorithm for the
79 evaluation of power series. The resulting complexity is O(n^(1/3)*M(n)).
80 This function returns with the exact flags due to exp.
83 mpfr_exp_2 (mpfr_ptr y
, mpfr_srcptr x
, mp_rnd_t rnd_mode
)
86 unsigned long K
, k
, l
, err
; /* FIXME: Which type ? */
94 MPFR_TMP_DECL(marker
);
96 MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", x
, x
, rnd_mode
),
97 ("y[%#R]=%R inexact=%d", y
, y
, inexact
));
101 /* Warning: we cannot use the 'double' type here, since on 64-bit machines
102 x may be as large as 2^62*log(2) without overflow, and then x/log(2)
103 is about 2^62: not every integer of that size can be represented as a
104 'double', thus the argument reduction would fail. */
105 if (MPFR_GET_EXP (x
) <= -2)
106 /* |x| <= 0.25, thus n = round(x/log(2)) = 0 */
110 mpfr_init2 (r
, sizeof (long) * CHAR_BIT
);
111 mpfr_const_log2 (r
, GMP_RNDZ
);
112 mpfr_div (r
, x
, r
, GMP_RNDN
);
113 n
= mpfr_get_si (r
, GMP_RNDN
);
116 MPFR_LOG_MSG (("d(x)=%1.30e n=%ld\n", mpfr_get_d1(x
), n
));
118 /* error bounds the cancelled bits in x - n*log(2) */
119 if (MPFR_UNLIKELY (n
== 0))
122 count_leading_zeros (error_r
, (mp_limb_t
) SAFE_ABS (unsigned long, n
));
123 error_r
= BITS_PER_MP_LIMB
- error_r
+ 2;
125 /* for the O(n^(1/2)*M(n)) method, the Taylor series computation of
126 n/K terms costs about n/(2K) multiplications when computed in fixed
128 K
= (precy
< MPFR_EXP_2_THRESHOLD
) ? __gmpfr_isqrt ((precy
+ 1) / 2)
129 : __gmpfr_cuberoot (4*precy
);
130 l
= (precy
- 1) / K
+ 1;
131 err
= K
+ MPFR_INT_CEIL_LOG2 (2 * l
+ 18);
132 /* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */
133 q
= precy
+ err
+ K
+ 5;
135 mpfr_init2 (r
, q
+ error_r
);
136 mpfr_init2 (s
, q
+ error_r
);
138 /* the algorithm consists in computing an upper bound of exp(x) using
139 a precision of q bits, and see if we can round to MPFR_PREC(y) taking
140 into account the maximal error. Otherwise we increase q. */
141 MPFR_ZIV_INIT (loop
, q
);
144 MPFR_LOG_MSG (("n=%ld K=%lu l=%lu q=%lu error_r=%d\n",
145 n
, K
, l
, (unsigned long) q
, error_r
));
147 /* First reduce the argument to r = x - n * log(2),
148 so that r is small in absolute value. We want an upper
149 bound on r to get an upper bound on exp(x). */
151 /* if n<0, we have to get an upper bound of log(2)
152 in order to get an upper bound of r = x-n*log(2) */
153 mpfr_const_log2 (s
, (n
>= 0) ? GMP_RNDZ
: GMP_RNDU
);
154 /* s is within 1 ulp of log(2) */
156 mpfr_mul_ui (r
, s
, (n
< 0) ? -n
: n
, (n
>= 0) ? GMP_RNDZ
: GMP_RNDU
);
157 /* r is within 3 ulps of |n|*log(2) */
159 MPFR_CHANGE_SIGN (r
);
160 /* r <= n*log(2), within 3 ulps */
165 mpfr_sub (r
, x
, r
, GMP_RNDU
);
166 /* possible cancellation here: if r is zero, increase the working
167 precision (Ziv's loop); otherwise, the error on r is at most
168 3*2^(EXP(old_r)-EXP(new_r)) ulps */
170 if (MPFR_IS_PURE_FP (r
))
174 /* number of cancelled bits */
175 cancel
= MPFR_GET_EXP (x
) - MPFR_GET_EXP (r
);
176 if (cancel
< 0) /* this might happen in the second loop if x is
177 tiny negative: the initial n is 0, then in the
178 first loop n becomes -1 and r = x + log(2) */
180 while (MPFR_IS_NEG (r
))
181 { /* initial approximation n was too large */
183 mpfr_add (r
, r
, s
, GMP_RNDU
);
185 mpfr_prec_round (r
, q
, GMP_RNDU
);
187 MPFR_ASSERTD (MPFR_IS_POS (r
));
188 mpfr_div_2ui (r
, r
, K
, GMP_RNDU
); /* r = (x-n*log(2))/2^K, exact */
190 MPFR_TMP_MARK(marker
);
191 MY_INIT_MPZ(ss
, 3 + 2*((q
-1)/BITS_PER_MP_LIMB
));
192 exps
= mpfr_get_z_exp (ss
, s
);
193 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */
194 MPFR_ASSERTD (MPFR_IS_PURE_FP (r
) && MPFR_EXP (r
) < 0);
195 l
= (precy
< MPFR_EXP_2_THRESHOLD
)
196 ? mpfr_exp2_aux (ss
, r
, q
, &exps
) /* naive method */
197 : mpfr_exp2_aux2 (ss
, r
, q
, &exps
); /* Paterson/Stockmeyer meth */
199 MPFR_LOG_MSG (("l=%lu q=%lu (K+l)*q^2=%1.3e\n",
200 l
, (unsigned long) q
, (K
+ l
) * (double) q
* q
));
202 for (k
= 0; k
< K
; k
++)
204 mpz_mul (ss
, ss
, ss
);
206 exps
+= mpz_normalize (ss
, ss
, q
);
208 mpfr_set_z (s
, ss
, GMP_RNDN
);
210 MPFR_SET_EXP(s
, MPFR_GET_EXP (s
) + exps
);
211 MPFR_TMP_FREE(marker
); /* don't need ss anymore */
213 /* error is at most 2^K*l, plus cancel+2 to take into account of
214 the error of 3*2^(EXP(old_r)-EXP(new_r)) on r */
215 K
+= MPFR_INT_CEIL_LOG2 (l
) + cancel
+ 2;
217 MPFR_LOG_MSG (("before mult. by 2^n:\n", 0));
219 MPFR_LOG_MSG (("err=%lu bits\n", K
));
221 if (MPFR_LIKELY (MPFR_CAN_ROUND (s
, q
- K
, precy
, rnd_mode
)))
224 inexact
= mpfr_mul_2si (y
, s
, n
, rnd_mode
);
229 MPFR_ZIV_NEXT (loop
, q
);
230 mpfr_set_prec (r
, q
);
231 mpfr_set_prec (s
, q
);
233 MPFR_ZIV_FREE (loop
);
241 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
242 using naive method with O(l) multiplications.
243 Return the number of iterations l.
244 The absolute error on s is less than 3*l*(l+1)*2^(-q).
245 Version using fixed-point arithmetic with mpz instead
246 of mpfr for internal computations.
247 s must have at least qn+1 limbs (qn should be enough, but currently fails
248 since mpz_mul_2exp(s, s, q-1) reallocates qn+1 limbs)
251 mpfr_exp2_aux (mpz_t s
, mpfr_srcptr r
, mp_prec_t q
, mp_exp_t
*exps
)
254 mp_exp_t dif
, expt
, expr
;
257 mp_size_t sbit
, tbit
;
258 MPFR_TMP_DECL(marker
);
260 MPFR_ASSERTN (MPFR_IS_PURE_FP (r
));
262 MPFR_TMP_MARK(marker
);
263 qn
= 1 + (q
-1)/BITS_PER_MP_LIMB
;
265 *exps
= 1 - (mp_exp_t
) q
; /* s = 2^(q-1) */
266 MY_INIT_MPZ(t
, 2*qn
+1);
267 MY_INIT_MPZ(rr
, qn
+1);
270 mpz_mul_2exp(s
, s
, q
-1);
271 expr
= mpfr_get_z_exp(rr
, r
); /* no error here */
278 MPFR_MPZ_SIZEINBASE2 (sbit
, s
);
279 MPFR_MPZ_SIZEINBASE2 (tbit
, t
);
280 dif
= *exps
+ sbit
- expt
- tbit
;
281 /* truncates the bits of t which are < ulp(s) = 2^(1-q) */
282 expt
+= mpz_normalize(t
, t
, (mp_exp_t
) q
-dif
); /* error at most 2^(1-q) */
283 mpz_div_ui(t
, t
, l
); /* error at most 2^(1-q) */
284 /* the error wrt t^l/l! is here at most 3*l*ulp(s) */
285 MPFR_ASSERTD (expt
== *exps
);
286 if (mpz_sgn (t
) == 0)
288 mpz_add(s
, s
, t
); /* no error here: exact */
289 /* ensures rr has the same size as t: after several shifts, the error
290 on rr is still at most ulp(t)=ulp(s) */
291 MPFR_MPZ_SIZEINBASE2 (tbit
, t
);
292 expr
+= mpz_normalize(rr
, rr
, tbit
);
295 MPFR_TMP_FREE(marker
);
299 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
300 using Paterson-Stockmeyer algorithm with O(sqrt(l)) multiplications.
302 Uses m multiplications of full size and 2l/m of decreasing size,
303 i.e. a total equivalent to about m+l/m full multiplications,
304 i.e. 2*sqrt(l) for m=sqrt(l).
305 Version using mpz. ss must have at least (sizer+1) limbs.
306 The error is bounded by (l^2+4*l) ulps where l is the return value.
309 mpfr_exp2_aux2 (mpz_t s
, mpfr_srcptr r
, mp_prec_t q
, mp_exp_t
*exps
)
311 mp_exp_t expr
, *expR
, expt
;
314 unsigned long l
, m
, i
;
315 mpz_t t
, *R
, rr
, tmp
;
316 mp_size_t sbit
, rrbit
;
317 MPFR_TMP_DECL(marker
);
319 /* estimate value of l */
320 MPFR_ASSERTD (MPFR_GET_EXP (r
) < 0);
321 l
= q
/ (- MPFR_GET_EXP (r
));
322 m
= __gmpfr_isqrt (l
);
323 /* we access R[2], thus we need m >= 2 */
327 MPFR_TMP_MARK(marker
);
328 R
= (mpz_t
*) MPFR_TMP_ALLOC((m
+1)*sizeof(mpz_t
)); /* R[i] is r^i */
329 expR
= (mp_exp_t
*) MPFR_TMP_ALLOC((m
+1)*sizeof(mp_exp_t
)); /* exponent for R[i] */
330 sizer
= 1 + (MPFR_PREC(r
)-1)/BITS_PER_MP_LIMB
;
332 MY_INIT_MPZ(rr
, sizer
+2);
333 MY_INIT_MPZ(t
, 2*sizer
); /* double size for products */
335 *exps
= 1-q
; /* 1 ulp = 2^(1-q) */
336 for (i
= 0 ; i
<= m
; i
++)
337 MY_INIT_MPZ(R
[i
], sizer
+2);
338 expR
[1] = mpfr_get_z_exp(R
[1], r
); /* exact operation: no error */
339 expR
[1] = mpz_normalize2(R
[1], R
[1], expR
[1], 1-q
); /* error <= 1 ulp */
340 mpz_mul(t
, R
[1], R
[1]); /* err(t) <= 2 ulps */
341 mpz_div_2exp(R
[2], t
, q
-1); /* err(R[2]) <= 3 ulps */
343 for (i
= 3 ; i
<= m
; i
++)
345 mpz_mul(t
, R
[i
-1], R
[1]); /* err(t) <= 2*i-2 */
346 mpz_div_2exp(R
[i
], t
, q
-1); /* err(R[i]) <= 2*i-1 ulps */
349 mpz_set_ui (R
[0], 1);
350 mpz_mul_2exp (R
[0], R
[0], q
-1);
351 expR
[0] = 1-q
; /* R[0]=1 */
353 expr
= 0; /* rr contains r^l/l! */
354 /* by induction: err(rr) <= 2*l ulps */
357 ql
= q
; /* precision used for current giant step */
360 /* all R[i] must have exponent 1-ql */
362 for (i
= 0 ; i
< m
; i
++)
363 expR
[i
] = mpz_normalize2 (R
[i
], R
[i
], expR
[i
], 1-ql
);
364 /* the absolute error on R[i]*rr is still 2*i-1 ulps */
365 expt
= mpz_normalize2 (t
, R
[m
-1], expR
[m
-1], 1-ql
);
366 /* err(t) <= 2*m-1 ulps */
367 /* computes t = 1 + r/(l+1) + ... + r^(m-1)*l!/(l+m-1)!
368 using Horner's scheme */
369 for (i
= m
-1 ; i
-- != 0 ; )
371 mpz_div_ui (t
, t
, l
+i
+1); /* err(t) += 1 ulp */
372 mpz_add (t
, t
, R
[i
]);
374 /* now err(t) <= (3m-2) ulps */
376 /* now multiplies t by r^l/l! and adds to s */
379 expt
= mpz_normalize2 (t
, t
, expt
, *exps
);
380 /* err(t) <= (3m-1) + err_rr(l) <= (3m-2) + 2*l */
381 MPFR_ASSERTD (expt
== *exps
);
382 mpz_add (s
, s
, t
); /* no error here */
384 /* updates rr, the multiplication of the factors l+i could be done
385 using binary splitting too, but it is not sure it would save much */
386 mpz_mul (t
, rr
, R
[m
]); /* err(t) <= err(rr) + 2m-1 */
389 for (i
= 1 ; i
<= m
; i
++)
390 mpz_mul_ui (tmp
, tmp
, l
+ i
);
391 mpz_fdiv_q (t
, t
, tmp
); /* err(t) <= err(rr) + 2m */
393 if (MPFR_UNLIKELY (mpz_sgn (t
) == 0))
395 expr
+= mpz_normalize (rr
, t
, ql
); /* err_rr(l+1) <= err_rr(l) + 2m+1 */
396 if (MPFR_UNLIKELY (mpz_sgn (rr
) == 0))
399 MPFR_MPZ_SIZEINBASE2 (rrbit
, rr
);
400 MPFR_MPZ_SIZEINBASE2 (sbit
, s
);
401 ql
= q
- *exps
- sbit
+ expr
+ rrbit
;
402 /* TODO: Wrong cast. I don't want what is right, but this is
405 while ((size_t) expr
+rrbit
> (size_t) (int) -q
);
407 MPFR_TMP_FREE(marker
);