1 /* mpfr_exp_2 -- exponential of a floating-point number
2 using algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n))
4 Copyright 1999-2015 Free Software Foundation, Inc.
5 Contributed by the AriC and Caramel projects, INRIA.
7 This file is part of the GNU MPFR Library.
9 The GNU MPFR Library is free software; you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
14 The GNU MPFR Library is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
17 License for more details.
19 You should have received a copy of the GNU Lesser General Public License
20 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
21 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
22 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
25 #define MPFR_NEED_LONGLONG_H /* for count_leading_zeros */
26 #include "mpfr-impl.h"
29 mpfr_exp2_aux (mpz_t
, mpfr_srcptr
, mpfr_prec_t
, mpfr_exp_t
*);
31 mpfr_exp2_aux2 (mpz_t
, mpfr_srcptr
, mpfr_prec_t
, mpfr_exp_t
*);
33 mpz_normalize (mpz_t
, mpz_t
, mpfr_exp_t
);
35 mpz_normalize2 (mpz_t
, mpz_t
, mpfr_exp_t
, mpfr_exp_t
);
37 /* if k = the number of bits of z > q, divides z by 2^(k-q) and returns k-q.
38 Otherwise do nothing and return 0.
41 mpz_normalize (mpz_t rop
, mpz_t z
, mpfr_exp_t q
)
45 MPFR_MPZ_SIZEINBASE2 (k
, z
);
46 MPFR_ASSERTD (k
== (mpfr_uexp_t
) k
);
47 if (q
< 0 || (mpfr_uexp_t
) k
> (mpfr_uexp_t
) q
)
49 mpz_fdiv_q_2exp (rop
, z
, (unsigned long) ((mpfr_uexp_t
) k
- q
));
50 return (mpfr_exp_t
) k
- q
;
52 if (MPFR_UNLIKELY(rop
!= z
))
57 /* if expz > target, shift z by (expz-target) bits to the left.
58 if expz < target, shift z by (target-expz) bits to the right.
62 mpz_normalize2 (mpz_t rop
, mpz_t z
, mpfr_exp_t expz
, mpfr_exp_t target
)
65 mpz_fdiv_q_2exp (rop
, z
, target
- expz
);
67 mpz_mul_2exp (rop
, z
, expz
- target
);
71 /* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n
72 where x = n*log(2)+(2^K)*r
73 together with the Paterson-Stockmeyer O(t^(1/2)) algorithm for the
74 evaluation of power series. The resulting complexity is O(n^(1/3)*M(n)).
75 This function returns with the exact flags due to exp.
78 mpfr_exp_2 (mpfr_ptr y
, mpfr_srcptr x
, mpfr_rnd_t rnd_mode
)
81 unsigned long K
, k
, l
, err
; /* FIXME: Which type ? */
83 mpfr_exp_t exps
, expx
;
91 (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec(x
), mpfr_log_prec
, x
, rnd_mode
),
92 ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec(y
), mpfr_log_prec
, y
,
95 expx
= MPFR_GET_EXP (x
);
98 /* Warning: we cannot use the 'double' type here, since on 64-bit machines
99 x may be as large as 2^62*log(2) without overflow, and then x/log(2)
100 is about 2^62: not every integer of that size can be represented as a
101 'double', thus the argument reduction would fail. */
103 /* |x| <= 0.25, thus n = round(x/log(2)) = 0 */
107 mpfr_init2 (r
, sizeof (long) * CHAR_BIT
);
108 mpfr_const_log2 (r
, MPFR_RNDZ
);
109 mpfr_div (r
, x
, r
, MPFR_RNDN
);
110 n
= mpfr_get_si (r
, MPFR_RNDN
);
113 /* we have |x| <= (|n|+1)*log(2) */
114 MPFR_LOG_MSG (("d(x)=%1.30e n=%ld\n", mpfr_get_d1(x
), n
));
116 /* error_r bounds the cancelled bits in x - n*log(2) */
117 if (MPFR_UNLIKELY (n
== 0))
121 count_leading_zeros (error_r
, (mp_limb_t
) SAFE_ABS (unsigned long, n
) + 1);
122 error_r
= GMP_NUMB_BITS
- error_r
;
123 /* we have |x| <= 2^error_r * log(2) */
126 /* for the O(n^(1/2)*M(n)) method, the Taylor series computation of
127 n/K terms costs about n/(2K) multiplications when computed in fixed
129 K
= (precy
< MPFR_EXP_2_THRESHOLD
) ? __gmpfr_isqrt ((precy
+ 1) / 2)
130 : __gmpfr_cuberoot (4*precy
);
131 l
= (precy
- 1) / K
+ 1;
132 err
= K
+ MPFR_INT_CEIL_LOG2 (2 * l
+ 18);
133 /* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */
134 q
= precy
+ err
+ K
+ 8;
135 /* if |x| >> 1, take into account the cancelled bits */
139 /* Note: due to the mpfr_prec_round below, it is not possible to use
140 the MPFR_GROUP_* macros here. */
142 mpfr_init2 (r
, q
+ error_r
);
143 mpfr_init2 (s
, q
+ error_r
);
145 /* the algorithm consists in computing an upper bound of exp(x) using
146 a precision of q bits, and see if we can round to MPFR_PREC(y) taking
147 into account the maximal error. Otherwise we increase q. */
148 MPFR_ZIV_INIT (loop
, q
);
151 MPFR_LOG_MSG (("n=%ld K=%lu l=%lu q=%lu error_r=%d\n",
152 n
, K
, l
, (unsigned long) q
, error_r
));
154 /* First reduce the argument to r = x - n * log(2),
155 so that r is small in absolute value. We want an upper
156 bound on r to get an upper bound on exp(x). */
158 /* if n<0, we have to get an upper bound of log(2)
159 in order to get an upper bound of r = x-n*log(2) */
160 mpfr_const_log2 (s
, (n
>= 0) ? MPFR_RNDZ
: MPFR_RNDU
);
161 /* s is within 1 ulp(s) of log(2) */
163 mpfr_mul_ui (r
, s
, (n
< 0) ? -n
: n
, (n
>= 0) ? MPFR_RNDZ
: MPFR_RNDU
);
164 /* r is within 3 ulps of |n|*log(2) */
166 MPFR_CHANGE_SIGN (r
);
167 /* r <= n*log(2), within 3 ulps */
172 mpfr_sub (r
, x
, r
, MPFR_RNDU
);
174 if (MPFR_IS_PURE_FP (r
))
176 while (MPFR_IS_NEG (r
))
177 { /* initial approximation n was too large */
179 mpfr_add (r
, r
, s
, MPFR_RNDU
);
182 /* since there was a cancellation in x - n*log(2), the low error_r
183 bits from r are zero and thus non significant, thus we can reduce
184 the working precision */
186 mpfr_prec_round (r
, q
, MPFR_RNDU
);
187 /* the error on r is at most 3 ulps (3 ulps if error_r = 0,
188 and 1 + 3/2 if error_r > 0) */
190 MPFR_ASSERTD (MPFR_IS_POS (r
));
191 mpfr_div_2ui (r
, r
, K
, MPFR_RNDU
); /* r = (x-n*log(2))/2^K, exact */
194 exps
= mpfr_get_z_2exp (ss
, s
);
195 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */
196 MPFR_ASSERTD (MPFR_IS_PURE_FP (r
) && MPFR_EXP (r
) < 0);
197 l
= (precy
< MPFR_EXP_2_THRESHOLD
)
198 ? mpfr_exp2_aux (ss
, r
, q
, &exps
) /* naive method */
199 : mpfr_exp2_aux2 (ss
, r
, q
, &exps
); /* Paterson/Stockmeyer meth */
201 MPFR_LOG_MSG (("l=%lu q=%lu (K+l)*q^2=%1.3e\n",
202 l
, (unsigned long) q
, (K
+ l
) * (double) q
* q
));
204 for (k
= 0; k
< K
; k
++)
206 mpz_mul (ss
, ss
, ss
);
208 exps
+= mpz_normalize (ss
, ss
, q
);
210 mpfr_set_z (s
, ss
, MPFR_RNDN
);
212 MPFR_SET_EXP(s
, MPFR_GET_EXP (s
) + exps
);
215 /* error is at most 2^K*l, plus 2 to take into account of
216 the error of 3 ulps on r */
217 err
= K
+ MPFR_INT_CEIL_LOG2 (l
) + 2;
219 MPFR_LOG_MSG (("before mult. by 2^n:\n", 0));
221 MPFR_LOG_MSG (("err=%lu bits\n", K
));
223 if (MPFR_LIKELY (MPFR_CAN_ROUND (s
, q
- err
, precy
, rnd_mode
)))
226 inexact
= mpfr_mul_2si (y
, s
, n
, rnd_mode
);
231 MPFR_ZIV_NEXT (loop
, q
);
232 mpfr_set_prec (r
, q
+ error_r
);
233 mpfr_set_prec (s
, q
+ error_r
);
235 MPFR_ZIV_FREE (loop
);
243 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
244 using naive method with O(l) multiplications.
245 Return the number of iterations l.
246 The absolute error on s is less than 3*l*(l+1)*2^(-q).
247 Version using fixed-point arithmetic with mpz instead
248 of mpfr for internal computations.
249 NOTE[VL]: the following sentence seems to be obsolete since MY_INIT_MPZ
250 is no longer used (r6919); qn was the number of limbs of q.
251 s must have at least qn+1 limbs (qn should be enough, but currently fails
252 since mpz_mul_2exp(s, s, q-1) reallocates qn+1 limbs)
255 mpfr_exp2_aux (mpz_t s
, mpfr_srcptr r
, mpfr_prec_t q
, mpfr_exp_t
*exps
)
258 mpfr_exp_t dif
, expt
, expr
;
260 mp_size_t sbit
, tbit
;
262 MPFR_ASSERTN (MPFR_IS_PURE_FP (r
));
265 *exps
= 1 - (mpfr_exp_t
) q
; /* s = 2^(q-1) */
270 mpz_mul_2exp(s
, s
, q
-1);
271 expr
= mpfr_get_z_2exp(rr
, r
); /* no error here */
278 MPFR_MPZ_SIZEINBASE2 (sbit
, s
);
279 MPFR_MPZ_SIZEINBASE2 (tbit
, t
);
280 dif
= *exps
+ sbit
- expt
- tbit
;
281 /* truncates the bits of t which are < ulp(s) = 2^(1-q) */
282 expt
+= mpz_normalize(t
, t
, (mpfr_exp_t
) q
-dif
); /* error at most 2^(1-q) */
283 mpz_fdiv_q_ui (t
, t
, l
); /* error at most 2^(1-q) */
284 /* the error wrt t^l/l! is here at most 3*l*ulp(s) */
285 MPFR_ASSERTD (expt
== *exps
);
286 if (mpz_sgn (t
) == 0)
288 mpz_add(s
, s
, t
); /* no error here: exact */
289 /* ensures rr has the same size as t: after several shifts, the error
290 on rr is still at most ulp(t)=ulp(s) */
291 MPFR_MPZ_SIZEINBASE2 (tbit
, t
);
292 expr
+= mpz_normalize(rr
, rr
, tbit
);
298 return 3 * l
* (l
+ 1);
301 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
302 using Paterson-Stockmeyer algorithm with O(sqrt(l)) multiplications.
304 Uses m multiplications of full size and 2l/m of decreasing size,
305 i.e. a total equivalent to about m+l/m full multiplications,
306 i.e. 2*sqrt(l) for m=sqrt(l).
307 NOTE[VL]: The following sentence seems to be obsolete since MY_INIT_MPZ
308 is no longer used (r6919); sizer was the number of limbs of r.
309 Version using mpz. ss must have at least (sizer+1) limbs.
310 The error is bounded by (l^2+4*l) ulps where l is the return value.
313 mpfr_exp2_aux2 (mpz_t s
, mpfr_srcptr r
, mpfr_prec_t q
, mpfr_exp_t
*exps
)
315 mpfr_exp_t expr
, *expR
, expt
;
317 unsigned long l
, m
, i
;
318 mpz_t t
, *R
, rr
, tmp
;
319 mp_size_t sbit
, rrbit
;
320 MPFR_TMP_DECL(marker
);
322 /* estimate value of l */
323 MPFR_ASSERTD (MPFR_GET_EXP (r
) < 0);
324 l
= q
/ (- MPFR_GET_EXP (r
));
325 m
= __gmpfr_isqrt (l
);
326 /* we access R[2], thus we need m >= 2 */
330 MPFR_TMP_MARK(marker
);
331 R
= (mpz_t
*) MPFR_TMP_ALLOC ((m
+ 1) * sizeof (mpz_t
)); /* R[i] is r^i */
332 expR
= (mpfr_exp_t
*) MPFR_TMP_ALLOC((m
+ 1) * sizeof (mpfr_exp_t
));
333 /* expR[i] is the exponent for R[i] */
338 *exps
= 1 - q
; /* 1 ulp = 2^(1-q) */
339 for (i
= 0 ; i
<= m
; i
++)
341 expR
[1] = mpfr_get_z_2exp (R
[1], r
); /* exact operation: no error */
342 expR
[1] = mpz_normalize2 (R
[1], R
[1], expR
[1], 1 - q
); /* error <= 1 ulp */
343 mpz_mul (t
, R
[1], R
[1]); /* err(t) <= 2 ulps */
344 mpz_fdiv_q_2exp (R
[2], t
, q
- 1); /* err(R[2]) <= 3 ulps */
346 for (i
= 3 ; i
<= m
; i
++)
349 mpz_mul (t
, R
[i
-1], R
[1]); /* err(t) <= 2*i-2 */
351 mpz_mul (t
, R
[i
/2], R
[i
/2]);
352 mpz_fdiv_q_2exp (R
[i
], t
, q
- 1); /* err(R[i]) <= 2*i-1 ulps */
355 mpz_set_ui (R
[0], 1);
356 mpz_mul_2exp (R
[0], R
[0], q
-1);
357 expR
[0] = 1-q
; /* R[0]=1 */
359 expr
= 0; /* rr contains r^l/l! */
360 /* by induction: err(rr) <= 2*l ulps */
363 ql
= q
; /* precision used for current giant step */
366 /* all R[i] must have exponent 1-ql */
368 for (i
= 0 ; i
< m
; i
++)
369 expR
[i
] = mpz_normalize2 (R
[i
], R
[i
], expR
[i
], 1 - ql
);
370 /* the absolute error on R[i]*rr is still 2*i-1 ulps */
371 expt
= mpz_normalize2 (t
, R
[m
-1], expR
[m
-1], 1 - ql
);
372 /* err(t) <= 2*m-1 ulps */
373 /* computes t = 1 + r/(l+1) + ... + r^(m-1)*l!/(l+m-1)!
374 using Horner's scheme */
375 for (i
= m
-1 ; i
-- != 0 ; )
377 mpz_fdiv_q_ui (t
, t
, l
+i
+1); /* err(t) += 1 ulp */
378 mpz_add (t
, t
, R
[i
]);
380 /* now err(t) <= (3m-2) ulps */
382 /* now multiplies t by r^l/l! and adds to s */
385 expt
= mpz_normalize2 (t
, t
, expt
, *exps
);
386 /* err(t) <= (3m-1) + err_rr(l) <= (3m-2) + 2*l */
387 MPFR_ASSERTD (expt
== *exps
);
388 mpz_add (s
, s
, t
); /* no error here */
390 /* updates rr, the multiplication of the factors l+i could be done
391 using binary splitting too, but it is not sure it would save much */
392 mpz_mul (t
, rr
, R
[m
]); /* err(t) <= err(rr) + 2m-1 */
395 for (i
= 1 ; i
<= m
; i
++)
396 mpz_mul_ui (tmp
, tmp
, l
+ i
);
397 mpz_fdiv_q (t
, t
, tmp
); /* err(t) <= err(rr) + 2m */
399 if (MPFR_UNLIKELY (mpz_sgn (t
) == 0))
401 expr
+= mpz_normalize (rr
, t
, ql
); /* err_rr(l+1) <= err_rr(l) + 2m+1 */
402 if (MPFR_UNLIKELY (mpz_sgn (rr
) == 0))
405 MPFR_MPZ_SIZEINBASE2 (rrbit
, rr
);
406 MPFR_MPZ_SIZEINBASE2 (sbit
, s
);
407 ql
= q
- *exps
- sbit
+ expr
+ rrbit
;
408 /* TODO: Wrong cast. I don't want what is right, but this is
411 while ((size_t) expr
+ rrbit
> (size_t) -q
);
413 for (i
= 0 ; i
<= m
; i
++)
415 MPFR_TMP_FREE(marker
);