iscontrol(8): Fix synopsis, sync usage() & improve markup
[dragonfly.git] / contrib / mpfr / exp_2.c
blobd25f6fac9d772239406e8b5ad3b372e1050bb829
1 /* mpfr_exp_2 -- exponential of a floating-point number
2 using algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n))
4 Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
5 Contributed by the Arenaire and Cacao projects, INRIA.
7 This file is part of the GNU MPFR Library.
9 The GNU MPFR Library is free software; you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License as published by
11 the Free Software Foundation; either version 2.1 of the License, or (at your
12 option) any later version.
14 The GNU MPFR Library is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
17 License for more details.
19 You should have received a copy of the GNU Lesser General Public License
20 along with the GNU MPFR Library; see the file COPYING.LIB. If not, write to
21 the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
22 MA 02110-1301, USA. */
24 /* #define DEBUG */
25 #define MPFR_NEED_LONGLONG_H /* for count_leading_zeros */
26 #include "mpfr-impl.h"
28 static unsigned long
29 mpfr_exp2_aux (mpz_t, mpfr_srcptr, mp_prec_t, mp_exp_t *);
30 static unsigned long
31 mpfr_exp2_aux2 (mpz_t, mpfr_srcptr, mp_prec_t, mp_exp_t *);
32 static mp_exp_t
33 mpz_normalize (mpz_t, mpz_t, mp_exp_t);
34 static mp_exp_t
35 mpz_normalize2 (mpz_t, mpz_t, mp_exp_t, mp_exp_t);
37 #define MY_INIT_MPZ(x, s) { \
38 (x)->_mp_alloc = (s); \
39 PTR(x) = (mp_ptr) MPFR_TMP_ALLOC((s)*BYTES_PER_MP_LIMB); \
40 (x)->_mp_size = 0; }
42 /* if k = the number of bits of z > q, divides z by 2^(k-q) and returns k-q.
43 Otherwise do nothing and return 0.
45 static mp_exp_t
46 mpz_normalize (mpz_t rop, mpz_t z, mp_exp_t q)
48 size_t k;
50 MPFR_MPZ_SIZEINBASE2 (k, z);
51 MPFR_ASSERTD (k == (mpfr_uexp_t) k);
52 if (q < 0 || (mpfr_uexp_t) k > (mpfr_uexp_t) q)
54 mpz_div_2exp(rop, z, (unsigned long) ((mpfr_uexp_t) k - q));
55 return (mp_exp_t) k - q;
57 if (MPFR_UNLIKELY(rop != z))
58 mpz_set(rop, z);
59 return 0;
62 /* if expz > target, shift z by (expz-target) bits to the left.
63 if expz < target, shift z by (target-expz) bits to the right.
64 Returns target.
66 static mp_exp_t
67 mpz_normalize2 (mpz_t rop, mpz_t z, mp_exp_t expz, mp_exp_t target)
69 if (target > expz)
70 mpz_div_2exp(rop, z, target-expz);
71 else
72 mpz_mul_2exp(rop, z, expz-target);
73 return target;
76 /* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n
77 where x = n*log(2)+(2^K)*r
78 together with the Paterson-Stockmeyer O(t^(1/2)) algorithm for the
79 evaluation of power series. The resulting complexity is O(n^(1/3)*M(n)).
80 This function returns with the exact flags due to exp.
82 int
83 mpfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode)
85 long n;
86 unsigned long K, k, l, err; /* FIXME: Which type ? */
87 int error_r;
88 mp_exp_t exps;
89 mp_prec_t q, precy;
90 int inexact;
91 mpfr_t r, s;
92 mpz_t ss;
93 MPFR_ZIV_DECL (loop);
94 MPFR_TMP_DECL(marker);
96 MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", x, x, rnd_mode),
97 ("y[%#R]=%R inexact=%d", y, y, inexact));
99 precy = MPFR_PREC(y);
101 /* Warning: we cannot use the 'double' type here, since on 64-bit machines
102 x may be as large as 2^62*log(2) without overflow, and then x/log(2)
103 is about 2^62: not every integer of that size can be represented as a
104 'double', thus the argument reduction would fail. */
105 if (MPFR_GET_EXP (x) <= -2)
106 /* |x| <= 0.25, thus n = round(x/log(2)) = 0 */
107 n = 0;
108 else
110 mpfr_init2 (r, sizeof (long) * CHAR_BIT);
111 mpfr_const_log2 (r, GMP_RNDZ);
112 mpfr_div (r, x, r, GMP_RNDN);
113 n = mpfr_get_si (r, GMP_RNDN);
114 mpfr_clear (r);
116 MPFR_LOG_MSG (("d(x)=%1.30e n=%ld\n", mpfr_get_d1(x), n));
118 /* error bounds the cancelled bits in x - n*log(2) */
119 if (MPFR_UNLIKELY (n == 0))
120 error_r = 0;
121 else
122 count_leading_zeros (error_r, (mp_limb_t) SAFE_ABS (unsigned long, n));
123 error_r = BITS_PER_MP_LIMB - error_r + 2;
125 /* for the O(n^(1/2)*M(n)) method, the Taylor series computation of
126 n/K terms costs about n/(2K) multiplications when computed in fixed
127 point */
128 K = (precy < MPFR_EXP_2_THRESHOLD) ? __gmpfr_isqrt ((precy + 1) / 2)
129 : __gmpfr_cuberoot (4*precy);
130 l = (precy - 1) / K + 1;
131 err = K + MPFR_INT_CEIL_LOG2 (2 * l + 18);
132 /* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */
133 q = precy + err + K + 5;
135 mpfr_init2 (r, q + error_r);
136 mpfr_init2 (s, q + error_r);
138 /* the algorithm consists in computing an upper bound of exp(x) using
139 a precision of q bits, and see if we can round to MPFR_PREC(y) taking
140 into account the maximal error. Otherwise we increase q. */
141 MPFR_ZIV_INIT (loop, q);
142 for (;;)
144 MPFR_LOG_MSG (("n=%ld K=%lu l=%lu q=%lu error_r=%d\n",
145 n, K, l, (unsigned long) q, error_r));
147 /* First reduce the argument to r = x - n * log(2),
148 so that r is small in absolute value. We want an upper
149 bound on r to get an upper bound on exp(x). */
151 /* if n<0, we have to get an upper bound of log(2)
152 in order to get an upper bound of r = x-n*log(2) */
153 mpfr_const_log2 (s, (n >= 0) ? GMP_RNDZ : GMP_RNDU);
154 /* s is within 1 ulp of log(2) */
156 mpfr_mul_ui (r, s, (n < 0) ? -n : n, (n >= 0) ? GMP_RNDZ : GMP_RNDU);
157 /* r is within 3 ulps of |n|*log(2) */
158 if (n < 0)
159 MPFR_CHANGE_SIGN (r);
160 /* r <= n*log(2), within 3 ulps */
162 MPFR_LOG_VAR (x);
163 MPFR_LOG_VAR (r);
165 mpfr_sub (r, x, r, GMP_RNDU);
166 /* possible cancellation here: if r is zero, increase the working
167 precision (Ziv's loop); otherwise, the error on r is at most
168 3*2^(EXP(old_r)-EXP(new_r)) ulps */
170 if (MPFR_IS_PURE_FP (r))
172 mp_exp_t cancel;
174 /* number of cancelled bits */
175 cancel = MPFR_GET_EXP (x) - MPFR_GET_EXP (r);
176 if (cancel < 0) /* this might happen in the second loop if x is
177 tiny negative: the initial n is 0, then in the
178 first loop n becomes -1 and r = x + log(2) */
179 cancel = 0;
180 while (MPFR_IS_NEG (r))
181 { /* initial approximation n was too large */
182 n--;
183 mpfr_add (r, r, s, GMP_RNDU);
185 mpfr_prec_round (r, q, GMP_RNDU);
186 MPFR_LOG_VAR (r);
187 MPFR_ASSERTD (MPFR_IS_POS (r));
188 mpfr_div_2ui (r, r, K, GMP_RNDU); /* r = (x-n*log(2))/2^K, exact */
190 MPFR_TMP_MARK(marker);
191 MY_INIT_MPZ(ss, 3 + 2*((q-1)/BITS_PER_MP_LIMB));
192 exps = mpfr_get_z_exp (ss, s);
193 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */
194 MPFR_ASSERTD (MPFR_IS_PURE_FP (r) && MPFR_EXP (r) < 0);
195 l = (precy < MPFR_EXP_2_THRESHOLD)
196 ? mpfr_exp2_aux (ss, r, q, &exps) /* naive method */
197 : mpfr_exp2_aux2 (ss, r, q, &exps); /* Paterson/Stockmeyer meth */
199 MPFR_LOG_MSG (("l=%lu q=%lu (K+l)*q^2=%1.3e\n",
200 l, (unsigned long) q, (K + l) * (double) q * q));
202 for (k = 0; k < K; k++)
204 mpz_mul (ss, ss, ss);
205 exps <<= 1;
206 exps += mpz_normalize (ss, ss, q);
208 mpfr_set_z (s, ss, GMP_RNDN);
210 MPFR_SET_EXP(s, MPFR_GET_EXP (s) + exps);
211 MPFR_TMP_FREE(marker); /* don't need ss anymore */
213 /* error is at most 2^K*l, plus cancel+2 to take into account of
214 the error of 3*2^(EXP(old_r)-EXP(new_r)) on r */
215 K += MPFR_INT_CEIL_LOG2 (l) + cancel + 2;
217 MPFR_LOG_MSG (("before mult. by 2^n:\n", 0));
218 MPFR_LOG_VAR (s);
219 MPFR_LOG_MSG (("err=%lu bits\n", K));
221 if (MPFR_LIKELY (MPFR_CAN_ROUND (s, q - K, precy, rnd_mode)))
223 mpfr_clear_flags ();
224 inexact = mpfr_mul_2si (y, s, n, rnd_mode);
225 break;
229 MPFR_ZIV_NEXT (loop, q);
230 mpfr_set_prec (r, q);
231 mpfr_set_prec (s, q);
233 MPFR_ZIV_FREE (loop);
235 mpfr_clear (r);
236 mpfr_clear (s);
238 return inexact;
241 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
242 using naive method with O(l) multiplications.
243 Return the number of iterations l.
244 The absolute error on s is less than 3*l*(l+1)*2^(-q).
245 Version using fixed-point arithmetic with mpz instead
246 of mpfr for internal computations.
247 s must have at least qn+1 limbs (qn should be enough, but currently fails
248 since mpz_mul_2exp(s, s, q-1) reallocates qn+1 limbs)
250 static unsigned long
251 mpfr_exp2_aux (mpz_t s, mpfr_srcptr r, mp_prec_t q, mp_exp_t *exps)
253 unsigned long l;
254 mp_exp_t dif, expt, expr;
255 mp_size_t qn;
256 mpz_t t, rr;
257 mp_size_t sbit, tbit;
258 MPFR_TMP_DECL(marker);
260 MPFR_ASSERTN (MPFR_IS_PURE_FP (r));
262 MPFR_TMP_MARK(marker);
263 qn = 1 + (q-1)/BITS_PER_MP_LIMB;
264 expt = 0;
265 *exps = 1 - (mp_exp_t) q; /* s = 2^(q-1) */
266 MY_INIT_MPZ(t, 2*qn+1);
267 MY_INIT_MPZ(rr, qn+1);
268 mpz_set_ui(t, 1);
269 mpz_set_ui(s, 1);
270 mpz_mul_2exp(s, s, q-1);
271 expr = mpfr_get_z_exp(rr, r); /* no error here */
273 l = 0;
274 for (;;) {
275 l++;
276 mpz_mul(t, t, rr);
277 expt += expr;
278 MPFR_MPZ_SIZEINBASE2 (sbit, s);
279 MPFR_MPZ_SIZEINBASE2 (tbit, t);
280 dif = *exps + sbit - expt - tbit;
281 /* truncates the bits of t which are < ulp(s) = 2^(1-q) */
282 expt += mpz_normalize(t, t, (mp_exp_t) q-dif); /* error at most 2^(1-q) */
283 mpz_div_ui(t, t, l); /* error at most 2^(1-q) */
284 /* the error wrt t^l/l! is here at most 3*l*ulp(s) */
285 MPFR_ASSERTD (expt == *exps);
286 if (mpz_sgn (t) == 0)
287 break;
288 mpz_add(s, s, t); /* no error here: exact */
289 /* ensures rr has the same size as t: after several shifts, the error
290 on rr is still at most ulp(t)=ulp(s) */
291 MPFR_MPZ_SIZEINBASE2 (tbit, t);
292 expr += mpz_normalize(rr, rr, tbit);
295 MPFR_TMP_FREE(marker);
296 return 3*l*(l+1);
299 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
300 using Paterson-Stockmeyer algorithm with O(sqrt(l)) multiplications.
301 Return l.
302 Uses m multiplications of full size and 2l/m of decreasing size,
303 i.e. a total equivalent to about m+l/m full multiplications,
304 i.e. 2*sqrt(l) for m=sqrt(l).
305 Version using mpz. ss must have at least (sizer+1) limbs.
306 The error is bounded by (l^2+4*l) ulps where l is the return value.
308 static unsigned long
309 mpfr_exp2_aux2 (mpz_t s, mpfr_srcptr r, mp_prec_t q, mp_exp_t *exps)
311 mp_exp_t expr, *expR, expt;
312 mp_size_t sizer;
313 mp_prec_t ql;
314 unsigned long l, m, i;
315 mpz_t t, *R, rr, tmp;
316 mp_size_t sbit, rrbit;
317 MPFR_TMP_DECL(marker);
319 /* estimate value of l */
320 MPFR_ASSERTD (MPFR_GET_EXP (r) < 0);
321 l = q / (- MPFR_GET_EXP (r));
322 m = __gmpfr_isqrt (l);
323 /* we access R[2], thus we need m >= 2 */
324 if (m < 2)
325 m = 2;
327 MPFR_TMP_MARK(marker);
328 R = (mpz_t*) MPFR_TMP_ALLOC((m+1)*sizeof(mpz_t)); /* R[i] is r^i */
329 expR = (mp_exp_t*) MPFR_TMP_ALLOC((m+1)*sizeof(mp_exp_t)); /* exponent for R[i] */
330 sizer = 1 + (MPFR_PREC(r)-1)/BITS_PER_MP_LIMB;
331 mpz_init(tmp);
332 MY_INIT_MPZ(rr, sizer+2);
333 MY_INIT_MPZ(t, 2*sizer); /* double size for products */
334 mpz_set_ui(s, 0);
335 *exps = 1-q; /* 1 ulp = 2^(1-q) */
336 for (i = 0 ; i <= m ; i++)
337 MY_INIT_MPZ(R[i], sizer+2);
338 expR[1] = mpfr_get_z_exp(R[1], r); /* exact operation: no error */
339 expR[1] = mpz_normalize2(R[1], R[1], expR[1], 1-q); /* error <= 1 ulp */
340 mpz_mul(t, R[1], R[1]); /* err(t) <= 2 ulps */
341 mpz_div_2exp(R[2], t, q-1); /* err(R[2]) <= 3 ulps */
342 expR[2] = 1-q;
343 for (i = 3 ; i <= m ; i++)
345 mpz_mul(t, R[i-1], R[1]); /* err(t) <= 2*i-2 */
346 mpz_div_2exp(R[i], t, q-1); /* err(R[i]) <= 2*i-1 ulps */
347 expR[i] = 1-q;
349 mpz_set_ui (R[0], 1);
350 mpz_mul_2exp (R[0], R[0], q-1);
351 expR[0] = 1-q; /* R[0]=1 */
352 mpz_set_ui (rr, 1);
353 expr = 0; /* rr contains r^l/l! */
354 /* by induction: err(rr) <= 2*l ulps */
356 l = 0;
357 ql = q; /* precision used for current giant step */
360 /* all R[i] must have exponent 1-ql */
361 if (l != 0)
362 for (i = 0 ; i < m ; i++)
363 expR[i] = mpz_normalize2 (R[i], R[i], expR[i], 1-ql);
364 /* the absolute error on R[i]*rr is still 2*i-1 ulps */
365 expt = mpz_normalize2 (t, R[m-1], expR[m-1], 1-ql);
366 /* err(t) <= 2*m-1 ulps */
367 /* computes t = 1 + r/(l+1) + ... + r^(m-1)*l!/(l+m-1)!
368 using Horner's scheme */
369 for (i = m-1 ; i-- != 0 ; )
371 mpz_div_ui (t, t, l+i+1); /* err(t) += 1 ulp */
372 mpz_add (t, t, R[i]);
374 /* now err(t) <= (3m-2) ulps */
376 /* now multiplies t by r^l/l! and adds to s */
377 mpz_mul (t, t, rr);
378 expt += expr;
379 expt = mpz_normalize2 (t, t, expt, *exps);
380 /* err(t) <= (3m-1) + err_rr(l) <= (3m-2) + 2*l */
381 MPFR_ASSERTD (expt == *exps);
382 mpz_add (s, s, t); /* no error here */
384 /* updates rr, the multiplication of the factors l+i could be done
385 using binary splitting too, but it is not sure it would save much */
386 mpz_mul (t, rr, R[m]); /* err(t) <= err(rr) + 2m-1 */
387 expr += expR[m];
388 mpz_set_ui (tmp, 1);
389 for (i = 1 ; i <= m ; i++)
390 mpz_mul_ui (tmp, tmp, l + i);
391 mpz_fdiv_q (t, t, tmp); /* err(t) <= err(rr) + 2m */
392 l += m;
393 if (MPFR_UNLIKELY (mpz_sgn (t) == 0))
394 break;
395 expr += mpz_normalize (rr, t, ql); /* err_rr(l+1) <= err_rr(l) + 2m+1 */
396 if (MPFR_UNLIKELY (mpz_sgn (rr) == 0))
397 rrbit = 1;
398 else
399 MPFR_MPZ_SIZEINBASE2 (rrbit, rr);
400 MPFR_MPZ_SIZEINBASE2 (sbit, s);
401 ql = q - *exps - sbit + expr + rrbit;
402 /* TODO: Wrong cast. I don't want what is right, but this is
403 certainly wrong */
405 while ((size_t) expr+rrbit > (size_t) (int) -q);
407 MPFR_TMP_FREE(marker);
408 mpz_clear(tmp);
409 return l*(l+4);