contrib/mpfr/exp_2.c

   1 /* mpfr_exp_2 -- exponential of a floating-point number
   2                  using algorithms in O(n^(1/2)*M(n)) and O(n^(1/3)*M(n))
   3
   4 Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   5 Contributed by the Arenaire and Cacao projects, INRIA.
   6
   7 This file is part of the GNU MPFR Library.
   8
   9 The GNU MPFR Library is free software; you can redistribute it and/or modify
  10 it under the terms of the GNU Lesser General Public License as published by
  11 the Free Software Foundation; either version 2.1 of the License, or (at your
  12 option) any later version.
  13
  14 The GNU MPFR Library is distributed in the hope that it will be useful, but
  15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  16 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  17 License for more details.
  18
  19 You should have received a copy of the GNU Lesser General Public License
  20 along with the GNU MPFR Library; see the file COPYING.LIB.  If not, write to
  21 the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
  22 MA 02110-1301, USA. */
  23
  24 /* #define DEBUG */
  25 #define MPFR_NEED_LONGLONG_H /* for count_leading_zeros */
  26 #include "mpfr-impl.h"
  27
  28 static unsigned long
  29 mpfr_exp2_aux (mpz_t, mpfr_srcptr, mp_prec_t, mp_exp_t *);
  30 static unsigned long
  31 mpfr_exp2_aux2 (mpz_t, mpfr_srcptr, mp_prec_t, mp_exp_t *);
  32 static mp_exp_t
  33 mpz_normalize  (mpz_t, mpz_t, mp_exp_t);
  34 static mp_exp_t
  35 mpz_normalize2 (mpz_t, mpz_t, mp_exp_t, mp_exp_t);
  36
  37 #define MY_INIT_MPZ(x, s) { \
  38    (x)->_mp_alloc = (s); \
  39    PTR(x) = (mp_ptr) MPFR_TMP_ALLOC((s)*BYTES_PER_MP_LIMB); \
  40    (x)->_mp_size = 0; }
  41
  42 /* if k = the number of bits of z > q, divides z by 2^(k-q) and returns k-q.
  43    Otherwise do nothing and return 0.
  44  */
  45 static mp_exp_t
  46 mpz_normalize (mpz_t rop, mpz_t z, mp_exp_t q)
  47 {
  48   size_t k;
  49
  50   MPFR_MPZ_SIZEINBASE2 (k, z);
  51   MPFR_ASSERTD (k == (mpfr_uexp_t) k);
  52   if (q < 0 || (mpfr_uexp_t) k > (mpfr_uexp_t) q)
  53     {
  54       mpz_div_2exp(rop, z, (unsigned long) ((mpfr_uexp_t) k - q));
  55       return (mp_exp_t) k - q;
  56     }
  57   if (MPFR_UNLIKELY(rop != z))
  58     mpz_set(rop, z);
  59   return 0;
  60 }
  61
  62 /* if expz > target, shift z by (expz-target) bits to the left.
  63    if expz < target, shift z by (target-expz) bits to the right.
  64    Returns target.
  65 */
  66 static mp_exp_t
  67 mpz_normalize2 (mpz_t rop, mpz_t z, mp_exp_t expz, mp_exp_t target)
  68 {
  69   if (target > expz)
  70     mpz_div_2exp(rop, z, target-expz);
  71   else
  72     mpz_mul_2exp(rop, z, expz-target);
  73   return target;
  74 }
  75
  76 /* use Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n
  77    where x = n*log(2)+(2^K)*r
  78    together with the Paterson-Stockmeyer O(t^(1/2)) algorithm for the
  79    evaluation of power series. The resulting complexity is O(n^(1/3)*M(n)).
  80    This function returns with the exact flags due to exp.
  81 */
  82 int
  83 mpfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode)
  84 {
  85   long n;
  86   unsigned long K, k, l, err; /* FIXME: Which type ? */
  87   int error_r;
  88   mp_exp_t exps;
  89   mp_prec_t q, precy;
  90   int inexact;
  91   mpfr_t r, s;
  92   mpz_t ss;
  93   MPFR_ZIV_DECL (loop);
  94   MPFR_TMP_DECL(marker);
  95
  96   MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", x, x, rnd_mode),
  97                  ("y[%#R]=%R inexact=%d", y, y, inexact));
  98
  99   precy = MPFR_PREC(y);
 100
 101   /* Warning: we cannot use the 'double' type here, since on 64-bit machines
 102      x may be as large as 2^62*log(2) without overflow, and then x/log(2)
 103      is about 2^62: not every integer of that size can be represented as a
 104      'double', thus the argument reduction would fail. */
 105   if (MPFR_GET_EXP (x) <= -2)
 106     /* |x| <= 0.25, thus n = round(x/log(2)) = 0 */
 107     n = 0;
 108   else
 109     {
 110       mpfr_init2 (r, sizeof (long) * CHAR_BIT);
 111       mpfr_const_log2 (r, GMP_RNDZ);
 112       mpfr_div (r, x, r, GMP_RNDN);
 113       n = mpfr_get_si (r, GMP_RNDN);
 114       mpfr_clear (r);
 115     }
 116   MPFR_LOG_MSG (("d(x)=%1.30e n=%ld\n", mpfr_get_d1(x), n));
 117
 118   /* error bounds the cancelled bits in x - n*log(2) */
 119   if (MPFR_UNLIKELY (n == 0))
 120     error_r = 0;
 121   else
 122     count_leading_zeros (error_r, (mp_limb_t) SAFE_ABS (unsigned long, n));
 123   error_r = BITS_PER_MP_LIMB - error_r + 2;
 124
 125   /* for the O(n^(1/2)*M(n)) method, the Taylor series computation of
 126      n/K terms costs about n/(2K) multiplications when computed in fixed
 127      point */
 128   K = (precy < MPFR_EXP_2_THRESHOLD) ? __gmpfr_isqrt ((precy + 1) / 2)
 129     : __gmpfr_cuberoot (4*precy);
 130   l = (precy - 1) / K + 1;
 131   err = K + MPFR_INT_CEIL_LOG2 (2 * l + 18);
 132   /* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */
 133   q = precy + err + K + 5;
 134
 135   mpfr_init2 (r, q + error_r);
 136   mpfr_init2 (s, q + error_r);
 137
 138   /* the algorithm consists in computing an upper bound of exp(x) using
 139      a precision of q bits, and see if we can round to MPFR_PREC(y) taking
 140      into account the maximal error. Otherwise we increase q. */
 141   MPFR_ZIV_INIT (loop, q);
 142   for (;;)
 143     {
 144       MPFR_LOG_MSG (("n=%ld K=%lu l=%lu q=%lu error_r=%d\n",
 145                      n, K, l, (unsigned long) q, error_r));
 146
 147       /* First reduce the argument to r = x - n * log(2),
 148          so that r is small in absolute value. We want an upper
 149          bound on r to get an upper bound on exp(x). */
 150
 151       /* if n<0, we have to get an upper bound of log(2)
 152          in order to get an upper bound of r = x-n*log(2) */
 153       mpfr_const_log2 (s, (n >= 0) ? GMP_RNDZ : GMP_RNDU);
 154       /* s is within 1 ulp of log(2) */
 155
 156       mpfr_mul_ui (r, s, (n < 0) ? -n : n, (n >= 0) ? GMP_RNDZ : GMP_RNDU);
 157       /* r is within 3 ulps of |n|*log(2) */
 158       if (n < 0)
 159         MPFR_CHANGE_SIGN (r);
 160       /* r <= n*log(2), within 3 ulps */
 161
 162       MPFR_LOG_VAR (x);
 163       MPFR_LOG_VAR (r);
 164
 165       mpfr_sub (r, x, r, GMP_RNDU);
 166       /* possible cancellation here: if r is zero, increase the working
 167          precision (Ziv's loop); otherwise, the error on r is at most
 168          3*2^(EXP(old_r)-EXP(new_r)) ulps */
 169
 170       if (MPFR_IS_PURE_FP (r))
 171         {
 172           mp_exp_t cancel;
 173
 174           /* number of cancelled bits */
 175           cancel = MPFR_GET_EXP (x) - MPFR_GET_EXP (r);
 176           if (cancel < 0) /* this might happen in the second loop if x is
 177                              tiny negative: the initial n is 0, then in the
 178                              first loop n becomes -1 and r = x + log(2) */
 179             cancel = 0;
 180           while (MPFR_IS_NEG (r))
 181             { /* initial approximation n was too large */
 182               n--;
 183               mpfr_add (r, r, s, GMP_RNDU);
 184             }
 185           mpfr_prec_round (r, q, GMP_RNDU);
 186           MPFR_LOG_VAR (r);
 187           MPFR_ASSERTD (MPFR_IS_POS (r));
 188           mpfr_div_2ui (r, r, K, GMP_RNDU); /* r = (x-n*log(2))/2^K, exact */
 189
 190           MPFR_TMP_MARK(marker);
 191           MY_INIT_MPZ(ss, 3 + 2*((q-1)/BITS_PER_MP_LIMB));
 192           exps = mpfr_get_z_exp (ss, s);
 193           /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */
 194           MPFR_ASSERTD (MPFR_IS_PURE_FP (r) && MPFR_EXP (r) < 0);
 195           l = (precy < MPFR_EXP_2_THRESHOLD)
 196             ? mpfr_exp2_aux (ss, r, q, &exps)   /* naive method */
 197             : mpfr_exp2_aux2 (ss, r, q, &exps); /* Paterson/Stockmeyer meth */
 198
 199           MPFR_LOG_MSG (("l=%lu q=%lu (K+l)*q^2=%1.3e\n",
 200                          l, (unsigned long) q, (K + l) * (double) q * q));
 201
 202           for (k = 0; k < K; k++)
 203             {
 204               mpz_mul (ss, ss, ss);
 205               exps <<= 1;
 206               exps += mpz_normalize (ss, ss, q);
 207             }
 208           mpfr_set_z (s, ss, GMP_RNDN);
 209
 210           MPFR_SET_EXP(s, MPFR_GET_EXP (s) + exps);
 211           MPFR_TMP_FREE(marker); /* don't need ss anymore */
 212
 213           /* error is at most 2^K*l, plus cancel+2 to take into account of
 214              the error of 3*2^(EXP(old_r)-EXP(new_r)) on r */
 215           K += MPFR_INT_CEIL_LOG2 (l) + cancel + 2;
 216
 217           MPFR_LOG_MSG (("before mult. by 2^n:\n", 0));
 218           MPFR_LOG_VAR (s);
 219           MPFR_LOG_MSG (("err=%lu bits\n", K));
 220
 221           if (MPFR_LIKELY (MPFR_CAN_ROUND (s, q - K, precy, rnd_mode)))
 222             {
 223               mpfr_clear_flags ();
 224               inexact = mpfr_mul_2si (y, s, n, rnd_mode);
 225               break;
 226             }
 227         }
 228
 229       MPFR_ZIV_NEXT (loop, q);
 230       mpfr_set_prec (r, q);
 231       mpfr_set_prec (s, q);
 232     }
 233   MPFR_ZIV_FREE (loop);
 234
 235   mpfr_clear (r);
 236   mpfr_clear (s);
 237
 238   return inexact;
 239 }
 240
 241 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
 242    using naive method with O(l) multiplications.
 243    Return the number of iterations l.
 244    The absolute error on s is less than 3*l*(l+1)*2^(-q).
 245    Version using fixed-point arithmetic with mpz instead
 246    of mpfr for internal computations.
 247    s must have at least qn+1 limbs (qn should be enough, but currently fails
 248    since mpz_mul_2exp(s, s, q-1) reallocates qn+1 limbs)
 249 */
 250 static unsigned long
 251 mpfr_exp2_aux (mpz_t s, mpfr_srcptr r, mp_prec_t q, mp_exp_t *exps)
 252 {
 253   unsigned long l;
 254   mp_exp_t dif, expt, expr;
 255   mp_size_t qn;
 256   mpz_t t, rr;
 257   mp_size_t sbit, tbit;
 258   MPFR_TMP_DECL(marker);
 259
 260   MPFR_ASSERTN (MPFR_IS_PURE_FP (r));
 261
 262   MPFR_TMP_MARK(marker);
 263   qn = 1 + (q-1)/BITS_PER_MP_LIMB;
 264   expt = 0;
 265   *exps = 1 - (mp_exp_t) q;                   /* s = 2^(q-1) */
 266   MY_INIT_MPZ(t, 2*qn+1);
 267   MY_INIT_MPZ(rr, qn+1);
 268   mpz_set_ui(t, 1);
 269   mpz_set_ui(s, 1);
 270   mpz_mul_2exp(s, s, q-1);
 271   expr = mpfr_get_z_exp(rr, r);               /* no error here */
 272
 273   l = 0;
 274   for (;;) {
 275     l++;
 276     mpz_mul(t, t, rr);
 277     expt += expr;
 278     MPFR_MPZ_SIZEINBASE2 (sbit, s);
 279     MPFR_MPZ_SIZEINBASE2 (tbit, t);
 280     dif = *exps + sbit - expt - tbit;
 281     /* truncates the bits of t which are < ulp(s) = 2^(1-q) */
 282     expt += mpz_normalize(t, t, (mp_exp_t) q-dif); /* error at most 2^(1-q) */
 283     mpz_div_ui(t, t, l);                   /* error at most 2^(1-q) */
 284     /* the error wrt t^l/l! is here at most 3*l*ulp(s) */
 285     MPFR_ASSERTD (expt == *exps);
 286     if (mpz_sgn (t) == 0)
 287       break;
 288     mpz_add(s, s, t);                      /* no error here: exact */
 289     /* ensures rr has the same size as t: after several shifts, the error
 290        on rr is still at most ulp(t)=ulp(s) */
 291     MPFR_MPZ_SIZEINBASE2 (tbit, t);
 292     expr += mpz_normalize(rr, rr, tbit);
 293   }
 294
 295   MPFR_TMP_FREE(marker);
 296   return 3*l*(l+1);
 297 }
 298
 299 /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
 300    using Paterson-Stockmeyer algorithm with O(sqrt(l)) multiplications.
 301    Return l.
 302    Uses m multiplications of full size and 2l/m of decreasing size,
 303    i.e. a total equivalent to about m+l/m full multiplications,
 304    i.e. 2*sqrt(l) for m=sqrt(l).
 305    Version using mpz. ss must have at least (sizer+1) limbs.
 306    The error is bounded by (l^2+4*l) ulps where l is the return value.
 307 */
 308 static unsigned long
 309 mpfr_exp2_aux2 (mpz_t s, mpfr_srcptr r, mp_prec_t q, mp_exp_t *exps)
 310 {
 311   mp_exp_t expr, *expR, expt;
 312   mp_size_t sizer;
 313   mp_prec_t ql;
 314   unsigned long l, m, i;
 315   mpz_t t, *R, rr, tmp;
 316   mp_size_t sbit, rrbit;
 317   MPFR_TMP_DECL(marker);
 318
 319   /* estimate value of l */
 320   MPFR_ASSERTD (MPFR_GET_EXP (r) < 0);
 321   l = q / (- MPFR_GET_EXP (r));
 322   m = __gmpfr_isqrt (l);
 323   /* we access R[2], thus we need m >= 2 */
 324   if (m < 2)
 325     m = 2;
 326
 327   MPFR_TMP_MARK(marker);
 328   R = (mpz_t*) MPFR_TMP_ALLOC((m+1)*sizeof(mpz_t));          /* R[i] is r^i */
 329   expR = (mp_exp_t*) MPFR_TMP_ALLOC((m+1)*sizeof(mp_exp_t)); /* exponent for R[i] */
 330   sizer = 1 + (MPFR_PREC(r)-1)/BITS_PER_MP_LIMB;
 331   mpz_init(tmp);
 332   MY_INIT_MPZ(rr, sizer+2);
 333   MY_INIT_MPZ(t, 2*sizer);            /* double size for products */
 334   mpz_set_ui(s, 0);
 335   *exps = 1-q;                        /* 1 ulp = 2^(1-q) */
 336   for (i = 0 ; i <= m ; i++)
 337     MY_INIT_MPZ(R[i], sizer+2);
 338   expR[1] = mpfr_get_z_exp(R[1], r); /* exact operation: no error */
 339   expR[1] = mpz_normalize2(R[1], R[1], expR[1], 1-q); /* error <= 1 ulp */
 340   mpz_mul(t, R[1], R[1]); /* err(t) <= 2 ulps */
 341   mpz_div_2exp(R[2], t, q-1); /* err(R[2]) <= 3 ulps */
 342   expR[2] = 1-q;
 343   for (i = 3 ; i <= m ; i++)
 344     {
 345       mpz_mul(t, R[i-1], R[1]); /* err(t) <= 2*i-2 */
 346       mpz_div_2exp(R[i], t, q-1); /* err(R[i]) <= 2*i-1 ulps */
 347       expR[i] = 1-q;
 348     }
 349   mpz_set_ui (R[0], 1);
 350   mpz_mul_2exp (R[0], R[0], q-1);
 351   expR[0] = 1-q; /* R[0]=1 */
 352   mpz_set_ui (rr, 1);
 353   expr = 0; /* rr contains r^l/l! */
 354   /* by induction: err(rr) <= 2*l ulps */
 355
 356   l = 0;
 357   ql = q; /* precision used for current giant step */
 358   do
 359     {
 360       /* all R[i] must have exponent 1-ql */
 361       if (l != 0)
 362         for (i = 0 ; i < m ; i++)
 363           expR[i] = mpz_normalize2 (R[i], R[i], expR[i], 1-ql);
 364       /* the absolute error on R[i]*rr is still 2*i-1 ulps */
 365       expt = mpz_normalize2 (t, R[m-1], expR[m-1], 1-ql);
 366       /* err(t) <= 2*m-1 ulps */
 367       /* computes t = 1 + r/(l+1) + ... + r^(m-1)*l!/(l+m-1)!
 368          using Horner's scheme */
 369       for (i = m-1 ; i-- != 0 ; )
 370         {
 371           mpz_div_ui (t, t, l+i+1); /* err(t) += 1 ulp */
 372           mpz_add (t, t, R[i]);
 373         }
 374       /* now err(t) <= (3m-2) ulps */
 375
 376       /* now multiplies t by r^l/l! and adds to s */
 377       mpz_mul (t, t, rr);
 378       expt += expr;
 379       expt = mpz_normalize2 (t, t, expt, *exps);
 380       /* err(t) <= (3m-1) + err_rr(l) <= (3m-2) + 2*l */
 381       MPFR_ASSERTD (expt == *exps);
 382       mpz_add (s, s, t); /* no error here */
 383
 384       /* updates rr, the multiplication of the factors l+i could be done
 385          using binary splitting too, but it is not sure it would save much */
 386       mpz_mul (t, rr, R[m]); /* err(t) <= err(rr) + 2m-1 */
 387       expr += expR[m];
 388       mpz_set_ui (tmp, 1);
 389       for (i = 1 ; i <= m ; i++)
 390         mpz_mul_ui (tmp, tmp, l + i);
 391       mpz_fdiv_q (t, t, tmp); /* err(t) <= err(rr) + 2m */
 392       l += m;
 393       if (MPFR_UNLIKELY (mpz_sgn (t) == 0))
 394         break;
 395       expr += mpz_normalize (rr, t, ql); /* err_rr(l+1) <= err_rr(l) + 2m+1 */
 396       if (MPFR_UNLIKELY (mpz_sgn (rr) == 0))
 397         rrbit = 1;
 398       else
 399         MPFR_MPZ_SIZEINBASE2 (rrbit, rr);
 400       MPFR_MPZ_SIZEINBASE2 (sbit, s);
 401       ql = q - *exps - sbit + expr + rrbit;
 402       /* TODO: Wrong cast. I don't want what is right, but this is
 403          certainly wrong */
 404     }
 405   while ((size_t) expr+rrbit > (size_t) (int) -q);
 406
 407   MPFR_TMP_FREE(marker);
 408   mpz_clear(tmp);
 409   return l*(l+4);
 410 }