contrib/mpfr/src/lngamma.c

   1 /* mpfr_lngamma -- lngamma function
   2
   3 Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
   4 Contributed by the AriC and Caramel projects, INRIA.
   5
   6 This file is part of the GNU MPFR Library.
   7
   8 The GNU MPFR Library is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU Lesser General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or (at your
  11 option) any later version.
  12
  13 The GNU MPFR Library is distributed in the hope that it will be useful, but
  14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 License for more details.
  17
  18 You should have received a copy of the GNU Lesser General Public License
  19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
  20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
  21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
  22
  23 #define MPFR_NEED_LONGLONG_H
  24 #include "mpfr-impl.h"
  25
  26 /* given a precision p, return alpha, such that the argument reduction
  27    will use k = alpha*p*log(2).
  28
  29    Warning: we should always have alpha >= log(2)/(2Pi) ~ 0.11,
  30    and the smallest value of alpha multiplied by the smallest working
  31    precision should be >= 4.
  32 */
  33 static void
  34 mpfr_gamma_alpha (mpfr_t s, mpfr_prec_t p)
  35 {
  36   if (p <= 100)
  37     mpfr_set_ui_2exp (s, 614, -10, MPFR_RNDN); /* about 0.6 */
  38   else if (p <= 500)
  39     mpfr_set_ui_2exp (s, 819, -10, MPFR_RNDN); /* about 0.8 */
  40   else if (p <= 1000)
  41     mpfr_set_ui_2exp (s, 1331, -10, MPFR_RNDN); /* about 1.3 */
  42   else if (p <= 2000)
  43     mpfr_set_ui_2exp (s, 1741, -10, MPFR_RNDN); /* about 1.7 */
  44   else if (p <= 5000)
  45     mpfr_set_ui_2exp (s, 2253, -10, MPFR_RNDN); /* about 2.2 */
  46   else if (p <= 10000)
  47     mpfr_set_ui_2exp (s, 3482, -10, MPFR_RNDN); /* about 3.4 */
  48   else
  49     mpfr_set_ui_2exp (s, 9, -1, MPFR_RNDN); /* 4.5 */
  50 }
  51
  52 #ifdef IS_GAMMA
  53
  54 /* This function is called in case of intermediate overflow/underflow.
  55    The s1 and s2 arguments are temporary MPFR numbers, having the
  56    working precision. If the result could be determined, then the
  57    flags are updated via pexpo, y is set to the result, and the
  58    (non-zero) ternary value is returned. Otherwise 0 is returned
  59    in order to perform the next Ziv iteration. */
  60 static int
  61 mpfr_explgamma (mpfr_ptr y, mpfr_srcptr x, mpfr_save_expo_t *pexpo,
  62                 mpfr_ptr s1, mpfr_ptr s2, mpfr_rnd_t rnd)
  63 {
  64   mpfr_t t1, t2;
  65   int inex1, inex2, sign;
  66   MPFR_BLOCK_DECL (flags1);
  67   MPFR_BLOCK_DECL (flags2);
  68   MPFR_GROUP_DECL (group);
  69
  70   MPFR_BLOCK (flags1, inex1 = mpfr_lgamma (s1, &sign, x, MPFR_RNDD));
  71   MPFR_ASSERTN (inex1 != 0);
  72   /* s1 = RNDD(lngamma(x)), inexact */
  73   if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags1)))
  74     {
  75       if (MPFR_SIGN (s1) > 0)
  76         {
  77           MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, MPFR_FLAGS_OVERFLOW);
  78           return mpfr_overflow (y, rnd, sign);
  79         }
  80       else
  81         {
  82           MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, MPFR_FLAGS_UNDERFLOW);
  83           return mpfr_underflow (y, rnd == MPFR_RNDN ? MPFR_RNDZ : rnd, sign);
  84         }
  85     }
  86
  87   mpfr_set (s2, s1, MPFR_RNDN);     /* exact */
  88   mpfr_nextabove (s2);              /* v = RNDU(lngamma(z0)) */
  89
  90   if (sign < 0)
  91     rnd = MPFR_INVERT_RND (rnd);  /* since the result with be negated */
  92   MPFR_GROUP_INIT_2 (group, MPFR_PREC (y), t1, t2);
  93   MPFR_BLOCK (flags1, inex1 = mpfr_exp (t1, s1, rnd));
  94   MPFR_BLOCK (flags2, inex2 = mpfr_exp (t2, s2, rnd));
  95   /* t1 is the rounding with mode 'rnd' of a lower bound on |Gamma(x)|,
  96      t2 is the rounding with mode 'rnd' of an upper bound, thus if both
  97      are equal, so is the wanted result. If t1 and t2 differ or the flags
  98      differ, at some point of Ziv's loop they should agree. */
  99   if (mpfr_equal_p (t1, t2) && flags1 == flags2)
 100     {
 101       MPFR_ASSERTN ((inex1 > 0 && inex2 > 0) || (inex1 < 0 && inex2 < 0));
 102       mpfr_set4 (y, t1, MPFR_RNDN, sign);  /* exact */
 103       if (sign < 0)
 104         inex1 = - inex1;
 105       MPFR_SAVE_EXPO_UPDATE_FLAGS (*pexpo, flags1);
 106     }
 107   else
 108     inex1 = 0;  /* couldn't determine the result */
 109   MPFR_GROUP_CLEAR (group);
 110
 111   return inex1;
 112 }
 113
 114 #else
 115
 116 static int
 117 unit_bit (mpfr_srcptr x)
 118 {
 119   mpfr_exp_t expo;
 120   mpfr_prec_t prec;
 121   mp_limb_t x0;
 122
 123   expo = MPFR_GET_EXP (x);
 124   if (expo <= 0)
 125     return 0;  /* |x| < 1 */
 126
 127   prec = MPFR_PREC (x);
 128   if (expo > prec)
 129     return 0;  /* y is a multiple of 2^(expo-prec), thus an even integer */
 130
 131   /* Now, the unit bit is represented. */
 132
 133   prec = MPFR_PREC2LIMBS (prec) * GMP_NUMB_BITS - expo;
 134   /* number of represented fractional bits (including the trailing 0's) */
 135
 136   x0 = *(MPFR_MANT (x) + prec / GMP_NUMB_BITS);
 137   /* limb containing the unit bit */
 138
 139   return (x0 >> (prec % GMP_NUMB_BITS)) & 1;
 140 }
 141
 142 #endif
 143
 144 /* lngamma(x) = log(gamma(x)).
 145    We use formula [6.1.40] from Abramowitz&Stegun:
 146    lngamma(z) = (z-1/2)*log(z) - z + 1/2*log(2*Pi)
 147               + sum (Bernoulli[2m]/(2m)/(2m-1)/z^(2m-1),m=1..infinity)
 148    According to [6.1.42], if the sum is truncated after m=n, the error
 149    R_n(z) is bounded by |B[2n+2]|*K(z)/(2n+1)/(2n+2)/|z|^(2n+1)
 150    where K(z) = max (z^2/(u^2+z^2)) for u >= 0.
 151    For z real, |K(z)| <= 1 thus R_n(z) is bounded by the first neglected term.
 152  */
 153 #ifdef IS_GAMMA
 154 #define GAMMA_FUNC mpfr_gamma_aux
 155 #else
 156 #define GAMMA_FUNC mpfr_lngamma_aux
 157 #endif
 158
 159 static int
 160 GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
 161 {
 162   mpfr_prec_t precy, w; /* working precision */
 163   mpfr_t s, t, u, v, z;
 164   unsigned long m, k, maxm;
 165   mpz_t *INITIALIZED(B);  /* variable B declared as initialized */
 166   int compared;
 167   int inexact = 0;  /* 0 means: result y not set yet */
 168   mpfr_exp_t err_s, err_t;
 169   unsigned long Bm = 0; /* number of allocated B[] */
 170   unsigned long oldBm;
 171   double d;
 172   MPFR_SAVE_EXPO_DECL (expo);
 173   MPFR_ZIV_DECL (loop);
 174
 175   compared = mpfr_cmp_ui (z0, 1);
 176
 177   MPFR_SAVE_EXPO_MARK (expo);
 178
 179 #ifndef IS_GAMMA /* lngamma or lgamma */
 180   if (compared == 0 || (compared > 0 && mpfr_cmp_ui (z0, 2) == 0))
 181     {
 182       MPFR_SAVE_EXPO_FREE (expo);
 183       return mpfr_set_ui (y, 0, MPFR_RNDN);  /* lngamma(1 or 2) = +0 */
 184     }
 185
 186   /* Deal here with tiny inputs. We have for -0.3 <= x <= 0.3:
 187      - log|x| - gamma*x <= log|gamma(x)| <= - log|x| - gamma*x + x^2 */
 188   if (MPFR_EXP(z0) <= - (mpfr_exp_t) MPFR_PREC(y))
 189     {
 190       mpfr_t l, h, g;
 191       int ok, inex1, inex2;
 192       mpfr_prec_t prec = MPFR_PREC(y) + 14;
 193       MPFR_ZIV_DECL (loop);
 194
 195       MPFR_ZIV_INIT (loop, prec);
 196       do
 197         {
 198           mpfr_init2 (l, prec);
 199           if (MPFR_IS_POS(z0))
 200             {
 201               mpfr_log (l, z0, MPFR_RNDU); /* upper bound for log(z0) */
 202               mpfr_init2 (h, MPFR_PREC(l));
 203             }
 204           else
 205             {
 206               mpfr_init2 (h, MPFR_PREC(z0));
 207               mpfr_neg (h, z0, MPFR_RNDN); /* exact */
 208               mpfr_log (l, h, MPFR_RNDU); /* upper bound for log(-z0) */
 209               mpfr_set_prec (h, MPFR_PREC(l));
 210             }
 211           mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(|z0|) */
 212           mpfr_set (h, l, MPFR_RNDD); /* exact */
 213           mpfr_nextabove (h); /* upper bound for -log(|z0|), avoids two calls
 214                                  to mpfr_log */
 215           mpfr_init2 (g, MPFR_PREC(l));
 216           /* if z0>0, we need an upper approximation of Euler's constant
 217              for the left bound */
 218           mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDU : MPFR_RNDD);
 219           mpfr_mul (g, g, z0, MPFR_RNDD);
 220           mpfr_sub (l, l, g, MPFR_RNDD);
 221           mpfr_const_euler (g, MPFR_IS_POS(z0) ? MPFR_RNDD : MPFR_RNDU); /* cached */
 222           mpfr_mul (g, g, z0, MPFR_RNDU);
 223           mpfr_sub (h, h, g, MPFR_RNDD);
 224           mpfr_mul (g, z0, z0, MPFR_RNDU);
 225           mpfr_add (h, h, g, MPFR_RNDU);
 226           inex1 = mpfr_prec_round (l, MPFR_PREC(y), rnd);
 227           inex2 = mpfr_prec_round (h, MPFR_PREC(y), rnd);
 228           /* Caution: we not only need l = h, but both inexact flags should
 229              agree. Indeed, one of the inexact flags might be zero. In that
 230              case if we assume lngamma(z0) cannot be exact, the other flag
 231              should be correct. We are conservative here and request that both
 232              inexact flags agree. */
 233           ok = SAME_SIGN (inex1, inex2) && mpfr_cmp (l, h) == 0;
 234           if (ok)
 235             mpfr_set (y, h, rnd); /* exact */
 236           mpfr_clear (l);
 237           mpfr_clear (h);
 238           mpfr_clear (g);
 239           if (ok)
 240             {
 241               MPFR_ZIV_FREE (loop);
 242               MPFR_SAVE_EXPO_FREE (expo);
 243               return mpfr_check_range (y, inex1, rnd);
 244             }
 245           /* since we have log|gamma(x)| = - log|x| - gamma*x + O(x^2),
 246              if x ~ 2^(-n), then we have a n-bit approximation, thus
 247              we can try again with a working precision of n bits,
 248              especially when n >> PREC(y).
 249              Otherwise we would use the reflection formula evaluating x-1,
 250              which would need precision n. */
 251           MPFR_ZIV_NEXT (loop, prec);
 252         }
 253       while (prec <= -MPFR_EXP(z0));
 254       MPFR_ZIV_FREE (loop);
 255     }
 256 #endif
 257
 258   precy = MPFR_PREC(y);
 259
 260   mpfr_init2 (s, MPFR_PREC_MIN);
 261   mpfr_init2 (t, MPFR_PREC_MIN);
 262   mpfr_init2 (u, MPFR_PREC_MIN);
 263   mpfr_init2 (v, MPFR_PREC_MIN);
 264   mpfr_init2 (z, MPFR_PREC_MIN);
 265
 266   if (compared < 0)
 267     {
 268       mpfr_exp_t err_u;
 269
 270       /* use reflection formula:
 271          gamma(x) = Pi*(x-1)/sin(Pi*(2-x))/gamma(2-x)
 272          thus lngamma(x) = log(Pi*(x-1)/sin(Pi*(2-x))) - lngamma(2-x) */
 273
 274       w = precy + MPFR_INT_CEIL_LOG2 (precy);
 275       w += MPFR_INT_CEIL_LOG2 (w) + 14;
 276       MPFR_ZIV_INIT (loop, w);
 277       while (1)
 278         {
 279           MPFR_ASSERTD(w >= 3);
 280           mpfr_set_prec (s, w);
 281           mpfr_set_prec (t, w);
 282           mpfr_set_prec (u, w);
 283           mpfr_set_prec (v, w);
 284           /* In the following, we write r for a real of absolute value
 285              at most 2^(-w). Different instances of r may represent different
 286              values. */
 287           mpfr_ui_sub (s, 2, z0, MPFR_RNDD); /* s = (2-z0) * (1+2r) >= 1 */
 288           mpfr_const_pi (t, MPFR_RNDN);      /* t = Pi * (1+r) */
 289           mpfr_lngamma (u, s, MPFR_RNDN); /* lngamma(2-x) */
 290           /* Let s = (2-z0) + h. By construction, -(2-z0)*2^(1-w) <= h <= 0.
 291              We have lngamma(s) = lngamma(2-z0) + h*Psi(z), z in [2-z0+h,2-z0].
 292              Since 2-z0+h = s >= 1 and |Psi(x)| <= max(1,log(x)) for x >= 1,
 293              the error on u is bounded by
 294              ulp(u)/2 + (2-z0)*max(1,log(2-z0))*2^(1-w)
 295              = (1/2 + (2-z0)*max(1,log(2-z0))*2^(1-E(u))) ulp(u) */
 296           d = (double) MPFR_GET_EXP(s) * 0.694; /* upper bound for log(2-z0) */
 297           err_u = MPFR_GET_EXP(s) + __gmpfr_ceil_log2 (d) + 1 - MPFR_GET_EXP(u);
 298           err_u = (err_u >= 0) ? err_u + 1 : 0;
 299           /* now the error on u is bounded by 2^err_u ulps */
 300
 301           mpfr_mul (s, s, t, MPFR_RNDN); /* Pi*(2-x) * (1+r)^4 */
 302           err_s = MPFR_GET_EXP(s); /* 2-x <= 2^err_s */
 303           mpfr_sin (s, s, MPFR_RNDN); /* sin(Pi*(2-x)) */
 304           /* the error on s is bounded by 1/2*ulp(s) + [(1+2^(-w))^4-1]*(2-x)
 305              <= 1/2*ulp(s) + 5*2^(-w)*(2-x) for w >= 3
 306              <= (1/2 + 5 * 2^(-E(s)) * (2-x)) ulp(s) */
 307           err_s += 3 - MPFR_GET_EXP(s);
 308           err_s = (err_s >= 0) ? err_s + 1 : 0;
 309           /* the error on s is bounded by 2^err_s ulp(s), thus by
 310              2^(err_s+1)*2^(-w)*|s| since ulp(s) <= 2^(1-w)*|s|.
 311              Now n*2^(-w) can always be written |(1+r)^n-1| for some
 312              |r|<=2^(-w), thus taking n=2^(err_s+1) we see that
 313              |S - s| <= |(1+r)^(2^(err_s+1))-1| * |s|, where S is the
 314              true value.
 315              In fact if ulp(s) <= ulp(S) the same inequality holds for
 316              |S| instead of |s| in the right hand side, i.e., we can
 317              write s = (1+r)^(2^(err_s+1)) * S.
 318              But if ulp(S) < ulp(s), we need to add one ``bit'' to the error,
 319              to get s = (1+r)^(2^(err_s+2)) * S. This is true since with
 320              E = n*2^(-w) we have |s - S| <= E * |s|, thus
 321              |s - S| <= E/(1-E) * |S|.
 322              Now E/(1-E) is bounded by 2E as long as E<=1/2,
 323              and 2E can be written (1+r)^(2n)-1 as above.
 324           */
 325           err_s += 2; /* exponent of relative error */
 326
 327           mpfr_sub_ui (v, z0, 1, MPFR_RNDN); /* v = (x-1) * (1+r) */
 328           mpfr_mul (v, v, t, MPFR_RNDN); /* v = Pi*(x-1) * (1+r)^3 */
 329           mpfr_div (v, v, s, MPFR_RNDN); /* Pi*(x-1)/sin(Pi*(2-x)) */
 330           mpfr_abs (v, v, MPFR_RNDN);
 331           /* (1+r)^(3+2^err_s+1) */
 332           err_s = (err_s <= 1) ? 3 : err_s + 1;
 333           /* now (1+r)^M with M <= 2^err_s */
 334           mpfr_log (v, v, MPFR_RNDN);
 335           /* log(v*(1+e)) = log(v)+log(1+e) where |e| <= 2^(err_s-w).
 336              Since |log(1+e)| <= 2*e for |e| <= 1/4, the error on v is
 337              bounded by ulp(v)/2 + 2^(err_s+1-w). */
 338           if (err_s + 2 > w)
 339             {
 340               w += err_s + 2;
 341             }
 342           else
 343             {
 344               err_s += 1 - MPFR_GET_EXP(v);
 345               err_s = (err_s >= 0) ? err_s + 1 : 0;
 346               /* the error on v is bounded by 2^err_s ulps */
 347               err_u += MPFR_GET_EXP(u); /* absolute error on u */
 348               err_s += MPFR_GET_EXP(v); /* absolute error on v */
 349               mpfr_sub (s, v, u, MPFR_RNDN);
 350               /* the total error on s is bounded by ulp(s)/2 + 2^(err_u-w)
 351                  + 2^(err_s-w) <= ulp(s)/2 + 2^(max(err_u,err_s)+1-w) */
 352               err_s = (err_s >= err_u) ? err_s : err_u;
 353               err_s += 1 - MPFR_GET_EXP(s); /* error is 2^err_s ulp(s) */
 354               err_s = (err_s >= 0) ? err_s + 1 : 0;
 355               if (mpfr_can_round (s, w - err_s, MPFR_RNDN, MPFR_RNDZ, precy
 356                                   + (rnd == MPFR_RNDN)))
 357                 goto end;
 358             }
 359           MPFR_ZIV_NEXT (loop, w);
 360         }
 361       MPFR_ZIV_FREE (loop);
 362     }
 363
 364   /* now z0 > 1 */
 365
 366   MPFR_ASSERTD (compared > 0);
 367
 368   /* since k is O(w), the value of log(z0*...*(z0+k-1)) is about w*log(w),
 369      so there is a cancellation of ~log(w) in the argument reconstruction */
 370   w = precy + MPFR_INT_CEIL_LOG2 (precy);
 371   w += MPFR_INT_CEIL_LOG2 (w) + 13;
 372   MPFR_ZIV_INIT (loop, w);
 373   while (1)
 374     {
 375       MPFR_ASSERTD (w >= 3);
 376
 377       /* argument reduction: we compute gamma(z0 + k), where the series
 378          has error term B_{2n}/(z0+k)^(2n) ~ (n/(Pi*e*(z0+k)))^(2n)
 379          and we need k steps of argument reconstruction. Assuming k is large
 380          with respect to z0, and k = n, we get 1/(Pi*e)^(2n) ~ 2^(-w), i.e.,
 381          k ~ w*log(2)/2/log(Pi*e) ~ 0.1616 * w.
 382          However, since the series is more expensive to compute, the optimal
 383          value seems to be k ~ 4.5 * w experimentally. */
 384       mpfr_set_prec (s, 53);
 385       mpfr_gamma_alpha (s, w);
 386       mpfr_set_ui_2exp (s, 9, -1, MPFR_RNDU);
 387       mpfr_mul_ui (s, s, w, MPFR_RNDU);
 388       if (mpfr_cmp (z0, s) < 0)
 389         {
 390           mpfr_sub (s, s, z0, MPFR_RNDU);
 391           k = mpfr_get_ui (s, MPFR_RNDU);
 392           if (k < 3)
 393             k = 3;
 394         }
 395       else
 396         k = 3;
 397
 398       mpfr_set_prec (s, w);
 399       mpfr_set_prec (t, w);
 400       mpfr_set_prec (u, w);
 401       mpfr_set_prec (v, w);
 402       mpfr_set_prec (z, w);
 403
 404       mpfr_add_ui (z, z0, k, MPFR_RNDN);
 405       /* z = (z0+k)*(1+t1) with |t1| <= 2^(-w) */
 406
 407       /* z >= 4 ensures the relative error on log(z) is small,
 408          and also (z-1/2)*log(z)-z >= 0 */
 409       MPFR_ASSERTD (mpfr_cmp_ui (z, 4) >= 0);
 410
 411       mpfr_log (s, z, MPFR_RNDN); /* log(z) */
 412       /* we have s = log((z0+k)*(1+t1))*(1+t2) with |t1|, |t2| <= 2^(-w).
 413          Since w >= 2 and z0+k >= 4, we can write log((z0+k)*(1+t1))
 414          = log(z0+k) * (1+t3) with |t3| <= 2^(-w), thus we have
 415          s = log(z0+k) * (1+t4)^2 with |t4| <= 2^(-w) */
 416       mpfr_mul_2ui (t, z, 1, MPFR_RNDN); /* t = 2z * (1+t5) */
 417       mpfr_sub_ui (t, t, 1, MPFR_RNDN); /* t = 2z-1 * (1+t6)^3 */
 418       /* since we can write 2z*(1+t5) = (2z-1)*(1+t5') with
 419          t5' = 2z/(2z-1) * t5, thus |t5'| <= 8/7 * t5 */
 420       mpfr_mul (s, s, t, MPFR_RNDN); /* (2z-1)*log(z) * (1+t7)^6 */
 421       mpfr_div_2ui (s, s, 1, MPFR_RNDN); /* (z-1/2)*log(z) * (1+t7)^6 */
 422       mpfr_sub (s, s, z, MPFR_RNDN); /* (z-1/2)*log(z)-z */
 423       /* s = [(z-1/2)*log(z)-z]*(1+u)^14, s >= 1/2 */
 424
 425       mpfr_ui_div (u, 1, z, MPFR_RNDN); /* 1/z * (1+u), u <= 1/4 since z >= 4 */
 426
 427       /* the first term is B[2]/2/z = 1/12/z: t=1/12/z, C[2]=1 */
 428       mpfr_div_ui (t, u, 12, MPFR_RNDN); /* 1/(12z) * (1+u)^2, t <= 3/128 */
 429       mpfr_set (v, t, MPFR_RNDN);        /* (1+u)^2, v < 2^(-5) */
 430       mpfr_add (s, s, v, MPFR_RNDN);     /* (1+u)^15 */
 431
 432       mpfr_mul (u, u, u, MPFR_RNDN); /* 1/z^2 * (1+u)^3 */
 433
 434       if (Bm == 0)
 435         {
 436           B = mpfr_bernoulli_internal ((mpz_t *) 0, 0);
 437           B = mpfr_bernoulli_internal (B, 1);
 438           Bm = 2;
 439         }
 440
 441       /* m <= maxm ensures that 2*m*(2*m+1) <= ULONG_MAX */
 442       maxm = 1UL << (GMP_NUMB_BITS / 2 - 1);
 443
 444       /* s:(1+u)^15, t:(1+u)^2, t <= 3/128 */
 445
 446       for (m = 2; MPFR_GET_EXP(v) + (mpfr_exp_t) w >= MPFR_GET_EXP(s); m++)
 447         {
 448           mpfr_mul (t, t, u, MPFR_RNDN); /* (1+u)^(10m-14) */
 449           if (m <= maxm)
 450             {
 451               mpfr_mul_ui (t, t, 2*(m-1)*(2*m-3), MPFR_RNDN);
 452               mpfr_div_ui (t, t, 2*m*(2*m-1), MPFR_RNDN);
 453               mpfr_div_ui (t, t, 2*m*(2*m+1), MPFR_RNDN);
 454             }
 455           else
 456             {
 457               mpfr_mul_ui (t, t, 2*(m-1), MPFR_RNDN);
 458               mpfr_mul_ui (t, t, 2*m-3, MPFR_RNDN);
 459               mpfr_div_ui (t, t, 2*m, MPFR_RNDN);
 460               mpfr_div_ui (t, t, 2*m-1, MPFR_RNDN);
 461               mpfr_div_ui (t, t, 2*m, MPFR_RNDN);
 462               mpfr_div_ui (t, t, 2*m+1, MPFR_RNDN);
 463             }
 464           /* (1+u)^(10m-8) */
 465           /* invariant: t=1/(2m)/(2m-1)/z^(2m-1)/(2m+1)! */
 466           if (Bm <= m)
 467             {
 468               B = mpfr_bernoulli_internal (B, m); /* B[2m]*(2m+1)!, exact */
 469               Bm ++;
 470             }
 471           mpfr_mul_z (v, t, B[m], MPFR_RNDN); /* (1+u)^(10m-7) */
 472           MPFR_ASSERTD(MPFR_GET_EXP(v) <= - (2 * m + 3));
 473           mpfr_add (s, s, v, MPFR_RNDN);
 474         }
 475       /* m <= 1/2*Pi*e*z ensures that |v[m]| < 1/2^(2m+3) */
 476       MPFR_ASSERTD ((double) m <= 4.26 * mpfr_get_d (z, MPFR_RNDZ));
 477
 478       /* We have sum([(1+u)^(10m-7)-1]*1/2^(2m+3), m=2..infinity)
 479          <= 1.46*u for u <= 2^(-3).
 480          We have 0 < lngamma(z) - [(z - 1/2) ln(z) - z + 1/2 ln(2 Pi)] < 0.021
 481          for z >= 4, thus since the initial s >= 0.85, the different values of
 482          s differ by at most one binade, and the total rounding error on s
 483          in the for-loop is bounded by 2*(m-1)*ulp(final_s).
 484          The error coming from the v's is bounded by
 485          1.46*2^(-w) <= 2*ulp(final_s).
 486          Thus the total error so far is bounded by [(1+u)^15-1]*s+2m*ulp(s)
 487          <= (2m+47)*ulp(s).
 488          Taking into account the truncation error (which is bounded by the last
 489          term v[] according to 6.1.42 in A&S), the bound is (2m+48)*ulp(s).
 490       */
 491
 492       /* add 1/2*log(2*Pi) and subtract log(z0*(z0+1)*...*(z0+k-1)) */
 493       mpfr_const_pi (v, MPFR_RNDN); /* v = Pi*(1+u) */
 494       mpfr_mul_2ui (v, v, 1, MPFR_RNDN); /* v = 2*Pi * (1+u) */
 495       if (k)
 496         {
 497           unsigned long l;
 498           mpfr_set (t, z0, MPFR_RNDN); /* t = z0*(1+u) */
 499           for (l = 1; l < k; l++)
 500             {
 501               mpfr_add_ui (u, z0, l, MPFR_RNDN); /* u = (z0+l)*(1+u) */
 502               mpfr_mul (t, t, u, MPFR_RNDN);     /* (1+u)^(2l+1) */
 503             }
 504           /* now t: (1+u)^(2k-1) */
 505           /* instead of computing log(sqrt(2*Pi)/t), we compute
 506              1/2*log(2*Pi/t^2), which trades a square root for a square */
 507           mpfr_mul (t, t, t, MPFR_RNDN); /* (z0*...*(z0+k-1))^2, (1+u)^(4k-1) */
 508           mpfr_div (v, v, t, MPFR_RNDN);
 509           /* 2*Pi/(z0*...*(z0+k-1))^2 (1+u)^(4k+1) */
 510         }
 511 #ifdef IS_GAMMA
 512       err_s = MPFR_GET_EXP(s);
 513       mpfr_exp (s, s, MPFR_RNDN);
 514       /* If s is +Inf, we compute exp(lngamma(z0)). */
 515       if (mpfr_inf_p (s))
 516         {
 517           inexact = mpfr_explgamma (y, z0, &expo, s, t, rnd);
 518           if (inexact)
 519             goto end0;
 520           else
 521             goto ziv_next;
 522         }
 523       /* before the exponential, we have s = s0 + h where
 524          |h| <= (2m+48)*ulp(s), thus exp(s0) = exp(s) * exp(-h).
 525          For |h| <= 1/4, we have |exp(h)-1| <= 1.2*|h| thus
 526          |exp(s) - exp(s0)| <= 1.2 * exp(s) * (2m+48)* 2^(EXP(s)-w). */
 527       d = 1.2 * (2.0 * (double) m + 48.0);
 528       /* the error on s is bounded by d*2^err_s * 2^(-w) */
 529       mpfr_sqrt (t, v, MPFR_RNDN);
 530       /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
 531          thus t = sqrt(v0)*(1+u)^(2k+3/2). */
 532       mpfr_mul (s, s, t, MPFR_RNDN);
 533       /* the error on input s is bounded by (1+u)^(d*2^err_s),
 534          and that on t is (1+u)^(2k+3/2), thus the
 535          total error is (1+u)^(d*2^err_s+2k+5/2) */
 536       err_s += __gmpfr_ceil_log2 (d);
 537       err_t = __gmpfr_ceil_log2 (2.0 * (double) k + 2.5);
 538       err_s = (err_s >= err_t) ? err_s + 1 : err_t + 1;
 539 #else
 540       mpfr_log (t, v, MPFR_RNDN);
 541       /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
 542          thus log(v) = log(v0) + (4k+1)*log(1+u). Since |log(1+u)/u| <= 1.07
 543          for |u| <= 2^(-3), the absolute error on log(v) is bounded by
 544          1.07*(4k+1)*u, and the rounding error by ulp(t). */
 545       mpfr_div_2ui (t, t, 1, MPFR_RNDN);
 546       /* the error on t is now bounded by ulp(t) + 0.54*(4k+1)*2^(-w).
 547          We have sqrt(2*Pi)/(z0*(z0+1)*...*(z0+k-1)) <= sqrt(2*Pi)/k! <= 0.5
 548          since k>=3, thus t <= -0.5 and ulp(t) >= 2^(-w).
 549          Thus the error on t is bounded by (2.16*k+1.54)*ulp(t). */
 550       err_t = MPFR_GET_EXP(t) + (mpfr_exp_t)
 551         __gmpfr_ceil_log2 (2.2 * (double) k + 1.6);
 552       err_s = MPFR_GET_EXP(s) + (mpfr_exp_t)
 553         __gmpfr_ceil_log2 (2.0 * (double) m + 48.0);
 554       mpfr_add (s, s, t, MPFR_RNDN); /* this is a subtraction in fact */
 555       /* the final error in ulp(s) is
 556          <= 1 + 2^(err_t-EXP(s)) + 2^(err_s-EXP(s))
 557          <= 2^(1+max(err_t,err_s)-EXP(s)) if err_t <> err_s
 558          <= 2^(2+max(err_t,err_s)-EXP(s)) if err_t = err_s */
 559       err_s = (err_t == err_s) ? 1 + err_s : ((err_t > err_s) ? err_t : err_s);
 560       err_s += 1 - MPFR_GET_EXP(s);
 561 #endif
 562       if (MPFR_LIKELY (MPFR_CAN_ROUND (s, w - err_s, precy, rnd)))
 563         break;
 564 #ifdef IS_GAMMA
 565     ziv_next:
 566 #endif
 567       MPFR_ZIV_NEXT (loop, w);
 568     }
 569
 570 #ifdef IS_GAMMA
 571  end0:
 572 #endif
 573   oldBm = Bm;
 574   while (Bm--)
 575     mpz_clear (B[Bm]);
 576   (*__gmp_free_func) (B, oldBm * sizeof (mpz_t));
 577
 578  end:
 579   if (inexact == 0)
 580     inexact = mpfr_set (y, s, rnd);
 581   MPFR_ZIV_FREE (loop);
 582
 583   mpfr_clear (s);
 584   mpfr_clear (t);
 585   mpfr_clear (u);
 586   mpfr_clear (v);
 587   mpfr_clear (z);
 588
 589   MPFR_SAVE_EXPO_FREE (expo);
 590   return mpfr_check_range (y, inexact, rnd);
 591 }
 592
 593 #ifndef IS_GAMMA
 594
 595 int
 596 mpfr_lngamma (mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd)
 597 {
 598   int inex;
 599
 600   MPFR_LOG_FUNC
 601     (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd),
 602      ("y[%Pu]=%.*Rg inexact=%d",
 603       mpfr_get_prec (y), mpfr_log_prec, y, inex));
 604
 605   /* special cases */
 606   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
 607     {
 608       if (MPFR_IS_NAN (x) || MPFR_IS_NEG (x))
 609         {
 610           MPFR_SET_NAN (y);
 611           MPFR_RET_NAN;
 612         }
 613       else /* lngamma(+Inf) = lngamma(+0) = +Inf */
 614         {
 615           if (MPFR_IS_ZERO (x))
 616             mpfr_set_divby0 ();
 617           MPFR_SET_INF (y);
 618           MPFR_SET_POS (y);
 619           MPFR_RET (0);  /* exact */
 620         }
 621     }
 622
 623   /* if x < 0 and -2k-1 <= x <= -2k, then lngamma(x) = NaN */
 624   if (MPFR_IS_NEG (x) && (unit_bit (x) == 0 || mpfr_integer_p (x)))
 625     {
 626       MPFR_SET_NAN (y);
 627       MPFR_RET_NAN;
 628     }
 629
 630   inex = mpfr_lngamma_aux (y, x, rnd);
 631   return inex;
 632 }
 633
 634 int
 635 mpfr_lgamma (mpfr_ptr y, int *signp, mpfr_srcptr x, mpfr_rnd_t rnd)
 636 {
 637   int inex;
 638
 639   MPFR_LOG_FUNC
 640     (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (x), mpfr_log_prec, x, rnd),
 641      ("y[%Pu]=%.*Rg signp=%d inexact=%d",
 642       mpfr_get_prec (y), mpfr_log_prec, y, *signp, inex));
 643
 644   *signp = 1;  /* most common case */
 645
 646   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
 647     {
 648       if (MPFR_IS_NAN (x))
 649         {
 650           MPFR_SET_NAN (y);
 651           MPFR_RET_NAN;
 652         }
 653       else
 654         {
 655           if (MPFR_IS_ZERO (x))
 656             mpfr_set_divby0 ();
 657           *signp = MPFR_INT_SIGN (x);
 658           MPFR_SET_INF (y);
 659           MPFR_SET_POS (y);
 660           MPFR_RET (0);
 661         }
 662     }
 663
 664   if (MPFR_IS_NEG (x))
 665     {
 666       if (mpfr_integer_p (x))
 667         {
 668           MPFR_SET_INF (y);
 669           MPFR_SET_POS (y);
 670           mpfr_set_divby0 ();
 671           MPFR_RET (0);
 672         }
 673
 674       if (unit_bit (x) == 0)
 675         *signp = -1;
 676
 677       /* For tiny negative x, we have gamma(x) = 1/x - euler + O(x),
 678          thus |gamma(x)| = -1/x + euler + O(x), and
 679          log |gamma(x)| = -log(-x) - euler*x + O(x^2).
 680          More precisely we have for -0.4 <= x < 0:
 681          -log(-x) <= log |gamma(x)| <= -log(-x) - x.
 682          Since log(x) is not representable, we may have an instance of the
 683          Table Maker Dilemma. The only way to ensure correct rounding is to
 684          compute an interval [l,h] such that l <= -log(-x) and
 685          -log(-x) - x <= h, and check whether l and h round to the same number
 686          for the target precision and rounding modes. */
 687       if (MPFR_EXP(x) + 1 <= - (mpfr_exp_t) MPFR_PREC(y))
 688         /* since PREC(y) >= 1, this ensures EXP(x) <= -2,
 689            thus |x| <= 0.25 < 0.4 */
 690         {
 691           mpfr_t l, h;
 692           int ok, inex2;
 693           mpfr_prec_t w = MPFR_PREC (y) + 14;
 694           mpfr_exp_t expl;
 695
 696           while (1)
 697             {
 698               mpfr_init2 (l, w);
 699               mpfr_init2 (h, w);
 700               /* we want a lower bound on -log(-x), thus an upper bound
 701                  on log(-x), thus an upper bound on -x. */
 702               mpfr_neg (l, x, MPFR_RNDU); /* upper bound on -x */
 703               mpfr_log (l, l, MPFR_RNDU); /* upper bound for log(-x) */
 704               mpfr_neg (l, l, MPFR_RNDD); /* lower bound for -log(-x) */
 705               mpfr_neg (h, x, MPFR_RNDD); /* lower bound on -x */
 706               mpfr_log (h, h, MPFR_RNDD); /* lower bound on log(-x) */
 707               mpfr_neg (h, h, MPFR_RNDU); /* upper bound for -log(-x) */
 708               mpfr_sub (h, h, x, MPFR_RNDU); /* upper bound for -log(-x) - x */
 709               inex = mpfr_prec_round (l, MPFR_PREC (y), rnd);
 710               inex2 = mpfr_prec_round (h, MPFR_PREC (y), rnd);
 711               /* Caution: we not only need l = h, but both inexact flags
 712                  should agree. Indeed, one of the inexact flags might be
 713                  zero. In that case if we assume ln|gamma(x)| cannot be
 714                  exact, the other flag should be correct. We are conservative
 715                  here and request that both inexact flags agree. */
 716               ok = SAME_SIGN (inex, inex2) && mpfr_equal_p (l, h);
 717               if (ok)
 718                 mpfr_set (y, h, rnd); /* exact */
 719               else
 720                 expl = MPFR_EXP (l);
 721               mpfr_clear (l);
 722               mpfr_clear (h);
 723               if (ok)
 724                 return inex;
 725               /* if ulp(log(-x)) <= |x| there is no reason to loop,
 726                  since the width of [l, h] will be at least |x| */
 727               if (expl < MPFR_EXP(x) + (mpfr_exp_t) w)
 728                 break;
 729               w += MPFR_INT_CEIL_LOG2(w) + 3;
 730             }
 731         }
 732     }
 733
 734   inex = mpfr_lngamma_aux (y, x, rnd);
 735   return inex;
 736 }
 737
 738 #endif