contrib/mpfr/lngamma.c

   1 /* mpfr_lngamma -- lngamma function
   2
   3 Copyright 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   4 Contributed by the Arenaire and Cacao projects, INRIA.
   5
   6 This file is part of the GNU MPFR Library.
   7
   8 The GNU MPFR Library is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU Lesser General Public License as published by
  10 the Free Software Foundation; either version 2.1 of the License, or (at your
  11 option) any later version.
  12
  13 The GNU MPFR Library is distributed in the hope that it will be useful, but
  14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 License for more details.
  17
  18 You should have received a copy of the GNU Lesser General Public License
  19 along with the GNU MPFR Library; see the file COPYING.LIB.  If not, write to
  20 the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
  21 MA 02110-1301, USA. */
  22
  23 #define MPFR_NEED_LONGLONG_H
  24 #include "mpfr-impl.h"
  25
  26 /* assuming b[0]...b[2(n-1)] are computed, computes and stores B[2n]*(2n+1)!
  27
  28    t/(exp(t)-1) = sum(B[j]*t^j/j!, j=0..infinity)
  29    thus t = (exp(t)-1) * sum(B[j]*t^j/j!, n=0..infinity).
  30    Taking the coefficient of degree n+1 > 1, we get:
  31    0 = sum(1/(n+1-k)!*B[k]/k!, k=0..n)
  32    which gives:
  33    B[n] = -sum(binomial(n+1,k)*B[k], k=0..n-1)/(n+1).
  34
  35    Let C[n] = B[n]*(n+1)!.
  36    Then C[n] = -sum(binomial(n+1,k)*C[k]*n!/(k+1)!,  k=0..n-1),
  37    which proves that the C[n] are integers.
  38 */
  39 static mpz_t*
  40 bernoulli (mpz_t *b, unsigned long n)
  41 {
  42   if (n == 0)
  43     {
  44       b = (mpz_t *) (*__gmp_allocate_func) (sizeof (mpz_t));
  45       mpz_init_set_ui (b[0], 1);
  46     }
  47   else
  48     {
  49       mpz_t t;
  50       unsigned long k;
  51
  52       b = (mpz_t *) (*__gmp_reallocate_func)
  53         (b, n * sizeof (mpz_t), (n + 1) * sizeof (mpz_t));
  54       mpz_init (b[n]);
  55       /* b[n] = -sum(binomial(2n+1,2k)*C[k]*(2n)!/(2k+1)!,  k=0..n-1) */
  56       mpz_init_set_ui (t, 2 * n + 1);
  57       mpz_mul_ui (t, t, 2 * n - 1);
  58       mpz_mul_ui (t, t, 2 * n);
  59       mpz_mul_ui (t, t, n);
  60       mpz_div_ui (t, t, 3); /* exact: t=binomial(2*n+1,2*k)*(2*n)!/(2*k+1)!
  61                                for k=n-1 */
  62       mpz_mul (b[n], t, b[n-1]);
  63       for (k = n - 1; k-- > 0;)
  64         {
  65           mpz_mul_ui (t, t, 2 * k + 1);
  66           mpz_mul_ui (t, t, 2 * k + 2);
  67           mpz_mul_ui (t, t, 2 * k + 2);
  68           mpz_mul_ui (t, t, 2 * k + 3);
  69           mpz_div_ui (t, t, 2 * (n - k) + 1);
  70           mpz_div_ui (t, t, 2 * (n - k));
  71           mpz_addmul (b[n], t, b[k]);
  72         }
  73       /* take into account C[1] */
  74       mpz_mul_ui (t, t, 2 * n + 1);
  75       mpz_div_2exp (t, t, 1);
  76       mpz_sub (b[n], b[n], t);
  77       mpz_neg (b[n], b[n]);
  78       mpz_clear (t);
  79     }
  80   return b;
  81 }
  82
  83 /* given a precision p, return alpha, such that the argument reduction
  84    will use k = alpha*p*log(2).
  85
  86    Warning: we should always have alpha >= log(2)/(2Pi) ~ 0.11,
  87    and the smallest value of alpha multiplied by the smallest working
  88    precision should be >= 4.
  89 */
  90 static double
  91 mpfr_gamma_alpha (mp_prec_t p)
  92 {
  93   if (p <= 100)
  94     return 0.6;
  95   else if (p <= 200)
  96     return 0.8;
  97   else if (p <= 500)
  98     return 0.8;
  99   else if (p <= 1000)
 100     return 1.3;
 101   else if (p <= 2000)
 102     return 1.7;
 103   else if (p <= 5000)
 104     return 2.2;
 105   else if (p <= 10000)
 106     return 3.4;
 107   else /* heuristic fit from above */
 108     return 0.26 * (double) MPFR_INT_CEIL_LOG2 ((unsigned long) p);
 109 }
 110
 111 #ifndef IS_GAMMA
 112 static int
 113 unit_bit (mpfr_srcptr (x))
 114 {
 115   mp_exp_t expo;
 116   mp_prec_t prec;
 117   mp_limb_t x0;
 118
 119   expo = MPFR_GET_EXP (x);
 120   if (expo <= 0)
 121     return 0;  /* |x| < 1 */
 122
 123   prec = MPFR_PREC (x);
 124   if (expo > prec)
 125     return 0;  /* y is a multiple of 2^(expo-prec), thus an even integer */
 126
 127   /* Now, the unit bit is represented. */
 128
 129   prec = ((prec - 1) / BITS_PER_MP_LIMB + 1) * BITS_PER_MP_LIMB - expo;
 130   /* number of represented fractional bits (including the trailing 0's) */
 131
 132   x0 = *(MPFR_MANT (x) + prec / BITS_PER_MP_LIMB);
 133   /* limb containing the unit bit */
 134
 135   return (x0 >> (prec % BITS_PER_MP_LIMB)) & 1;
 136 }
 137 #endif
 138
 139 /* lngamma(x) = log(gamma(x)).
 140    We use formula [6.1.40] from Abramowitz&Stegun:
 141    lngamma(z) = (z-1/2)*log(z) - z + 1/2*log(2*Pi)
 142               + sum (Bernoulli[2n]/(2m)/(2m-1)/z^(2m-1),m=1..infinity)
 143    According to [6.1.42], if the sum is truncated after m=n, the error
 144    R_n(z) is bounded by |B[2n+2]|*K(z)/(2n+1)/(2n+2)/|z|^(2n+1)
 145    where K(z) = max (z^2/(u^2+z^2)) for u >= 0.
 146    For z real, |K(z)| <= 1 thus R_n(z) is bounded by the first neglected term.
 147  */
 148 #ifdef IS_GAMMA
 149 #define GAMMA_FUNC mpfr_gamma_aux
 150 #else
 151 #define GAMMA_FUNC mpfr_lngamma_aux
 152 #endif
 153
 154 static int
 155 GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mp_rnd_t rnd)
 156 {
 157   mp_prec_t precy, w; /* working precision */
 158   mpfr_t s, t, u, v, z;
 159   unsigned long m, k, maxm;
 160   mpz_t *INITIALIZED(B);  /* variable B declared as initialized */
 161   int inexact, compared;
 162   mp_exp_t err_s, err_t;
 163   unsigned long Bm = 0; /* number of allocated B[] */
 164   unsigned long oldBm;
 165   double d;
 166   MPFR_SAVE_EXPO_DECL (expo);
 167
 168   compared = mpfr_cmp_ui (z0, 1);
 169
 170   MPFR_SAVE_EXPO_MARK (expo);
 171
 172 #ifndef IS_GAMMA /* lngamma or lgamma */
 173   if (compared == 0 || (compared > 0 && mpfr_cmp_ui (z0, 2) == 0))
 174     {
 175       MPFR_SAVE_EXPO_FREE (expo);
 176       return mpfr_set_ui (y, 0, GMP_RNDN);  /* lngamma(1 or 2) = +0 */
 177     }
 178
 179   /* Deal here with tiny inputs. We have for -0.3 <= x <= 0.3:
 180      - log|x| - gamma*x <= log|gamma(x)| <= - log|x| - gamma*x + x^2 */
 181   if (MPFR_EXP(z0) <= - (mp_exp_t) MPFR_PREC(y))
 182     {
 183       mpfr_t l, h, g;
 184       int ok, inex2;
 185       mp_prec_t prec = MPFR_PREC(y) + 14;
 186       MPFR_ZIV_DECL (loop);
 187
 188       MPFR_ZIV_INIT (loop, prec);
 189       do
 190         {
 191           mpfr_init2 (l, prec);
 192           if (MPFR_IS_POS(z0))
 193             {
 194               mpfr_log (l, z0, GMP_RNDU); /* upper bound for log(z0) */
 195               mpfr_init2 (h, MPFR_PREC(l));
 196             }
 197           else
 198             {
 199               mpfr_init2 (h, MPFR_PREC(z0));
 200               mpfr_neg (h, z0, GMP_RNDN); /* exact */
 201               mpfr_log (l, h, GMP_RNDU); /* upper bound for log(-z0) */
 202               mpfr_set_prec (h, MPFR_PREC(l));
 203             }
 204           mpfr_neg (l, l, GMP_RNDD); /* lower bound for -log(|z0|) */
 205           mpfr_set (h, l, GMP_RNDD); /* exact */
 206           mpfr_nextabove (h); /* upper bound for -log(|z0|), avoids two calls
 207                                  to mpfr_log */
 208           mpfr_init2 (g, MPFR_PREC(l));
 209           /* if z0>0, we need an upper approximation of Euler's constant
 210              for the left bound */
 211           mpfr_const_euler (g, MPFR_IS_POS(z0) ? GMP_RNDU : GMP_RNDD);
 212           mpfr_mul (g, g, z0, GMP_RNDD);
 213           mpfr_sub (l, l, g, GMP_RNDD);
 214           mpfr_const_euler (g, MPFR_IS_POS(z0) ? GMP_RNDD : GMP_RNDU); /* cached */
 215           mpfr_mul (g, g, z0, GMP_RNDU);
 216           mpfr_sub (h, h, g, GMP_RNDD);
 217           mpfr_mul (g, z0, z0, GMP_RNDU);
 218           mpfr_add (h, h, g, GMP_RNDU);
 219           inexact = mpfr_prec_round (l, MPFR_PREC(y), rnd);
 220           inex2 = mpfr_prec_round (h, MPFR_PREC(y), rnd);
 221           /* Caution: we not only need l = h, but both inexact flags should
 222              agree. Indeed, one of the inexact flags might be zero. In that
 223              case if we assume lngamma(z0) cannot be exact, the other flag
 224              should be correct. We are conservative here and request that both
 225              inexact flags agree. */
 226           ok = SAME_SIGN (inexact, inex2) && mpfr_cmp (l, h) == 0;
 227           if (ok)
 228             mpfr_set (y, h, rnd); /* exact */
 229           mpfr_clear (l);
 230           mpfr_clear (h);
 231           mpfr_clear (g);
 232           if (ok)
 233             {
 234               MPFR_SAVE_EXPO_FREE (expo);
 235               return mpfr_check_range (y, inexact, rnd);
 236             }
 237           /* since we have log|gamma(x)| = - log|x| - gamma*x + O(x^2),
 238              if x ~ 2^(-n), then we have a n-bit approximation, thus
 239              we can try again with a working precision of n bits,
 240              especially when n >> PREC(y).
 241              Otherwise we would use the reflection formula evaluating x-1,
 242              which would need precision n. */
 243           MPFR_ZIV_NEXT (loop, prec);
 244         }
 245       while (prec <= -MPFR_EXP(z0));
 246       MPFR_ZIV_FREE (loop);
 247     }
 248 #endif
 249
 250   precy = MPFR_PREC(y);
 251
 252   mpfr_init2 (s, MPFR_PREC_MIN);
 253   mpfr_init2 (t, MPFR_PREC_MIN);
 254   mpfr_init2 (u, MPFR_PREC_MIN);
 255   mpfr_init2 (v, MPFR_PREC_MIN);
 256   mpfr_init2 (z, MPFR_PREC_MIN);
 257
 258   if (compared < 0)
 259     {
 260       mp_exp_t err_u;
 261
 262       /* use reflection formula:
 263          gamma(x) = Pi*(x-1)/sin(Pi*(2-x))/gamma(2-x)
 264          thus lngamma(x) = log(Pi*(x-1)/sin(Pi*(2-x))) - lngamma(2-x) */
 265
 266       w = precy + MPFR_INT_CEIL_LOG2 (precy);
 267       while (1)
 268         {
 269           w += MPFR_INT_CEIL_LOG2 (w) + 14;
 270           MPFR_ASSERTD(w >= 3);
 271           mpfr_set_prec (s, w);
 272           mpfr_set_prec (t, w);
 273           mpfr_set_prec (u, w);
 274           mpfr_set_prec (v, w);
 275           /* In the following, we write r for a real of absolute value
 276              at most 2^(-w). Different instances of r may represent different
 277              values. */
 278           mpfr_ui_sub (s, 2, z0, GMP_RNDD); /* s = (2-z0) * (1+2r) >= 1 */
 279           mpfr_const_pi (t, GMP_RNDN);      /* t = Pi * (1+r) */
 280           mpfr_lngamma (u, s, GMP_RNDN); /* lngamma(2-x) */
 281           /* Let s = (2-z0) + h. By construction, -(2-z0)*2^(1-w) <= h <= 0.
 282              We have lngamma(s) = lngamma(2-z0) + h*Psi(z), z in [2-z0+h,2-z0].
 283              Since 2-z0+h = s >= 1 and |Psi(x)| <= max(1,log(x)) for x >= 1,
 284              the error on u is bounded by
 285              ulp(u)/2 + (2-z0)*max(1,log(2-z0))*2^(1-w)
 286              = (1/2 + (2-z0)*max(1,log(2-z0))*2^(1-E(u))) ulp(u) */
 287           d = (double) MPFR_GET_EXP(s) * 0.694; /* upper bound for log(2-z0) */
 288           err_u = MPFR_GET_EXP(s) + __gmpfr_ceil_log2 (d) + 1 - MPFR_GET_EXP(u);
 289           err_u = (err_u >= 0) ? err_u + 1 : 0;
 290           /* now the error on u is bounded by 2^err_u ulps */
 291
 292           mpfr_mul (s, s, t, GMP_RNDN); /* Pi*(2-x) * (1+r)^4 */
 293           err_s = MPFR_GET_EXP(s); /* 2-x <= 2^err_s */
 294           mpfr_sin (s, s, GMP_RNDN); /* sin(Pi*(2-x)) */
 295           /* the error on s is bounded by 1/2*ulp(s) + [(1+2^(-w))^4-1]*(2-x)
 296              <= 1/2*ulp(s) + 5*2^(-w)*(2-x) for w >= 3
 297              <= (1/2 + 5 * 2^(-E(s)) * (2-x)) ulp(s) */
 298           err_s += 3 - MPFR_GET_EXP(s);
 299           err_s = (err_s >= 0) ? err_s + 1 : 0;
 300           /* the error on s is bounded by 2^err_s ulp(s), thus by
 301              2^(err_s+1)*2^(-w)*|s| since ulp(s) <= 2^(1-w)*|s|.
 302              Now n*2^(-w) can always be written |(1+r)^n-1| for some
 303              |r|<=2^(-w), thus taking n=2^(err_s+1) we see that
 304              |S - s| <= |(1+r)^(2^(err_s+1))-1| * |s|, where S is the
 305              true value.
 306              In fact if ulp(s) <= ulp(S) the same inequality holds for
 307              |S| instead of |s| in the right hand side, i.e., we can
 308              write s = (1+r)^(2^(err_s+1)) * S.
 309              But if ulp(S) < ulp(s), we need to add one ``bit'' to the error,
 310              to get s = (1+r)^(2^(err_s+2)) * S. This is true since with
 311              E = n*2^(-w) we have |s - S| <= E * |s|, thus
 312              |s - S| <= E/(1-E) * |S|.
 313              Now E/(1-E) is bounded by 2E as long as E<=1/2,
 314              and 2E can be written (1+r)^(2n)-1 as above.
 315           */
 316           err_s += 2; /* exponent of relative error */
 317
 318           mpfr_sub_ui (v, z0, 1, GMP_RNDN); /* v = (x-1) * (1+r) */
 319           mpfr_mul (v, v, t, GMP_RNDN); /* v = Pi*(x-1) * (1+r)^3 */
 320           mpfr_div (v, v, s, GMP_RNDN); /* Pi*(x-1)/sin(Pi*(2-x)) */
 321           mpfr_abs (v, v, GMP_RNDN);
 322           /* (1+r)^(3+2^err_s+1) */
 323           err_s = (err_s <= 1) ? 3 : err_s + 1;
 324           /* now (1+r)^M with M <= 2^err_s */
 325           mpfr_log (v, v, GMP_RNDN);
 326           /* log(v*(1+e)) = log(v)+log(1+e) where |e| <= 2^(err_s-w).
 327              Since |log(1+e)| <= 2*e for |e| <= 1/4, the error on v is
 328              bounded by ulp(v)/2 + 2^(err_s+1-w). */
 329           if (err_s + 2 > w)
 330             {
 331               w += err_s + 2;
 332             }
 333           else
 334             {
 335               err_s += 1 - MPFR_GET_EXP(v);
 336               err_s = (err_s >= 0) ? err_s + 1 : 0;
 337               /* the error on v is bounded by 2^err_s ulps */
 338               err_u += MPFR_GET_EXP(u); /* absolute error on u */
 339               err_s += MPFR_GET_EXP(v); /* absolute error on v */
 340               mpfr_sub (s, v, u, GMP_RNDN);
 341               /* the total error on s is bounded by ulp(s)/2 + 2^(err_u-w)
 342                  + 2^(err_s-w) <= ulp(s)/2 + 2^(max(err_u,err_s)+1-w) */
 343               err_s = (err_s >= err_u) ? err_s : err_u;
 344               err_s += 1 - MPFR_GET_EXP(s); /* error is 2^err_s ulp(s) */
 345               err_s = (err_s >= 0) ? err_s + 1 : 0;
 346               if (mpfr_can_round (s, w - err_s, GMP_RNDN, GMP_RNDZ, precy
 347                                   + (rnd == GMP_RNDN)))
 348                 goto end;
 349             }
 350         }
 351     }
 352
 353   /* now z0 > 1 */
 354
 355   MPFR_ASSERTD (compared > 0);
 356
 357   /* since k is O(w), the value of log(z0*...*(z0+k-1)) is about w*log(w),
 358      so there is a cancellation of ~log(w) in the argument reconstruction */
 359   w = precy + MPFR_INT_CEIL_LOG2 (precy);
 360
 361   do
 362     {
 363       w += MPFR_INT_CEIL_LOG2 (w) + 13;
 364       MPFR_ASSERTD (w >= 3);
 365
 366       mpfr_set_prec (s, 53);
 367       /* we need z >= w*log(2)/(2*Pi) to get an absolute error less than 2^(-w)
 368          but the optimal value is about 0.155665*w */
 369       /* FIXME: replace double by mpfr_t types. */
 370       mpfr_set_d (s, mpfr_gamma_alpha (precy) * (double) w, GMP_RNDU);
 371       if (mpfr_cmp (z0, s) < 0)
 372         {
 373           mpfr_sub (s, s, z0, GMP_RNDU);
 374           k = mpfr_get_ui (s, GMP_RNDU);
 375           if (k < 3)
 376             k = 3;
 377         }
 378       else
 379         k = 3;
 380
 381       mpfr_set_prec (s, w);
 382       mpfr_set_prec (t, w);
 383       mpfr_set_prec (u, w);
 384       mpfr_set_prec (v, w);
 385       mpfr_set_prec (z, w);
 386
 387       mpfr_add_ui (z, z0, k, GMP_RNDN);
 388       /* z = (z0+k)*(1+t1) with |t1| <= 2^(-w) */
 389
 390       /* z >= 4 ensures the relative error on log(z) is small,
 391          and also (z-1/2)*log(z)-z >= 0 */
 392       MPFR_ASSERTD (mpfr_cmp_ui (z, 4) >= 0);
 393
 394       mpfr_log (s, z, GMP_RNDN); /* log(z) */
 395       /* we have s = log((z0+k)*(1+t1))*(1+t2) with |t1|, |t2| <= 2^(-w).
 396          Since w >= 2 and z0+k >= 4, we can write log((z0+k)*(1+t1))
 397          = log(z0+k) * (1+t3) with |t3| <= 2^(-w), thus we have
 398          s = log(z0+k) * (1+t4)^2 with |t4| <= 2^(-w) */
 399       mpfr_mul_2ui (t, z, 1, GMP_RNDN); /* t = 2z * (1+t5) */
 400       mpfr_sub_ui (t, t, 1, GMP_RNDN); /* t = 2z-1 * (1+t6)^3 */
 401       /* since we can write 2z*(1+t5) = (2z-1)*(1+t5') with
 402          t5' = 2z/(2z-1) * t5, thus |t5'| <= 8/7 * t5 */
 403       mpfr_mul (s, s, t, GMP_RNDN); /* (2z-1)*log(z) * (1+t7)^6 */
 404       mpfr_div_2ui (s, s, 1, GMP_RNDN); /* (z-1/2)*log(z) * (1+t7)^6 */
 405       mpfr_sub (s, s, z, GMP_RNDN); /* (z-1/2)*log(z)-z */
 406       /* s = [(z-1/2)*log(z)-z]*(1+u)^14, s >= 1/2 */
 407
 408       mpfr_ui_div (u, 1, z, GMP_RNDN); /* 1/z * (1+u), u <= 1/4 since z >= 4 */
 409
 410       /* the first term is B[2]/2/z = 1/12/z: t=1/12/z, C[2]=1 */
 411       mpfr_div_ui (t, u, 12, GMP_RNDN); /* 1/(12z) * (1+u)^2, t <= 3/128 */
 412       mpfr_set (v, t, GMP_RNDN);        /* (1+u)^2, v < 2^(-5) */
 413       mpfr_add (s, s, v, GMP_RNDN);     /* (1+u)^15 */
 414
 415       mpfr_mul (u, u, u, GMP_RNDN); /* 1/z^2 * (1+u)^3 */
 416
 417       if (Bm == 0)
 418         {
 419           B = bernoulli ((mpz_t *) 0, 0);
 420           B = bernoulli (B, 1);
 421           Bm = 2;
 422         }
 423
 424       /* m <= maxm ensures that 2*m*(2*m+1) <= ULONG_MAX */
 425       maxm = 1UL << (BITS_PER_MP_LIMB / 2 - 1);
 426
 427       /* s:(1+u)^15, t:(1+u)^2, t <= 3/128 */
 428
 429       for (m = 2; MPFR_GET_EXP(v) + (mp_exp_t) w >= MPFR_GET_EXP(s); m++)
 430         {
 431           mpfr_mul (t, t, u, GMP_RNDN); /* (1+u)^(10m-14) */
 432           if (m <= maxm)
 433             {
 434               mpfr_mul_ui (t, t, 2*(m-1)*(2*m-3), GMP_RNDN);
 435               mpfr_div_ui (t, t, 2*m*(2*m-1), GMP_RNDN);
 436               mpfr_div_ui (t, t, 2*m*(2*m+1), GMP_RNDN);
 437             }
 438           else
 439             {
 440               mpfr_mul_ui (t, t, 2*(m-1), GMP_RNDN);
 441               mpfr_mul_ui (t, t, 2*m-3, GMP_RNDN);
 442               mpfr_div_ui (t, t, 2*m, GMP_RNDN);
 443               mpfr_div_ui (t, t, 2*m-1, GMP_RNDN);
 444               mpfr_div_ui (t, t, 2*m, GMP_RNDN);
 445               mpfr_div_ui (t, t, 2*m+1, GMP_RNDN);
 446             }
 447           /* (1+u)^(10m-8) */
 448           /* invariant: t=1/(2m)/(2m-1)/z^(2m-1)/(2m+1)! */
 449           if (Bm <= m)
 450             {
 451               B = bernoulli (B, m); /* B[2m]*(2m+1)!, exact */
 452               Bm ++;
 453             }
 454           mpfr_mul_z (v, t, B[m], GMP_RNDN); /* (1+u)^(10m-7) */
 455           MPFR_ASSERTD(MPFR_GET_EXP(v) <= - (2 * m + 3));
 456           mpfr_add (s, s, v, GMP_RNDN);
 457         }
 458       /* m <= 1/2*Pi*e*z ensures that |v[m]| < 1/2^(2m+3) */
 459       MPFR_ASSERTD ((double) m <= 4.26 * mpfr_get_d (z, GMP_RNDZ));
 460
 461       /* We have sum([(1+u)^(10m-7)-1]*1/2^(2m+3), m=2..infinity)
 462          <= 1.46*u for u <= 2^(-3).
 463          We have 0 < lngamma(z) - [(z - 1/2) ln(z) - z + 1/2 ln(2 Pi)] < 0.021
 464          for z >= 4, thus since the initial s >= 0.85, the different values of
 465          s differ by at most one binade, and the total rounding error on s
 466          in the for-loop is bounded by 2*(m-1)*ulp(final_s).
 467          The error coming from the v's is bounded by
 468          1.46*2^(-w) <= 2*ulp(final_s).
 469          Thus the total error so far is bounded by [(1+u)^15-1]*s+2m*ulp(s)
 470          <= (2m+47)*ulp(s).
 471          Taking into account the truncation error (which is bounded by the last
 472          term v[] according to 6.1.42 in A&S), the bound is (2m+48)*ulp(s).
 473       */
 474
 475       /* add 1/2*log(2*Pi) and subtract log(z0*(z0+1)*...*(z0+k-1)) */
 476       mpfr_const_pi (v, GMP_RNDN); /* v = Pi*(1+u) */
 477       mpfr_mul_2ui (v, v, 1, GMP_RNDN); /* v = 2*Pi * (1+u) */
 478       if (k)
 479         {
 480           unsigned long l;
 481           mpfr_set (t, z0, GMP_RNDN); /* t = z0*(1+u) */
 482           for (l = 1; l < k; l++)
 483             {
 484               mpfr_add_ui (u, z0, l, GMP_RNDN); /* u = (z0+l)*(1+u) */
 485               mpfr_mul (t, t, u, GMP_RNDN);     /* (1+u)^(2l+1) */
 486             }
 487           /* now t: (1+u)^(2k-1) */
 488           /* instead of computing log(sqrt(2*Pi)/t), we compute
 489              1/2*log(2*Pi/t^2), which trades a square root for a square */
 490           mpfr_mul (t, t, t, GMP_RNDN); /* (z0*...*(z0+k-1))^2, (1+u)^(4k-1) */
 491           mpfr_div (v, v, t, GMP_RNDN);
 492           /* 2*Pi/(z0*...*(z0+k-1))^2 (1+u)^(4k+1) */
 493         }
 494 #ifdef IS_GAMMA
 495       err_s = MPFR_GET_EXP(s);
 496       mpfr_exp (s, s, GMP_RNDN);
 497       /* before the exponential, we have s = s0 + h where
 498          |h| <= (2m+48)*ulp(s), thus exp(s0) = exp(s) * exp(-h).
 499          For |h| <= 1/4, we have |exp(h)-1| <= 1.2*|h| thus
 500          |exp(s) - exp(s0)| <= 1.2 * exp(s) * (2m+48)* 2^(EXP(s)-w). */
 501       d = 1.2 * (2.0 * (double) m + 48.0);
 502       /* the error on s is bounded by d*2^err_s * 2^(-w) */
 503       mpfr_sqrt (t, v, GMP_RNDN);
 504       /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
 505          thus t = sqrt(v0)*(1+u)^(2k+3/2). */
 506       mpfr_mul (s, s, t, GMP_RNDN);
 507       /* the error on input s is bounded by (1+u)^(d*2^err_s),
 508          and that on t is (1+u)^(2k+3/2), thus the
 509          total error is (1+u)^(d*2^err_s+2k+5/2) */
 510       err_s += __gmpfr_ceil_log2 (d);
 511       err_t = __gmpfr_ceil_log2 (2.0 * (double) k + 2.5);
 512       err_s = (err_s >= err_t) ? err_s + 1 : err_t + 1;
 513 #else
 514       mpfr_log (t, v, GMP_RNDN);
 515       /* let v0 be the exact value of v. We have v = v0*(1+u)^(4k+1),
 516          thus log(v) = log(v0) + (4k+1)*log(1+u). Since |log(1+u)/u| <= 1.07
 517          for |u| <= 2^(-3), the absolute error on log(v) is bounded by
 518          1.07*(4k+1)*u, and the rounding error by ulp(t). */
 519       mpfr_div_2ui (t, t, 1, GMP_RNDN);
 520       /* the error on t is now bounded by ulp(t) + 0.54*(4k+1)*2^(-w).
 521          We have sqrt(2*Pi)/(z0*(z0+1)*...*(z0+k-1)) <= sqrt(2*Pi)/k! <= 0.5
 522          since k>=3, thus t <= -0.5 and ulp(t) >= 2^(-w).
 523          Thus the error on t is bounded by (2.16*k+1.54)*ulp(t). */
 524       err_t = MPFR_GET_EXP(t) + (mp_exp_t)
 525         __gmpfr_ceil_log2 (2.2 * (double) k + 1.6);
 526       err_s = MPFR_GET_EXP(s) + (mp_exp_t)
 527         __gmpfr_ceil_log2 (2.0 * (double) m + 48.0);
 528       mpfr_add (s, s, t, GMP_RNDN); /* this is a subtraction in fact */
 529       /* the final error in ulp(s) is
 530          <= 1 + 2^(err_t-EXP(s)) + 2^(err_s-EXP(s))
 531          <= 2^(1+max(err_t,err_s)-EXP(s)) if err_t <> err_s
 532          <= 2^(2+max(err_t,err_s)-EXP(s)) if err_t = err_s */
 533       err_s = (err_t == err_s) ? 1 + err_s : ((err_t > err_s) ? err_t : err_s);
 534       err_s += 1 - MPFR_GET_EXP(s);
 535 #endif
 536     }
 537   while (MPFR_UNLIKELY (!MPFR_CAN_ROUND (s, w - err_s, precy, rnd)));
 538
 539   oldBm = Bm;
 540   while (Bm--)
 541     mpz_clear (B[Bm]);
 542   (*__gmp_free_func) (B, oldBm * sizeof (mpz_t));
 543
 544  end:
 545   inexact = mpfr_set (y, s, rnd);
 546
 547   mpfr_clear (s);
 548   mpfr_clear (t);
 549   mpfr_clear (u);
 550   mpfr_clear (v);
 551   mpfr_clear (z);
 552
 553   MPFR_SAVE_EXPO_FREE (expo);
 554   return mpfr_check_range (y, inexact, rnd);
 555 }
 556
 557 #ifndef IS_GAMMA
 558
 559 int
 560 mpfr_lngamma (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd)
 561 {
 562   int inex;
 563
 564   MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", x, x, rnd),
 565                  ("lngamma[%#R]=%R inexact=%d", y, y, inex));
 566
 567   /* special cases */
 568   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
 569     {
 570       if (MPFR_IS_NAN (x) || MPFR_IS_NEG (x))
 571         {
 572           MPFR_SET_NAN (y);
 573           MPFR_RET_NAN;
 574         }
 575       else /* lngamma(+Inf) = lngamma(+0) = +Inf */
 576         {
 577           MPFR_SET_INF (y);
 578           MPFR_SET_POS (y);
 579           MPFR_RET (0);  /* exact */
 580         }
 581     }
 582
 583   /* if x < 0 and -2k-1 <= x <= -2k, then lngamma(x) = NaN */
 584   if (MPFR_IS_NEG (x) && (unit_bit (x) == 0 || mpfr_integer_p (x)))
 585     {
 586       MPFR_SET_NAN (y);
 587       MPFR_RET_NAN;
 588     }
 589
 590   inex = mpfr_lngamma_aux (y, x, rnd);
 591   return inex;
 592 }
 593
 594 int
 595 mpfr_lgamma (mpfr_ptr y, int *signp, mpfr_srcptr x, mp_rnd_t rnd)
 596 {
 597   int inex;
 598
 599   MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", x, x, rnd),
 600                  ("lgamma[%#R]=%R inexact=%d", y, y, inex));
 601
 602   *signp = 1;  /* most common case */
 603
 604   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
 605     {
 606       if (MPFR_IS_NAN (x))
 607         {
 608           MPFR_SET_NAN (y);
 609           MPFR_RET_NAN;
 610         }
 611       else
 612         {
 613           *signp = MPFR_INT_SIGN (x);
 614           MPFR_SET_INF (y);
 615           MPFR_SET_POS (y);
 616           MPFR_RET (0);
 617         }
 618     }
 619
 620   if (MPFR_IS_NEG (x))
 621     {
 622       if (mpfr_integer_p (x))
 623         {
 624           MPFR_SET_INF (y);
 625           MPFR_SET_POS (y);
 626           MPFR_RET (0);
 627         }
 628
 629       if (unit_bit (x) == 0)
 630         *signp = -1;
 631
 632       /* For tiny negative x, we have gamma(x) = 1/x - euler + O(x),
 633          thus |gamma(x)| = -1/x + euler + O(x), and
 634          log |gamma(x)| = -log(-x) - euler*x + O(x^2).
 635          More precisely we have for -0.4 <= x < 0:
 636          -log(-x) <= log |gamma(x)| <= -log(-x) - x.
 637          Since log(x) is not representable, we may have an instance of the
 638          Table Maker Dilemma. The only way to ensure correct rounding is to
 639          compute an interval [l,h] such that l <= -log(-x) and
 640          -log(-x) - x <= h, and check whether l and h round to the same number
 641          for the target precision and rounding modes. */
 642       if (MPFR_EXP(x) + 1 <= - (mp_exp_t) MPFR_PREC(y))
 643         /* since PREC(y) >= 1, this ensures EXP(x) <= -2,
 644            thus |x| <= 0.25 < 0.4 */
 645         {
 646           mpfr_t l, h;
 647           int ok, inex2;
 648           mp_prec_t w = MPFR_PREC (y) + 14;
 649
 650           while (1)
 651             {
 652               mpfr_init2 (l, w);
 653               mpfr_init2 (h, w);
 654               /* we want a lower bound on -log(-x), thus an upper bound
 655                  on log(-x), thus an upper bound on -x. */
 656               mpfr_neg (l, x, GMP_RNDU); /* upper bound on -x */
 657               mpfr_log (l, l, GMP_RNDU); /* upper bound for log(-x) */
 658               mpfr_neg (l, l, GMP_RNDD); /* lower bound for -log(-x) */
 659               mpfr_neg (h, x, GMP_RNDD); /* lower bound on -x */
 660               mpfr_log (h, h, GMP_RNDD); /* lower bound on log(-x) */
 661               mpfr_neg (h, h, GMP_RNDU); /* upper bound for -log(-x) */
 662               mpfr_sub (h, h, x, GMP_RNDU); /* upper bound for -log(-x) - x */
 663               inex = mpfr_prec_round (l, MPFR_PREC (y), rnd);
 664               inex2 = mpfr_prec_round (h, MPFR_PREC (y), rnd);
 665               /* Caution: we not only need l = h, but both inexact flags
 666                  should agree. Indeed, one of the inexact flags might be
 667                  zero. In that case if we assume ln|gamma(x)| cannot be
 668                  exact, the other flag should be correct. We are conservative
 669                  here and request that both inexact flags agree. */
 670               ok = SAME_SIGN (inex, inex2) && mpfr_equal_p (l, h);
 671               if (ok)
 672                 mpfr_set (y, h, rnd); /* exact */
 673               mpfr_clear (l);
 674               mpfr_clear (h);
 675               if (ok)
 676                 return inex;
 677               /* if ulp(log(-x)) <= |x| there is no reason to loop,
 678                  since the width of [l, h] will be at least |x| */
 679               if (MPFR_EXP(l) < MPFR_EXP(x) + (mp_exp_t) w)
 680                 break;
 681               w += MPFR_INT_CEIL_LOG2(w) + 3;
 682             }
 683         }
 684     }
 685
 686   inex = mpfr_lngamma_aux (y, x, rnd);
 687   return inex;
 688 }
 689
 690 #endif