contrib/mpfr/gamma.c

   1 /* mpfr_gamma -- gamma function
   2
   3 Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   4 Contributed by the Arenaire and Cacao projects, INRIA.
   5
   6 This file is part of the GNU MPFR Library.
   7
   8 The GNU MPFR Library is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU Lesser General Public License as published by
  10 the Free Software Foundation; either version 2.1 of the License, or (at your
  11 option) any later version.
  12
  13 The GNU MPFR Library is distributed in the hope that it will be useful, but
  14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 License for more details.
  17
  18 You should have received a copy of the GNU Lesser General Public License
  19 along with the GNU MPFR Library; see the file COPYING.LIB.  If not, write to
  20 the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
  21 MA 02110-1301, USA. */
  22
  23 #define MPFR_NEED_LONGLONG_H
  24 #include "mpfr-impl.h"
  25
  26 #define IS_GAMMA
  27 #include "lngamma.c"
  28 #undef IS_GAMMA
  29
  30 /* return a sufficient precision such that 2-x is exact, assuming x < 0 */
  31 static mp_prec_t
  32 mpfr_gamma_2_minus_x_exact (mpfr_srcptr x)
  33 {
  34   /* Since x < 0, 2-x = 2+y with y := -x.
  35      If y < 2, a precision w >= PREC(y) + EXP(2)-EXP(y) = PREC(y) + 2 - EXP(y)
  36      is enough, since no overlap occurs in 2+y, so no carry happens.
  37      If y >= 2, either ULP(y) <= 2, and we need w >= PREC(y)+1 since a
  38      carry can occur, or ULP(y) > 2, and we need w >= EXP(y)-1:
  39      (a) if EXP(y) <= 1, w = PREC(y) + 2 - EXP(y)
  40      (b) if EXP(y) > 1 and EXP(y)-PREC(y) <= 1, w = PREC(y) + 1
  41      (c) if EXP(y) > 1 and EXP(y)-PREC(y) > 1, w = EXP(y) - 1 */
  42   return (MPFR_GET_EXP(x) <= 1) ? MPFR_PREC(x) + 2 - MPFR_GET_EXP(x)
  43     : ((MPFR_GET_EXP(x) <= MPFR_PREC(x) + 1) ? MPFR_PREC(x) + 1
  44        : MPFR_GET_EXP(x) - 1);
  45 }
  46
  47 /* return a sufficient precision such that 1-x is exact, assuming x < 1 */
  48 static mp_prec_t
  49 mpfr_gamma_1_minus_x_exact (mpfr_srcptr x)
  50 {
  51   if (MPFR_IS_POS(x))
  52     return MPFR_PREC(x) - MPFR_GET_EXP(x);
  53   else if (MPFR_GET_EXP(x) <= 0)
  54     return MPFR_PREC(x) + 1 - MPFR_GET_EXP(x);
  55   else if (MPFR_PREC(x) >= MPFR_GET_EXP(x))
  56     return MPFR_PREC(x) + 1;
  57   else
  58     return MPFR_GET_EXP(x);
  59 }
  60
  61 /* returns a lower bound of the number of significant bits of n!
  62    (not counting the low zero bits).
  63    We know n! >= (n/e)^n*sqrt(2*Pi*n) for n >= 1, and the number of zero bits
  64    is floor(n/2) + floor(n/4) + floor(n/8) + ...
  65    This approximation is exact for n <= 500000, except for n = 219536, 235928,
  66    298981, 355854, 464848, 493725, 498992 where it returns a value 1 too small.
  67 */
  68 static unsigned long
  69 bits_fac (unsigned long n)
  70 {
  71   mpfr_t x, y;
  72   unsigned long r, k;
  73   mpfr_init2 (x, 38);
  74   mpfr_init2 (y, 38);
  75   mpfr_set_ui (x, n, GMP_RNDZ);
  76   mpfr_set_str_binary (y, "10.101101111110000101010001011000101001"); /* upper bound of e */
  77   mpfr_div (x, x, y, GMP_RNDZ);
  78   mpfr_pow_ui (x, x, n, GMP_RNDZ);
  79   mpfr_const_pi (y, GMP_RNDZ);
  80   mpfr_mul_ui (y, y, 2 * n, GMP_RNDZ);
  81   mpfr_sqrt (y, y, GMP_RNDZ);
  82   mpfr_mul (x, x, y, GMP_RNDZ);
  83   mpfr_log2 (x, x, GMP_RNDZ);
  84   r = mpfr_get_ui (x, GMP_RNDU);
  85   for (k = 2; k <= n; k *= 2)
  86     r -= n / k;
  87   mpfr_clear (x);
  88   mpfr_clear (y);
  89   return r;
  90 }
  91
  92 /* We use the reflection formula
  93   Gamma(1+t) Gamma(1-t) = - Pi t / sin(Pi (1 + t))
  94   in order to treat the case x <= 1,
  95   i.e. with x = 1-t, then Gamma(x) = -Pi*(1-x)/sin(Pi*(2-x))/GAMMA(2-x)
  96 */
  97 int
  98 mpfr_gamma (mpfr_ptr gamma, mpfr_srcptr x, mp_rnd_t rnd_mode)
  99 {
 100   mpfr_t xp, GammaTrial, tmp, tmp2;
 101   mpz_t fact;
 102   mp_prec_t realprec;
 103   int compared, inex, is_integer;
 104   MPFR_GROUP_DECL (group);
 105   MPFR_SAVE_EXPO_DECL (expo);
 106   MPFR_ZIV_DECL (loop);
 107
 108   MPFR_LOG_FUNC (("x[%#R]=%R rnd=%d", x, x, rnd_mode),
 109                  ("gamma[%#R]=%R inexact=%d", gamma, gamma, inex));
 110
 111   /* Trivial cases */
 112   if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
 113     {
 114       if (MPFR_IS_NAN (x))
 115         {
 116           MPFR_SET_NAN (gamma);
 117           MPFR_RET_NAN;
 118         }
 119       else if (MPFR_IS_INF (x))
 120         {
 121           if (MPFR_IS_NEG (x))
 122             {
 123               MPFR_SET_NAN (gamma);
 124               MPFR_RET_NAN;
 125             }
 126           else
 127             {
 128               MPFR_SET_INF (gamma);
 129               MPFR_SET_POS (gamma);
 130               MPFR_RET (0);  /* exact */
 131             }
 132         }
 133       else /* x is zero */
 134         {
 135           MPFR_ASSERTD(MPFR_IS_ZERO(x));
 136           MPFR_SET_INF(gamma);
 137           MPFR_SET_SAME_SIGN(gamma, x);
 138           MPFR_RET (0);  /* exact */
 139         }
 140     }
 141
 142   /* Check for tiny arguments, where gamma(x) ~ 1/x - euler + ....
 143      We know from "Bound on Runs of Zeros and Ones for Algebraic Functions",
 144      Proceedings of Arith15, T. Lang and J.-M. Muller, 2001, that the maximal
 145      number of consecutive zeroes or ones after the round bit is n-1 for an
 146      input of n bits. But we need a more precise lower bound. Assume x has
 147      n bits, and 1/x is near a floating-point number y of n+1 bits. We can
 148      write x = X*2^e, y = Y/2^f with X, Y integers of n and n+1 bits.
 149      Thus X*Y^2^(e-f) is near from 1, i.e., X*Y is near from 2^(f-e).
 150      Two cases can happen:
 151      (i) either X*Y is exactly 2^(f-e), but this can happen only if X and Y
 152          are themselves powers of two, i.e., x is a power of two;
 153      (ii) or X*Y is at distance at least one from 2^(f-e), thus
 154           |xy-1| >= 2^(e-f), or |y-1/x| >= 2^(e-f)/x = 2^(-f)/X >= 2^(-f-n).
 155           Since ufp(y) = 2^(n-f) [ufp = unit in first place], this means
 156           that the distance |y-1/x| >= 2^(-2n) ufp(y).
 157           Now assuming |gamma(x)-1/x| <= 1, which is true for x <= 1,
 158           if 2^(-2n) ufp(y) >= 2, the error is at most 2^(-2n-1) ufp(y),
 159           and round(1/x) with precision >= 2n+2 gives the correct result.
 160           If x < 2^E, then y > 2^(-E), thus ufp(y) > 2^(-E-1).
 161           A sufficient condition is thus EXP(x) + 2 <= -2 MAX(PREC(x),PREC(Y)).
 162   */
 163   if (MPFR_EXP(x) + 2 <= -2 * (mp_exp_t) MAX(MPFR_PREC(x), MPFR_PREC(gamma)))
 164     {
 165       int positive = MPFR_IS_POS (x);
 166       inex = mpfr_ui_div (gamma, 1, x, rnd_mode);
 167       if (inex == 0) /* x is a power of two */
 168         {
 169           if (positive)
 170             {
 171               if (rnd_mode == GMP_RNDU || rnd_mode == GMP_RNDN)
 172                 inex = 1;
 173               else /* round to zero or to -Inf */
 174                 {
 175                   mpfr_nextbelow (gamma); /* 2^k - epsilon */
 176                   inex = -1;
 177                 }
 178             }
 179           else /* negative */
 180             {
 181               if (rnd_mode == GMP_RNDU || rnd_mode == GMP_RNDZ)
 182                 {
 183                   mpfr_nextabove (gamma); /* -2^k + epsilon */
 184                   inex = 1;
 185                 }
 186               else /* round to nearest and to -Inf */
 187                 inex = -1;
 188             }
 189         }
 190       return inex;
 191     }
 192
 193   is_integer = mpfr_integer_p (x);
 194   /* gamma(x) for x a negative integer gives NaN */
 195   if (is_integer && MPFR_IS_NEG(x))
 196     {
 197       MPFR_SET_NAN (gamma);
 198       MPFR_RET_NAN;
 199     }
 200
 201   compared = mpfr_cmp_ui (x, 1);
 202   if (compared == 0)
 203     return mpfr_set_ui (gamma, 1, rnd_mode);
 204
 205   /* if x is an integer that fits into an unsigned long, use mpfr_fac_ui
 206      if argument is not too large.
 207      If precision is p, fac_ui costs O(u*p), whereas gamma costs O(p*M(p)),
 208      so for u <= M(p), fac_ui should be faster.
 209      We approximate here M(p) by p*log(p)^2, which is not a bad guess.
 210      Warning: since the generic code does not handle exact cases,
 211      we want all cases where gamma(x) is exact to be treated here.
 212   */
 213   if (is_integer && mpfr_fits_ulong_p (x, GMP_RNDN))
 214     {
 215       unsigned long int u;
 216       mp_prec_t p = MPFR_PREC(gamma);
 217       u = mpfr_get_ui (x, GMP_RNDN);
 218       if (u < 44787929UL && bits_fac (u - 1) <= p + (rnd_mode == GMP_RNDN))
 219         /* bits_fac: lower bound on the number of bits of m,
 220            where gamma(x) = (u-1)! = m*2^e with m odd. */
 221         return mpfr_fac_ui (gamma, u - 1, rnd_mode);
 222       /* if bits_fac(...) > p (resp. p+1 for rounding to nearest),
 223          then gamma(x) cannot be exact in precision p (resp. p+1).
 224          FIXME: remove the test u < 44787929UL after changing bits_fac
 225          to return a mpz_t or mpfr_t. */
 226     }
 227
 228   /* check for overflow: according to (6.1.37) in Abramowitz & Stegun,
 229      gamma(x) >= exp(-x) * x^(x-1/2) * sqrt(2*Pi)
 230               >= 2 * (x/e)^x / x for x >= 1 */
 231   if (compared > 0)
 232     {
 233       mpfr_t yp;
 234       MPFR_BLOCK_DECL (flags);
 235
 236       /* 1/e rounded down to 53 bits */
 237 #define EXPM1_STR "0.010111100010110101011000110110001011001110111100111"
 238       mpfr_init2 (xp, 53);
 239       mpfr_init2 (yp, 53);
 240       mpfr_set_str_binary (xp, EXPM1_STR);
 241       mpfr_mul (xp, x, xp, GMP_RNDZ);
 242       mpfr_sub_ui (yp, x, 2, GMP_RNDZ);
 243       mpfr_pow (xp, xp, yp, GMP_RNDZ); /* (x/e)^(x-2) */
 244       mpfr_set_str_binary (yp, EXPM1_STR);
 245       mpfr_mul (xp, xp, yp, GMP_RNDZ); /* x^(x-2) / e^(x-1) */
 246       mpfr_mul (xp, xp, yp, GMP_RNDZ); /* x^(x-2) / e^x */
 247       mpfr_mul (xp, xp, x, GMP_RNDZ); /* lower bound on x^(x-1) / e^x */
 248       MPFR_BLOCK (flags, mpfr_mul_2ui (xp, xp, 1, GMP_RNDZ));
 249       mpfr_clear (xp);
 250       mpfr_clear (yp);
 251       return MPFR_OVERFLOW (flags) ? mpfr_overflow (gamma, rnd_mode, 1)
 252         : mpfr_gamma_aux (gamma, x, rnd_mode);
 253     }
 254
 255   /* now compared < 0 */
 256
 257   MPFR_SAVE_EXPO_MARK (expo);
 258
 259   /* check for underflow: for x < 1,
 260      gamma(x) = Pi*(x-1)/sin(Pi*(2-x))/gamma(2-x).
 261      Since gamma(2-x) >= 2 * ((2-x)/e)^(2-x) / (2-x), we have
 262      |gamma(x)| <= Pi*(1-x)*(2-x)/2/((2-x)/e)^(2-x) / |sin(Pi*(2-x))|
 263                 <= 12 * ((2-x)/e)^x / |sin(Pi*(2-x))|.
 264      To avoid an underflow in ((2-x)/e)^x, we compute the logarithm.
 265   */
 266   if (MPFR_IS_NEG(x))
 267     {
 268       int underflow = 0, sgn, ck;
 269       mp_prec_t w;
 270
 271       mpfr_init2 (xp, 53);
 272       mpfr_init2 (tmp, 53);
 273       mpfr_init2 (tmp2, 53);
 274       /* we want an upper bound for x * [log(2-x)-1].
 275          since x < 0, we need a lower bound on log(2-x) */
 276       mpfr_ui_sub (xp, 2, x, GMP_RNDD);
 277       mpfr_log (xp, xp, GMP_RNDD);
 278       mpfr_sub_ui (xp, xp, 1, GMP_RNDD);
 279       mpfr_mul (xp, xp, x, GMP_RNDU);
 280
 281       /* we need an upper bound on 1/|sin(Pi*(2-x))|,
 282          thus a lower bound on |sin(Pi*(2-x))|.
 283          If 2-x is exact, then the error of Pi*(2-x) is (1+u)^2 with u = 2^(-p)
 284          thus the error on sin(Pi*(2-x)) is less than 1/2ulp + 3Pi(2-x)u,
 285          assuming u <= 1, thus <= u + 3Pi(2-x)u */
 286
 287       w = mpfr_gamma_2_minus_x_exact (x); /* 2-x is exact for prec >= w */
 288       w += 17; /* to get tmp2 small enough */
 289       mpfr_set_prec (tmp, w);
 290       mpfr_set_prec (tmp2, w);
 291       ck = mpfr_ui_sub (tmp, 2, x, GMP_RNDN);
 292       MPFR_ASSERTD (ck == 0);
 293       mpfr_const_pi (tmp2, GMP_RNDN);
 294       mpfr_mul (tmp2, tmp2, tmp, GMP_RNDN); /* Pi*(2-x) */
 295       mpfr_sin (tmp, tmp2, GMP_RNDN); /* sin(Pi*(2-x)) */
 296       sgn = mpfr_sgn (tmp);
 297       mpfr_abs (tmp, tmp, GMP_RNDN);
 298       mpfr_mul_ui (tmp2, tmp2, 3, GMP_RNDU); /* 3Pi(2-x) */
 299       mpfr_add_ui (tmp2, tmp2, 1, GMP_RNDU); /* 3Pi(2-x)+1 */
 300       mpfr_div_2ui (tmp2, tmp2, mpfr_get_prec (tmp), GMP_RNDU);
 301       /* if tmp2<|tmp|, we get a lower bound */
 302       if (mpfr_cmp (tmp2, tmp) < 0)
 303         {
 304           mpfr_sub (tmp, tmp, tmp2, GMP_RNDZ); /* low bnd on |sin(Pi*(2-x))| */
 305           mpfr_ui_div (tmp, 12, tmp, GMP_RNDU); /* upper bound */
 306           mpfr_log (tmp, tmp, GMP_RNDU);
 307           mpfr_add (tmp, tmp, xp, GMP_RNDU);
 308           underflow = mpfr_cmp_si (xp, expo.saved_emin - 2) <= 0;
 309         }
 310
 311       mpfr_clear (xp);
 312       mpfr_clear (tmp);
 313       mpfr_clear (tmp2);
 314       if (underflow) /* the sign is the opposite of that of sin(Pi*(2-x)) */
 315         {
 316           MPFR_SAVE_EXPO_FREE (expo);
 317           return mpfr_underflow (gamma, (rnd_mode == GMP_RNDN) ? GMP_RNDZ : rnd_mode, -sgn);
 318         }
 319     }
 320
 321   realprec = MPFR_PREC (gamma);
 322   /* we want both 1-x and 2-x to be exact */
 323   {
 324     mp_prec_t w;
 325     w = mpfr_gamma_1_minus_x_exact (x);
 326     if (realprec < w)
 327       realprec = w;
 328     w = mpfr_gamma_2_minus_x_exact (x);
 329     if (realprec < w)
 330       realprec = w;
 331   }
 332   realprec = realprec + MPFR_INT_CEIL_LOG2 (realprec) + 20;
 333   MPFR_ASSERTD(realprec >= 5);
 334
 335   MPFR_GROUP_INIT_4 (group, realprec + MPFR_INT_CEIL_LOG2 (realprec) + 20,
 336                      xp, tmp, tmp2, GammaTrial);
 337   mpz_init (fact);
 338   MPFR_ZIV_INIT (loop, realprec);
 339   for (;;)
 340     {
 341       mp_exp_t err_g;
 342       int ck;
 343       MPFR_GROUP_REPREC_4 (group, realprec, xp, tmp, tmp2, GammaTrial);
 344
 345       /* reflection formula: gamma(x) = Pi*(x-1)/sin(Pi*(2-x))/gamma(2-x) */
 346
 347       ck = mpfr_ui_sub (xp, 2, x, GMP_RNDN);
 348       MPFR_ASSERTD(ck == 0); /* 2-x, exact */
 349       mpfr_gamma (tmp, xp, GMP_RNDN);   /* gamma(2-x), error (1+u) */
 350       mpfr_const_pi (tmp2, GMP_RNDN);   /* Pi, error (1+u) */
 351       mpfr_mul (GammaTrial, tmp2, xp, GMP_RNDN); /* Pi*(2-x), error (1+u)^2 */
 352       err_g = MPFR_GET_EXP(GammaTrial);
 353       mpfr_sin (GammaTrial, GammaTrial, GMP_RNDN); /* sin(Pi*(2-x)) */
 354       err_g = err_g + 1 - MPFR_GET_EXP(GammaTrial);
 355       /* let g0 the true value of Pi*(2-x), g the computed value.
 356          We have g = g0 + h with |h| <= |(1+u^2)-1|*g.
 357          Thus sin(g) = sin(g0) + h' with |h'| <= |(1+u^2)-1|*g.
 358          The relative error is thus bounded by |(1+u^2)-1|*g/sin(g)
 359          <= |(1+u^2)-1|*2^err_g. <= 2.25*u*2^err_g for |u|<=1/4.
 360          With the rounding error, this gives (0.5 + 2.25*2^err_g)*u. */
 361       ck = mpfr_sub_ui (xp, x, 1, GMP_RNDN);
 362       MPFR_ASSERTD(ck == 0); /* x-1, exact */
 363       mpfr_mul (xp, tmp2, xp, GMP_RNDN); /* Pi*(x-1), error (1+u)^2 */
 364       mpfr_mul (GammaTrial, GammaTrial, tmp, GMP_RNDN);
 365       /* [1 + (0.5 + 2.25*2^err_g)*u]*(1+u)^2 = 1 + (2.5 + 2.25*2^err_g)*u
 366          + (0.5 + 2.25*2^err_g)*u*(2u+u^2) + u^2.
 367          For err_g <= realprec-2, we have (0.5 + 2.25*2^err_g)*u <=
 368          0.5*u + 2.25/4 <= 0.6875 and u^2 <= u/4, thus
 369          (0.5 + 2.25*2^err_g)*u*(2u+u^2) + u^2 <= 0.6875*(2u+u/4) + u/4
 370          <= 1.8*u, thus the rel. error is bounded by (4.5 + 2.25*2^err_g)*u. */
 371       mpfr_div (GammaTrial, xp, GammaTrial, GMP_RNDN);
 372       /* the error is of the form (1+u)^3/[1 + (4.5 + 2.25*2^err_g)*u].
 373          For realprec >= 5 and err_g <= realprec-2, [(4.5 + 2.25*2^err_g)*u]^2
 374          <= 0.71, and for |y|<=0.71, 1/(1-y) can be written 1+a*y with a<=4.
 375          (1+u)^3 * (1+4*(4.5 + 2.25*2^err_g)*u)
 376          = 1 + (21 + 9*2^err_g)*u + (57+27*2^err_g)*u^2 + (55+27*2^err_g)*u^3
 377              + (18+9*2^err_g)*u^4
 378          <= 1 + (21 + 9*2^err_g)*u + (57+27*2^err_g)*u^2 + (56+28*2^err_g)*u^3
 379          <= 1 + (21 + 9*2^err_g)*u + (59+28*2^err_g)*u^2
 380          <= 1 + (23 + 10*2^err_g)*u.
 381          The final error is thus bounded by (23 + 10*2^err_g) ulps,
 382          which is <= 2^6 for err_g<=2, and <= 2^(err_g+4) for err_g >= 2. */
 383       err_g = (err_g <= 2) ? 6 : err_g + 4;
 384
 385       if (MPFR_LIKELY (MPFR_CAN_ROUND (GammaTrial, realprec - err_g,
 386                                        MPFR_PREC(gamma), rnd_mode)))
 387         break;
 388       MPFR_ZIV_NEXT (loop, realprec);
 389     }
 390   MPFR_ZIV_FREE (loop);
 391
 392   inex = mpfr_set (gamma, GammaTrial, rnd_mode);
 393   MPFR_GROUP_CLEAR (group);
 394   mpz_clear (fact);
 395
 396   MPFR_SAVE_EXPO_FREE (expo);
 397   return mpfr_check_range (gamma, inex, rnd_mode);
 398 }