/* mpfr_agm -- arithmetic-geometric mean of two floating-point numbers

Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
Contributed by the AriC and Caramel projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"

/* agm(x,y) is between x and y, so we don't need to save exponent range */
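/* Reminder: for a, b > 0 the AGM is the common limit of the sequences
   u_0 = a, v_0 = b, u_{n+1} = sqrt(u_n*v_n), v_{n+1} = (u_n+v_n)/2;
   the number of common digits of u_n and v_n roughly doubles at each
   step (quadratic convergence), which is why only about log2(p)
   iterations are needed below. */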
int
mpfr_agm (mpfr_ptr r, mpfr_srcptr op2, mpfr_srcptr op1, mpfr_rnd_t rnd_mode)
{
  int compare, inexact;
  mp_size_t s;
  mpfr_prec_t p, q;
  mp_limb_t *up, *vp, *ufp, *vfp;
  mpfr_t u, v, uf, vf, sc1, sc2;
  mpfr_exp_t scaleop = 0, scaleit;
  unsigned long n; /* number of iterations */
  MPFR_ZIV_DECL (loop);
  MPFR_TMP_DECL(marker);
  MPFR_SAVE_EXPO_DECL (expo);
42 (("op2[%Pu]=%.*Rg op1[%Pu]=%.*Rg rnd=%d",
43 mpfr_get_prec (op2
), mpfr_log_prec
, op2
,
44 mpfr_get_prec (op1
), mpfr_log_prec
, op1
, rnd_mode
),
45 ("r[%Pu]=%.*Rg inexact=%d",
46 mpfr_get_prec (r
), mpfr_log_prec
, r
, inexact
));
  /* Deal with special values */
  if (MPFR_ARE_SINGULAR (op1, op2))
    {
      /* If a or b is NaN, the result is NaN */
      if (MPFR_IS_NAN (op1) || MPFR_IS_NAN (op2))
        {
          MPFR_SET_NAN (r);
          MPFR_RET_NAN;
        }
      /* now one of a or b is Inf or 0 */
      /* If a or b is +Inf and both are strictly positive, the result
         is +Inf. Otherwise (a or b is -Inf or 0), the result is NaN */
      else if (MPFR_IS_INF (op1) || MPFR_IS_INF (op2))
        {
          if (MPFR_IS_STRICTPOS (op1) && MPFR_IS_STRICTPOS (op2))
            {
              MPFR_SET_INF (r);
              MPFR_SET_SAME_SIGN (r, op1);
              MPFR_RET (0); /* exact */
            }
          else
            {
              MPFR_SET_NAN (r);
              MPFR_RET_NAN;
            }
        }
      else /* a and b are neither NaN nor Inf, and one is zero */
        { /* If a or b is 0, the result is +0 since a sqrt is positive */
          MPFR_ASSERTD (MPFR_IS_ZERO (op1) || MPFR_IS_ZERO (op2));
          MPFR_SET_POS (r);
          MPFR_SET_ZERO (r);
          MPFR_RET (0); /* exact */
        }
    }

  /* If a or b is negative (excluding -Infinity), the result is NaN */
  if (MPFR_UNLIKELY (MPFR_IS_NEG (op1) || MPFR_IS_NEG (op2)))
    {
      MPFR_SET_NAN (r);
      MPFR_RET_NAN;
    }
  /* Precision of the following calculus */
  q = MPFR_PREC (r);
  p = q + MPFR_INT_CEIL_LOG2 (q) + 15;
  MPFR_ASSERTD (p >= 7); /* see algorithms.tex */
  s = MPFR_PREC2LIMBS (p);
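  /* Example (illustrative values, not from algorithms.tex): for a target
     precision q = 53, MPFR_INT_CEIL_LOG2 (53) = 6, so the working precision
     is p = 53 + 6 + 15 = 74 bits; the extra bits are guard bits that absorb
     the rounding errors of the iteration before the final rounding to q. */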
  /* b (op2) and a (op1) are the 2 operands but we want b >= a */
  compare = mpfr_cmp (op1, op2);
  if (MPFR_UNLIKELY( compare == 0 ))
    {
      mpfr_set (r, op1, rnd_mode);
      MPFR_RET (0); /* exact */
    }
  else if (compare > 0)
    {
      mpfr_srcptr t = op1;
      op1 = op2;
      op2 = t;
    }

  /* Now b (=op2) > a (=op1) */

  MPFR_SAVE_EXPO_MARK (expo);

  MPFR_TMP_MARK(marker);

  /* Main loop */
  MPFR_ZIV_INIT (loop, p);
  for (;;)
    {
      mpfr_prec_t eq;
      unsigned long err = 0; /* must be set to 0 at each Ziv iteration */
      MPFR_BLOCK_DECL (flags);
      /* Init temporary vars */
      MPFR_TMP_INIT (up, u, p, s);
      MPFR_TMP_INIT (vp, v, p, s);
      MPFR_TMP_INIT (ufp, uf, p, s);
      MPFR_TMP_INIT (vfp, vf, p, s);
      /* Calculus of un and vn */
      MPFR_BLOCK (flags,
                  mpfr_mul (u, op1, op2, MPFR_RNDN);
                  /* mpfr_mul(...): faster since PREC(op) < PREC(u) */
                  mpfr_add (v, op1, op2, MPFR_RNDN);
                  /* mpfr_add with !=prec is still good */);
      if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags)))
        {
          mpfr_exp_t e1, e2;

          MPFR_ASSERTN (scaleop == 0);
          e1 = MPFR_GET_EXP (op1);
          e2 = MPFR_GET_EXP (op2);
          /* Let's determine scaleop to avoid an overflow/underflow. */
          if (MPFR_OVERFLOW (flags))
            {
              /* Let's recall that emin <= e1 <= e2 <= emax.
                 There has been an overflow. Thus e2 >= emax/2.
                 If the mpfr_mul overflowed, then e1 + e2 > emax.
                 If the mpfr_add overflowed, then e2 = emax.
                 We want: (e1 + scale) + (e2 + scale) <= emax,
                 i.e. scale <= (emax - e1 - e2) / 2. Let's take
                 scale = min(floor((emax - e1 - e2) / 2), -1).
                 This is OK, as:
                 1. emin <= scale <= -1.
                 2. e1 + scale >= emin. Indeed:
                    * If e1 + e2 > emax, then
                      e1 + scale >= e1 + (emax - e1 - e2) / 2 - 1
                                 >= (emax + e1 - emax) / 2 - 1
                                 >= e1 / 2 - 1 >= emin.
                    * Otherwise, mpfr_mul didn't overflow, therefore
                      mpfr_add overflowed and e2 = emax, so that
                      e1 > emin (see restriction below).
                      e1 + scale > emin - 1, thus e1 + scale >= emin.
                 3. e2 + scale <= emax, since scale < 0. */
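              /* Worked example with hypothetical small exponent bounds
                 (values made up for illustration): if emax = 1000 and
                 e1 = 600, e2 = 700, then e1 + e2 = 1300 > emax and
                 scale = -((1300 - 1000 + 1) / 2) = -150, so the scaled
                 exponents are 450 and 550, whose sum 1000 <= emax, and
                 both stay well above emin. */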
              if (e1 + e2 > MPFR_EXT_EMAX)
                {
                  scaleop = - (((e1 + e2) - MPFR_EXT_EMAX + 1) / 2);
                  MPFR_ASSERTN (scaleop < 0);
                }
              else
                {
                  /* The addition necessarily overflowed. */
                  MPFR_ASSERTN (e2 == MPFR_EXT_EMAX);
                  /* The case where e1 = emin and e2 = emax is not supported
                     here. This would mean that the precision of e2 would be
                     huge (and possibly not supported in practice anyway). */
                  MPFR_ASSERTN (e1 > MPFR_EXT_EMIN);
                  scaleop = -1;
                }
            }
          else /* underflow only (in the multiplication) */
            {
              /* We have e1 + e2 <= emin (so, e1 <= e2 <= 0).
                 We want: (e1 + scale) + (e2 + scale) >= emin + 1,
                 i.e. scale >= (emin + 1 - e1 - e2) / 2. Let's take
                 scale = ceil((emin + 1 - e1 - e2) / 2). This is OK, as:
                 1. 1 <= scale <= emax.
                 2. e1 + scale >= emin + 1 >= emin.
                 3. e2 + scale <= scale <= emax. */
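              /* Worked example with hypothetical small exponent bounds
                 (values made up for illustration): if emin = -1000 and
                 e1 = -800, e2 = -300, then e1 + e2 = -1100 <= emin and
                 scale = (-1000 + 2 + 800 + 300) / 2 = 51, so the scaled
                 exponents are -749 and -249 and their sum -998 >= emin + 1. */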
              MPFR_ASSERTN (e1 <= e2 && e2 <= 0);
              scaleop = (MPFR_EXT_EMIN + 2 - e1 - e2) / 2;
              MPFR_ASSERTN (scaleop > 0);
            }
          MPFR_ALIAS (sc1, op1, MPFR_SIGN (op1), e1 + scaleop);
          MPFR_ALIAS (sc2, op2, MPFR_SIGN (op2), e2 + scaleop);
          /* Redo the pre-iteration on the scaled operands. */
          mpfr_mul (u, sc1, sc2, MPFR_RNDN);
          mpfr_add (v, sc1, sc2, MPFR_RNDN);
          MPFR_LOG_MSG (("Exception in pre-iteration, scale = %"
                         MPFR_EXP_FSPEC "d\n", scaleop));
        }

      /* Pre-iteration: u = sqrt(a*b) (geometric mean),
         v = (a+b)/2 (arithmetic mean). */
      mpfr_sqrt (u, u, MPFR_RNDN);
      mpfr_div_2ui (v, v, 1, MPFR_RNDN);
      scaleit = 0;
      n = 1;
      while (mpfr_cmp2 (u, v, &eq) != 0 && eq <= p - 2)
        {
          MPFR_BLOCK_DECL (flags2);

          MPFR_LOG_MSG (("Iteration n = %lu\n", n));

          /* vf = V(k) = (U(k-1) + V(k-1)) / 2 */
          mpfr_add (vf, u, v, MPFR_RNDN); /* No overflow? */
          mpfr_div_2ui (vf, vf, 1, MPFR_RNDN);
          /* See proof in algorithms.tex */
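          /* Informal sketch of the shortcut below (the actual proof is in
             algorithms.tex): once U(k-1) and V(k-1) agree on more than p/4
             bits (i.e. 4*eq > p), writing d = V(k-1) - U(k-1) and
             V(k) = (U(k-1) + V(k-1)) / 2, one has
               AGM(U(k-1), V(k-1)) ~ V(k) - d^2 / (16 * V(k)),
             the neglected terms being of fourth order in the relative
             difference, so a single correction term reaches the working
             precision. */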
          if (4 * eq > p)
            {
              mpfr_t w;
              MPFR_BLOCK_DECL (flags3);

              MPFR_LOG_MSG (("4*eq > p\n", 0));

              mpfr_init2 (w, (p + 1) / 2);
              MPFR_BLOCK
                (flags3,
                 mpfr_sub (w, v, u, MPFR_RNDN);     /* e = V(k-1)-U(k-1) */
                 mpfr_sqr (w, w, MPFR_RNDN);        /* e = e^2 */
                 mpfr_div_2ui (w, w, 4, MPFR_RNDN); /* e *= (1/2)^2*1/4 */
                 mpfr_div (w, w, vf, MPFR_RNDN);    /* 1/4*e^2/V(k) */
                 );
              if (MPFR_LIKELY (! MPFR_UNDERFLOW (flags3)))
                {
                  mpfr_sub (v, vf, w, MPFR_RNDN);
                  err = MPFR_GET_EXP (vf) - MPFR_GET_EXP (v); /* 0 or 1 */
                  mpfr_clear (w);
                  break;
                }
              /* There has been an underflow because of the cancellation
                 between V(k-1) and U(k-1). Let's use the conventional
                 method. */
              MPFR_LOG_MSG (("4*eq > p -> underflow\n", 0));
              mpfr_clear (w);
              mpfr_clear_underflow ();
            }
          /* U(k) increases, so that U.V can overflow (but not underflow). */
          MPFR_BLOCK (flags2, mpfr_mul (uf, u, v, MPFR_RNDN););
          if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags2)))
            {
              mpfr_exp_t scale2;

              scale2 = - (((MPFR_GET_EXP (u) + MPFR_GET_EXP (v))
                           - MPFR_EXT_EMAX + 1) / 2);
              MPFR_EXP (u) += scale2;
              MPFR_EXP (v) += scale2;
              scaleit += scale2;
              MPFR_LOG_MSG (("Overflow in iteration n = %lu, scaleit = %"
                             MPFR_EXP_FSPEC "d (%" MPFR_EXP_FSPEC "d)\n",
                             n, scaleit, scale2));
              mpfr_clear_overflow ();
              continue; /* redo the iteration with the rescaled u and v */
            }
          mpfr_sqrt (u, uf, MPFR_RNDN);
          mpfr_swap (v, vf); /* v = V(k) = (U(k-1)+V(k-1))/2 */
          n++;
        }
      MPFR_LOG_MSG (("End of iterations (n = %lu)\n", n));

      /* the error on v is bounded by (18n+51) ulps, or twice if there
         was an exponent loss in the final subtraction */
      err += MPFR_INT_CEIL_LOG2 (18 * n + 51); /* 18n+51 should not overflow
                                                  since n is about log(p) */
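      /* Rough orders of magnitude (illustrative, not a bound from
         algorithms.tex): n grows like log2(p), so for p up to a few
         thousand bits n stays around 10, 18n+51 stays in the hundreds,
         and the term added to err is only 8 or 9 bits, well within the
         ceil(log2(q)) + 15 guard bits of the working precision. */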
      /* we should have n+2 <= 2^(p/4) [see algorithms.tex] */
      if (MPFR_LIKELY (MPFR_INT_CEIL_LOG2 (n + 2) <= p / 4 &&
                       MPFR_CAN_ROUND (v, p - err, q, rnd_mode)))
        break; /* Stop the loop */
      /* Next Ziv iteration with increased precision */
      MPFR_ZIV_NEXT (loop, p);
      s = MPFR_PREC2LIMBS (p);
    }
  MPFR_ZIV_FREE (loop);

  if (MPFR_UNLIKELY ((__gmpfr_flags & (MPFR_FLAGS_ALL ^ MPFR_FLAGS_INEXACT))
                     != 0))
    {
      MPFR_ASSERTN (! mpfr_overflow_p ());  /* since mpfr_clear_flags */
      MPFR_ASSERTN (! mpfr_underflow_p ()); /* since mpfr_clear_flags */
      MPFR_ASSERTN (! mpfr_divby0_p ());    /* since mpfr_clear_flags */
      MPFR_ASSERTN (! mpfr_nanflag_p ());   /* since mpfr_clear_flags */
    }
  /* Setting of the result */
  inexact = mpfr_set (r, v, rnd_mode);
  MPFR_EXP (r) -= scaleop + scaleit;

  MPFR_TMP_FREE(marker);

  MPFR_SAVE_EXPO_FREE (expo);
  /* From the definition of the AGM, underflow and overflow
     cannot occur. */
  return mpfr_check_range (r, inexact, rnd_mode);
  /* agm(u,v) can be exact for u, v rational only for u=v.
     Proof (due to Nicolas Brisebarre): it suffices to consider
     u=1 and v<1. Then 1/AGM(1,v) = 2F1(1/2,1/2,1;1-v^2),
     and a theorem due to G.V. Chudnovsky states that for x a
     non-zero algebraic number with |x|<1, then
     2F1(1/2,1/2,1;x) and 2F1(-1/2,1/2,1;x) are algebraically
     independent over Q. */
}
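
/* Illustrative usage sketch (not compiled here; the variable names and the
   printed value are only for the example). mpfr_agm is the public function
   declared in mpfr.h:

     #include <stdio.h>
     #include <mpfr.h>

     int main (void)
     {
       mpfr_t x, y, m;
       mpfr_inits2 (128, x, y, m, (mpfr_ptr) 0);
       mpfr_set_ui (x, 1, MPFR_RNDN);
       mpfr_set_ui (y, 2, MPFR_RNDN);
       mpfr_agm (m, x, y, MPFR_RNDN);   // m = AGM(1, 2) ~ 1.456791...
       mpfr_printf ("%.30Rg\n", m);
       mpfr_clears (x, y, m, (mpfr_ptr) 0);
       return 0;
     }
*/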