/* mpfr_agm -- arithmetic-geometric mean of two floating-point numbers

Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
Contributed by the AriC and Caramel projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"

/* agm(x,y) is between x and y, so we don't need to save exponent range */
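/* Reminder: for a, b > 0 the AGM is the common limit of the sequences
   u_0 = a, v_0 = b, u_{n+1} = sqrt(u_n*v_n), v_{n+1} = (u_n+v_n)/2;
   the number of common digits of u_n and v_n roughly doubles at each
   step (quadratic convergence), which is why only about log2(p)
   iterations are needed below. */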
int
mpfr_agm (mpfr_ptr r, mpfr_srcptr op2, mpfr_srcptr op1, mpfr_rnd_t rnd_mode)
{
  int compare, inexact;
  mp_size_t s;
  mpfr_prec_t p, q;
  mp_limb_t *up, *vp, *ufp, *vfp;
  mpfr_t u, v, uf, vf, sc1, sc2;
  mpfr_exp_t scaleop = 0, scaleit;
  unsigned long n; /* number of iterations */
  MPFR_ZIV_DECL (loop);
  MPFR_TMP_DECL(marker);
  MPFR_SAVE_EXPO_DECL (expo);
42 (("op2[%Pu]=%.*Rg op1[%Pu]=%.*Rg rnd=%d",
43 mpfr_get_prec (op2
), mpfr_log_prec
, op2
,
44 mpfr_get_prec (op1
), mpfr_log_prec
, op1
, rnd_mode
),
45 ("r[%Pu]=%.*Rg inexact=%d",
46 mpfr_get_prec (r
), mpfr_log_prec
, r
, inexact
));
  /* Deal with special values */
  if (MPFR_ARE_SINGULAR (op1, op2))
    {
      /* If a or b is NaN, the result is NaN */
      if (MPFR_IS_NAN (op1) || MPFR_IS_NAN (op2))
        {
          MPFR_SET_NAN (r);
          MPFR_RET_NAN;
        }
      /* now one of a or b is Inf or 0 */
      /* If a or b is +Inf and both are strictly positive, the result
         is +Inf. Otherwise (a or b is -Inf or 0), the result is NaN */
      else if (MPFR_IS_INF (op1) || MPFR_IS_INF (op2))
        {
          if (MPFR_IS_STRICTPOS (op1) && MPFR_IS_STRICTPOS (op2))
            {
              MPFR_SET_INF (r);
              MPFR_SET_SAME_SIGN (r, op1);
              MPFR_RET (0); /* exact */
            }
          else
            {
              MPFR_SET_NAN (r);
              MPFR_RET_NAN;
            }
        }
      else /* a and b are neither NaN nor Inf, and one is zero */
        { /* If a or b is 0, the result is +0 since a sqrt is positive */
          MPFR_ASSERTD (MPFR_IS_ZERO (op1) || MPFR_IS_ZERO (op2));
          MPFR_SET_POS (r);
          MPFR_SET_ZERO (r);
          MPFR_RET (0); /* exact */
        }
    }

  /* If a or b is negative (excluding -Infinity), the result is NaN */
  if (MPFR_UNLIKELY (MPFR_IS_NEG (op1) || MPFR_IS_NEG (op2)))
    {
      MPFR_SET_NAN (r);
      MPFR_RET_NAN;
    }
  /* Precision of the following calculus */
  q = MPFR_PREC (r);
  p = q + MPFR_INT_CEIL_LOG2 (q) + 15;
  MPFR_ASSERTD (p >= 7); /* see algorithms.tex */
  s = MPFR_PREC2LIMBS (p);
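  /* Example (illustrative values, not from algorithms.tex): for a target
     precision q = 53, MPFR_INT_CEIL_LOG2 (53) = 6, so the working precision
     is p = 53 + 6 + 15 = 74 bits; the extra bits are guard bits that absorb
     the rounding errors of the iteration before the final rounding to q. */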
  /* b (op2) and a (op1) are the 2 operands but we want b >= a */
  compare = mpfr_cmp (op1, op2);
  if (MPFR_UNLIKELY( compare == 0 ))
    {
      mpfr_set (r, op1, rnd_mode);
      MPFR_RET (0); /* exact */
    }
  else if (compare > 0)
    {
      mpfr_srcptr t = op1;
      op1 = op2;
      op2 = t;
    }

  /* Now b (=op2) > a (=op1) */

  MPFR_SAVE_EXPO_MARK (expo);

  MPFR_TMP_MARK(marker);

  /* Main loop */
  MPFR_ZIV_INIT (loop, p);
  for (;;)
    {
      mpfr_prec_t eq;
      unsigned long err = 0; /* must be set to 0 at each Ziv iteration */
      MPFR_BLOCK_DECL (flags);
      /* Init temporary vars */
      MPFR_TMP_INIT (up, u, p, s);
      MPFR_TMP_INIT (vp, v, p, s);
      MPFR_TMP_INIT (ufp, uf, p, s);
      MPFR_TMP_INIT (vfp, vf, p, s);
      /* Calculus of un and vn */
      MPFR_BLOCK (flags,
                  mpfr_mul (u, op1, op2, MPFR_RNDN);
                  /* mpfr_mul(...): faster since PREC(op) < PREC(u) */
                  mpfr_add (v, op1, op2, MPFR_RNDN);
                  /* mpfr_add with !=prec is still good */);
      if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags)))
        {
          mpfr_exp_t e1, e2;

          MPFR_ASSERTN (scaleop == 0);
          e1 = MPFR_GET_EXP (op1);
          e2 = MPFR_GET_EXP (op2);
          /* Let's determine scaleop to avoid an overflow/underflow. */
          if (MPFR_OVERFLOW (flags))
            {
              /* Let's recall that emin <= e1 <= e2 <= emax.
                 There has been an overflow. Thus e2 >= emax/2.
                 If the mpfr_mul overflowed, then e1 + e2 > emax.
                 If the mpfr_add overflowed, then e2 = emax.
                 We want: (e1 + scale) + (e2 + scale) <= emax,
                 i.e. scale <= (emax - e1 - e2) / 2. Let's take
                 scale = min(floor((emax - e1 - e2) / 2), -1).
                 This is OK, as:
                 1. emin <= scale <= -1.
                 2. e1 + scale >= emin. Indeed:
                    * If e1 + e2 > emax, then
                      e1 + scale >= e1 + (emax - e1 - e2) / 2 - 1
                                 >= (emax + e1 - emax) / 2 - 1
                                 >= e1 / 2 - 1 >= emin.
                    * Otherwise, mpfr_mul didn't overflow, therefore
                      mpfr_add overflowed and e2 = emax, so that
                      e1 > emin (see restriction below).
                      e1 + scale > emin - 1, thus e1 + scale >= emin.
                 3. e2 + scale <= emax, since scale < 0. */
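              /* Worked example with hypothetical small exponent bounds
                 (values made up for illustration): if emax = 1000 and
                 e1 = 600, e2 = 700, then e1 + e2 = 1300 > emax and
                 scale = -((1300 - 1000 + 1) / 2) = -150, so the scaled
                 exponents are 450 and 550, whose sum 1000 <= emax, and
                 both stay well above emin. */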
              if (e1 + e2 > MPFR_EXT_EMAX)
                {
                  scaleop = - (((e1 + e2) - MPFR_EXT_EMAX + 1) / 2);
                  MPFR_ASSERTN (scaleop < 0);
                }
              else
                {
                  /* The addition necessarily overflowed. */
                  MPFR_ASSERTN (e2 == MPFR_EXT_EMAX);
                  /* The case where e1 = emin and e2 = emax is not supported
                     here. This would mean that the precision of e2 would be
                     huge (and possibly not supported in practice anyway). */
                  MPFR_ASSERTN (e1 > MPFR_EXT_EMIN);
                  scaleop = -1;
                }
            }
          else /* underflow only (in the multiplication) */
            {
              /* We have e1 + e2 <= emin (so, e1 <= e2 <= 0).
                 We want: (e1 + scale) + (e2 + scale) >= emin + 1,
                 i.e. scale >= (emin + 1 - e1 - e2) / 2. Let's take
                 scale = ceil((emin + 1 - e1 - e2) / 2). This is OK, as:
                 1. 1 <= scale <= emax.
                 2. e1 + scale >= emin + 1 >= emin.
                 3. e2 + scale <= scale <= emax. */
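              /* Worked example with hypothetical small exponent bounds
                 (values made up for illustration): if emin = -1000 and
                 e1 = -800, e2 = -300, then e1 + e2 = -1100 <= emin and
                 scale = (-1000 + 2 + 800 + 300) / 2 = 51, so the scaled
                 exponents are -749 and -249 and their sum -998 >= emin + 1. */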
              MPFR_ASSERTN (e1 <= e2 && e2 <= 0);
              scaleop = (MPFR_EXT_EMIN + 2 - e1 - e2) / 2;
              MPFR_ASSERTN (scaleop > 0);
            }
          MPFR_ALIAS (sc1, op1, MPFR_SIGN (op1), e1 + scaleop);
          MPFR_ALIAS (sc2, op2, MPFR_SIGN (op2), e2 + scaleop);
          /* Redo the pre-iteration on the scaled operands. */
          mpfr_mul (u, sc1, sc2, MPFR_RNDN);
          mpfr_add (v, sc1, sc2, MPFR_RNDN);
          MPFR_LOG_MSG (("Exception in pre-iteration, scale = %"
                         MPFR_EXP_FSPEC "d\n", scaleop));
        }

      /* Pre-iteration: u = sqrt(a*b) (geometric mean),
         v = (a+b)/2 (arithmetic mean). */
      mpfr_sqrt (u, u, MPFR_RNDN);
      mpfr_div_2ui (v, v, 1, MPFR_RNDN);
      scaleit = 0;
      n = 1;
      while (mpfr_cmp2 (u, v, &eq) != 0 && eq <= p - 2)
        {
          MPFR_BLOCK_DECL (flags2);

          MPFR_LOG_MSG (("Iteration n = %lu\n", n));

          /* vf = V(k) = (U(k-1) + V(k-1)) / 2 */
          mpfr_add (vf, u, v, MPFR_RNDN); /* No overflow? */
          mpfr_div_2ui (vf, vf, 1, MPFR_RNDN);
          /* See proof in algorithms.tex */
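          /* Informal sketch of the shortcut below (the actual proof is in
             algorithms.tex): once U(k-1) and V(k-1) agree on more than p/4
             bits (i.e. 4*eq > p), writing d = V(k-1) - U(k-1) and
             V(k) = (U(k-1) + V(k-1)) / 2, one has
               AGM(U(k-1), V(k-1)) ~ V(k) - d^2 / (16 * V(k)),
             the neglected terms being of fourth order in the relative
             difference, so a single correction term reaches the working
             precision. */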
          if (4 * eq > p)
            {
              mpfr_t w;
              MPFR_BLOCK_DECL (flags3);

              MPFR_LOG_MSG (("4*eq > p\n", 0));

              mpfr_init2 (w, (p + 1) / 2);
              MPFR_BLOCK
                (flags3,
                 mpfr_sub (w, v, u, MPFR_RNDN);     /* e = V(k-1)-U(k-1) */
                 mpfr_sqr (w, w, MPFR_RNDN);        /* e = e^2 */
                 mpfr_div_2ui (w, w, 4, MPFR_RNDN); /* e *= (1/2)^2*1/4 */
                 mpfr_div (w, w, vf, MPFR_RNDN);    /* 1/4*e^2/V(k) */
                 );
              if (MPFR_LIKELY (! MPFR_UNDERFLOW (flags3)))
                {
                  mpfr_sub (v, vf, w, MPFR_RNDN);
                  err = MPFR_GET_EXP (vf) - MPFR_GET_EXP (v); /* 0 or 1 */
                  mpfr_clear (w);
                  break;
                }
              /* There has been an underflow because of the cancellation
                 between V(k-1) and U(k-1). Let's use the conventional
                 method. */
              MPFR_LOG_MSG (("4*eq > p -> underflow\n", 0));
              mpfr_clear (w);
              mpfr_clear_underflow ();
            }
          /* U(k) increases, so that U.V can overflow (but not underflow). */
          MPFR_BLOCK (flags2, mpfr_mul (uf, u, v, MPFR_RNDN););
          if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags2)))
            {
              mpfr_exp_t scale2;

              scale2 = - (((MPFR_GET_EXP (u) + MPFR_GET_EXP (v))
                           - MPFR_EXT_EMAX + 1) / 2);
              MPFR_EXP (u) += scale2;
              MPFR_EXP (v) += scale2;
              scaleit += scale2;
              MPFR_LOG_MSG (("Overflow in iteration n = %lu, scaleit = %"
                             MPFR_EXP_FSPEC "d (%" MPFR_EXP_FSPEC "d)\n",
                             n, scaleit, scale2));
              mpfr_clear_overflow ();
              continue; /* redo the iteration with the rescaled u and v */
            }
          mpfr_sqrt (u, uf, MPFR_RNDN);
          mpfr_swap (v, vf); /* v = V(k) = (U(k-1)+V(k-1))/2 */
          n++;
        }
      MPFR_LOG_MSG (("End of iterations (n = %lu)\n", n));

      /* the error on v is bounded by (18n+51) ulps, or twice if there
         was an exponent loss in the final subtraction */
      err += MPFR_INT_CEIL_LOG2 (18 * n + 51); /* 18n+51 should not overflow
                                                  since n is about log(p) */
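      /* Rough orders of magnitude (illustrative, not a bound from
         algorithms.tex): n grows like log2(p), so for p up to a few
         thousand bits n stays around 10, 18n+51 stays in the hundreds,
         and the term added to err is only 8 or 9 bits, well within the
         ceil(log2(q)) + 15 guard bits of the working precision. */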
      /* we should have n+2 <= 2^(p/4) [see algorithms.tex] */
      if (MPFR_LIKELY (MPFR_INT_CEIL_LOG2 (n + 2) <= p / 4 &&
                       MPFR_CAN_ROUND (v, p - err, q, rnd_mode)))
        break; /* Stop the loop */
      /* Next Ziv iteration with increased precision */
      MPFR_ZIV_NEXT (loop, p);
      s = MPFR_PREC2LIMBS (p);
    }
  MPFR_ZIV_FREE (loop);

  if (MPFR_UNLIKELY ((__gmpfr_flags & (MPFR_FLAGS_ALL ^ MPFR_FLAGS_INEXACT))
                     != 0))
    {
      MPFR_ASSERTN (! mpfr_overflow_p ());  /* since mpfr_clear_flags */
      MPFR_ASSERTN (! mpfr_underflow_p ()); /* since mpfr_clear_flags */
      MPFR_ASSERTN (! mpfr_divby0_p ());    /* since mpfr_clear_flags */
      MPFR_ASSERTN (! mpfr_nanflag_p ());   /* since mpfr_clear_flags */
    }
  /* Setting of the result */
  inexact = mpfr_set (r, v, rnd_mode);
  MPFR_EXP (r) -= scaleop + scaleit;

  MPFR_TMP_FREE(marker);

  MPFR_SAVE_EXPO_FREE (expo);
  /* From the definition of the AGM, underflow and overflow
     cannot occur. */
  return mpfr_check_range (r, inexact, rnd_mode);
  /* agm(u,v) can be exact for u, v rational only for u=v.
     Proof (due to Nicolas Brisebarre): it suffices to consider
     u=1 and v<1. Then 1/AGM(1,v) = 2F1(1/2,1/2,1;1-v^2),
     and a theorem due to G.V. Chudnovsky states that for x a
     non-zero algebraic number with |x|<1, then
     2F1(1/2,1/2,1;x) and 2F1(-1/2,1/2,1;x) are algebraically
     independent over Q. */
}
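
/* Illustrative usage sketch (not compiled here; the variable names and the
   printed value are only for the example). mpfr_agm is the public function
   declared in mpfr.h:

     #include <stdio.h>
     #include <mpfr.h>

     int main (void)
     {
       mpfr_t x, y, m;
       mpfr_inits2 (128, x, y, m, (mpfr_ptr) 0);
       mpfr_set_ui (x, 1, MPFR_RNDN);
       mpfr_set_ui (y, 2, MPFR_RNDN);
       mpfr_agm (m, x, y, MPFR_RNDN);   // m = AGM(1, 2) ~ 1.456791...
       mpfr_printf ("%.30Rg\n", m);
       mpfr_clears (x, y, m, (mpfr_ptr) 0);
       return 0;
     }
*/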