amd64 - add kvtop and add back ed(4) to AMD64_GENERIC
[dragonfly.git] / contrib / mpfr / pow_z.c
blob3e88a9bf2533a76dcce33020ef94c1443f5e2c9b
1 /* mpfr_pow_z -- power function x^z with z a MPZ
3 Copyright 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
4 Contributed by the Arenaire and Cacao projects, INRIA.
6 This file is part of the GNU MPFR Library.
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or (at your
11 option) any later version.
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LIB. If not, write to
20 the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
21 MA 02110-1301, USA. */
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
26 /* y <- x^|z| with z != 0
27 if cr=1: ensures correct rounding of y
28 if cr=0: does not ensure correct rounding, but avoid spurious overflow
29 or underflow, and uses the precision of y as working precision (warning,
30 y and x might be the same variable). */
31 static int
32 mpfr_pow_pos_z (mpfr_ptr y, mpfr_srcptr x, mpz_srcptr z, mp_rnd_t rnd, int cr)
34 mpfr_t res;
35 mp_prec_t prec, err;
36 int inexact;
37 mp_rnd_t rnd1, rnd2;
38 mpz_t absz;
39 mp_size_t size_z;
40 MPFR_ZIV_DECL (loop);
41 MPFR_BLOCK_DECL (flags);
43 MPFR_LOG_FUNC (("x[%#R]=%R z=? rnd=%d cr=%d", x, x, rnd, cr),
44 ("y[%#R]=%R inexact=%d", y, y, inexact));
46 MPFR_ASSERTD (mpz_sgn (z) != 0);
48 if (MPFR_UNLIKELY (mpz_cmpabs_ui (z, 1) == 0))
49 return mpfr_set (y, x, rnd);
51 absz[0] = z[0];
52 SIZ (absz) = ABS(SIZ(absz)); /* Hack to get abs(z) */
53 MPFR_MPZ_SIZEINBASE2 (size_z, z);
55 /* round towards 1 (or -1) to avoid spurious overflow or underflow,
56 i.e. if an overflow or underflow occurs, it is a real exception
57 and is not just due to the rounding error. */
58 rnd1 = (MPFR_EXP(x) >= 1) ? GMP_RNDZ
59 : (MPFR_IS_POS(x) ? GMP_RNDU : GMP_RNDD);
60 rnd2 = (MPFR_EXP(x) >= 1) ? GMP_RNDD : GMP_RNDU;
62 if (cr != 0)
63 prec = MPFR_PREC (y) + 3 + size_z + MPFR_INT_CEIL_LOG2 (MPFR_PREC (y));
64 else
65 prec = MPFR_PREC (y);
66 mpfr_init2 (res, prec);
68 MPFR_ZIV_INIT (loop, prec);
69 for (;;)
71 unsigned int inexmul; /* will be non-zero if res may be inexact */
72 mp_size_t i = size_z;
74 /* now 2^(i-1) <= z < 2^i */
75 /* see below (case z < 0) for the error analysis, which is identical,
76 except if z=n, the maximal relative error is here 2(n-1)2^(-prec)
77 instead of 2(2n-1)2^(-prec) for z<0. */
78 MPFR_ASSERTD (prec > (mpfr_prec_t) i);
79 err = prec - 1 - (mpfr_prec_t) i;
81 MPFR_BLOCK (flags,
82 inexmul = mpfr_mul (res, x, x, rnd2);
83 MPFR_ASSERTD (i >= 2);
84 if (mpz_tstbit (absz, i - 2))
85 inexmul |= mpfr_mul (res, res, x, rnd1);
86 for (i -= 3; i >= 0 && !MPFR_BLOCK_EXCEP; i--)
88 inexmul |= mpfr_mul (res, res, res, rnd2);
89 if (mpz_tstbit (absz, i))
90 inexmul |= mpfr_mul (res, res, x, rnd1);
91 });
92 if (MPFR_LIKELY (inexmul == 0 || cr == 0
93 || MPFR_OVERFLOW (flags) || MPFR_UNDERFLOW (flags)
94 || MPFR_CAN_ROUND (res, err, MPFR_PREC (y), rnd)))
95 break;
96 /* Can't decide correct rounding, increase the precision */
97 MPFR_ZIV_NEXT (loop, prec);
98 mpfr_set_prec (res, prec);
100 MPFR_ZIV_FREE (loop);
102 /* Check Overflow */
103 if (MPFR_OVERFLOW (flags))
105 MPFR_LOG_MSG (("overflow\n", 0));
106 inexact = mpfr_overflow (y, rnd, mpz_odd_p (absz) ?
107 MPFR_SIGN (x) : MPFR_SIGN_POS);
109 /* Check Underflow */
110 else if (MPFR_UNDERFLOW (flags))
112 MPFR_LOG_MSG (("underflow\n", 0));
113 if (rnd == GMP_RNDN)
115 mpfr_t y2, zz;
117 /* We cannot decide now whether the result should be rounded
118 toward zero or +Inf. So, let's use the general case of
119 mpfr_pow, which can do that. But the problem is that the
120 result can be exact! However, it is sufficient to try to
121 round on 2 bits (the precision does not matter in case of
122 underflow, since MPFR does not have subnormals), in which
123 case, the result cannot be exact due to previous filtering
124 of trivial cases. */
125 MPFR_ASSERTD (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
126 MPFR_EXP (x) - 1) != 0);
127 mpfr_init2 (y2, 2);
128 mpfr_init2 (zz, ABS (SIZ (z)) * BITS_PER_MP_LIMB);
129 inexact = mpfr_set_z (zz, z, GMP_RNDN);
130 MPFR_ASSERTN (inexact == 0);
131 inexact = mpfr_pow_general (y2, x, zz, rnd, 1,
132 (mpfr_save_expo_t *) NULL);
133 mpfr_clear (zz);
134 mpfr_set (y, y2, GMP_RNDN);
135 mpfr_clear (y2);
136 __gmpfr_flags = MPFR_FLAGS_INEXACT | MPFR_FLAGS_UNDERFLOW;
138 else
140 inexact = mpfr_underflow (y, rnd, mpz_odd_p (absz) ?
141 MPFR_SIGN (x) : MPFR_SIGN_POS);
144 else
145 inexact = mpfr_set (y, res, rnd);
147 mpfr_clear (res);
148 return inexact;
151 /* The computation of y = pow(x,z) is done by
152 * y = set_ui(1) if z = 0
153 * y = pow_ui(x,z) if z > 0
154 * y = pow_ui(1/x,-z) if z < 0
156 * Note: in case z < 0, we could also compute 1/pow_ui(x,-z). However, in
157 * case MAX < 1/MIN, where MAX is the largest positive value, i.e.,
158 * MAX = nextbelow(+Inf), and MIN is the smallest positive value, i.e.,
159 * MIN = nextabove(+0), then x^(-z) might produce an overflow, whereas
160 * x^z is representable.
164 mpfr_pow_z (mpfr_ptr y, mpfr_srcptr x, mpz_srcptr z, mp_rnd_t rnd)
166 int inexact;
167 mpz_t tmp;
168 MPFR_SAVE_EXPO_DECL (expo);
170 MPFR_LOG_FUNC (("x[%#R]=%R z=? rnd=%d", x, x, rnd),
171 ("y[%#R]=%R inexact=%d", y, y, inexact));
173 /* x^0 = 1 for any x, even a NaN */
174 if (MPFR_UNLIKELY (mpz_sgn (z) == 0))
175 return mpfr_set_ui (y, 1, rnd);
177 if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
179 if (MPFR_IS_NAN (x))
181 MPFR_SET_NAN (y);
182 MPFR_RET_NAN;
184 else if (MPFR_IS_INF (x))
186 /* Inf^n = Inf, (-Inf)^n = Inf for n even, -Inf for n odd */
187 /* Inf ^(-n) = 0, sign = + if x>0 or z even */
188 if (mpz_sgn (z) > 0)
189 MPFR_SET_INF (y);
190 else
191 MPFR_SET_ZERO (y);
192 if (MPFR_UNLIKELY (MPFR_IS_NEG (x) && mpz_odd_p (z)))
193 MPFR_SET_NEG (y);
194 else
195 MPFR_SET_POS (y);
196 MPFR_RET (0);
198 else /* x is zero */
200 MPFR_ASSERTD (MPFR_IS_ZERO(x));
201 if (mpz_sgn (z) > 0)
202 /* 0^n = +/-0 for any n */
203 MPFR_SET_ZERO (y);
204 else
205 /* 0^(-n) if +/- INF */
206 MPFR_SET_INF (y);
207 if (MPFR_LIKELY (MPFR_IS_POS (x) || mpz_even_p (z)))
208 MPFR_SET_POS (y);
209 else
210 MPFR_SET_NEG (y);
211 MPFR_RET(0);
215 /* detect exact powers: x^-n is exact iff x is a power of 2
216 Do it if n > 0 too as this is faster and this filtering is
217 needed in case of underflow. */
218 if (MPFR_UNLIKELY (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
219 MPFR_EXP (x) - 1) == 0))
221 mp_exp_t expx = MPFR_EXP (x); /* warning: x and y may be the same
222 variable */
224 MPFR_LOG_MSG (("x^n with x power of two\n", 0));
225 mpfr_set_si (y, mpz_odd_p (z) ? MPFR_INT_SIGN(x) : 1, rnd);
226 MPFR_ASSERTD (MPFR_IS_FP (y));
227 mpz_init (tmp);
228 mpz_mul_si (tmp, z, expx - 1);
229 MPFR_ASSERTD (MPFR_GET_EXP (y) == 1);
230 mpz_add_ui (tmp, tmp, 1);
231 inexact = 0;
232 if (MPFR_UNLIKELY (mpz_cmp_si (tmp, __gmpfr_emin) < 0))
234 MPFR_LOG_MSG (("underflow\n", 0));
235 /* |y| is a power of two, thus |y| <= 2^(emin-2), and in
236 rounding to nearest, the value must be rounded to 0. */
237 if (rnd == GMP_RNDN)
238 rnd = GMP_RNDZ;
239 inexact = mpfr_underflow (y, rnd, MPFR_SIGN (y));
241 else if (MPFR_UNLIKELY (mpz_cmp_si (tmp, __gmpfr_emax) > 0))
243 MPFR_LOG_MSG (("overflow\n", 0));
244 inexact = mpfr_overflow (y, rnd, MPFR_SIGN (y));
246 else
247 MPFR_SET_EXP (y, mpz_get_si (tmp));
248 mpz_clear (tmp);
249 MPFR_RET (inexact);
252 MPFR_SAVE_EXPO_MARK (expo);
254 if (mpz_sgn (z) > 0)
256 inexact = mpfr_pow_pos_z (y, x, z, rnd, 1);
257 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, __gmpfr_flags);
259 else
261 /* Declaration of the intermediary variable */
262 mpfr_t t;
263 mp_prec_t Nt; /* Precision of the intermediary variable */
264 mp_rnd_t rnd1;
265 mp_size_t size_z;
266 MPFR_ZIV_DECL (loop);
268 MPFR_MPZ_SIZEINBASE2 (size_z, z);
270 /* initial working precision */
271 Nt = MPFR_PREC (y);
272 Nt = Nt + size_z + 3 + MPFR_INT_CEIL_LOG2 (Nt);
273 /* ensures Nt >= bits(z)+2 */
275 /* initialise of intermediary variable */
276 mpfr_init2 (t, Nt);
278 /* We will compute rnd(rnd1(1/x) ^ (-z)), where rnd1 is the rounding
279 toward sign(x), to avoid spurious overflow or underflow. */
280 rnd1 = MPFR_EXP (x) < 1 ? GMP_RNDZ :
281 (MPFR_SIGN (x) > 0 ? GMP_RNDU : GMP_RNDD);
283 MPFR_ZIV_INIT (loop, Nt);
284 for (;;)
286 MPFR_BLOCK_DECL (flags);
288 /* compute (1/x)^(-z), -z>0 */
289 /* As emin = -emax, an underflow cannot occur in the division.
290 And if an overflow occurs, then this means that x^z overflows
291 too (since we have rounded toward 1 or -1). */
292 MPFR_BLOCK (flags, mpfr_ui_div (t, 1, x, rnd1));
293 MPFR_ASSERTD (! MPFR_UNDERFLOW (flags));
294 /* t = (1/x)*(1+theta) where |theta| <= 2^(-Nt) */
295 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags)))
296 goto overflow;
297 MPFR_BLOCK (flags, mpfr_pow_pos_z (t, t, z, rnd, 0));
298 /* Now if z=-n, t = x^z*(1+theta)^(2n-1) where |theta| <= 2^(-Nt),
299 with theta maybe different from above. If (2n-1)*2^(-Nt) <= 1/2,
300 which is satisfied as soon as Nt >= bits(z)+2, then we can use
301 Lemma \ref{lemma_graillat} from algorithms.tex, which yields
302 t = x^z*(1+theta) with |theta| <= 2(2n-1)*2^(-Nt), thus the
303 error is bounded by 2(2n-1) ulps <= 2^(bits(z)+2) ulps. */
304 if (MPFR_UNLIKELY (MPFR_OVERFLOW (flags)))
306 overflow:
307 MPFR_ZIV_FREE (loop);
308 mpfr_clear (t);
309 MPFR_SAVE_EXPO_FREE (expo);
310 MPFR_LOG_MSG (("overflow\n", 0));
311 return mpfr_overflow (y, rnd,
312 mpz_odd_p (z) ? MPFR_SIGN (x) :
313 MPFR_SIGN_POS);
315 if (MPFR_UNLIKELY (MPFR_UNDERFLOW (flags)))
317 MPFR_ZIV_FREE (loop);
318 mpfr_clear (t);
319 MPFR_LOG_MSG (("underflow\n", 0));
320 if (rnd == GMP_RNDN)
322 mpfr_t y2, zz;
324 /* We cannot decide now whether the result should be
325 rounded toward zero or away from zero. So, like
326 in mpfr_pow_pos_z, let's use the general case of
327 mpfr_pow in precision 2. */
328 MPFR_ASSERTD (mpfr_cmp_si_2exp (x, MPFR_SIGN (x),
329 MPFR_EXP (x) - 1) != 0);
330 mpfr_init2 (y2, 2);
331 mpfr_init2 (zz, ABS (SIZ (z)) * BITS_PER_MP_LIMB);
332 inexact = mpfr_set_z (zz, z, GMP_RNDN);
333 MPFR_ASSERTN (inexact == 0);
334 inexact = mpfr_pow_general (y2, x, zz, rnd, 1,
335 (mpfr_save_expo_t *) NULL);
336 mpfr_clear (zz);
337 mpfr_set (y, y2, GMP_RNDN);
338 mpfr_clear (y2);
339 MPFR_SAVE_EXPO_UPDATE_FLAGS (expo, MPFR_FLAGS_UNDERFLOW);
340 goto end;
342 else
344 MPFR_SAVE_EXPO_FREE (expo);
345 return mpfr_underflow (y, rnd, mpz_odd_p (z) ?
346 MPFR_SIGN (x) : MPFR_SIGN_POS);
349 if (MPFR_LIKELY (MPFR_CAN_ROUND (t, Nt - size_z - 2, MPFR_PREC (y),
350 rnd)))
351 break;
352 /* actualisation of the precision */
353 MPFR_ZIV_NEXT (loop, Nt);
354 mpfr_set_prec (t, Nt);
356 MPFR_ZIV_FREE (loop);
358 inexact = mpfr_set (y, t, rnd);
359 mpfr_clear (t);
362 end:
363 MPFR_SAVE_EXPO_FREE (expo);
364 return mpfr_check_range (y, inexact, rnd);