/* mpfr_mul -- multiply two floating-point numbers

Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
Contributed by the Arenaire and Cacao projects, INRIA.

This file is part of the GNU MPFR Library.

The GNU MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MPFR Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
MA 02110-1301, USA. */
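/* Illustrative usage sketch (not part of this library source): a minimal
   caller of mpfr_mul, assuming MPFR and GMP are installed and the program
   is linked with -lmpfr -lgmp.  The 100-bit precision and the values are
   arbitrary example choices.

     #include <stdio.h>
     #include <gmp.h>
     #include <mpfr.h>

     int main (void)
     {
       mpfr_t x, y, z;
       int inexact;
       mpfr_init2 (x, 100);
       mpfr_init2 (y, 100);
       mpfr_init2 (z, 100);
       mpfr_set_ui (x, 3, GMP_RNDN);
       mpfr_ui_div (y, 1, x, GMP_RNDN);
       inexact = mpfr_mul (z, x, y, GMP_RNDN);
       mpfr_out_str (stdout, 10, 0, z, GMP_RNDN);
       printf ("\ninexact = %d\n", inexact);
       mpfr_clears (x, y, z, (mpfr_ptr) 0);
       return 0;
     }
*/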
#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"


/********* BEGINNING CHECK *************/

/* Check if we have to check the result of mpfr_mul.
   TODO: Find a better (and faster?) check than using old implementation */
#ifdef WANT_ASSERT
# if WANT_ASSERT >= 3
int mpfr_mul2 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mp_rnd_t rnd_mode);
static int
mpfr_mul3 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mp_rnd_t rnd_mode)
{
  /* Old implementation */
  int sign_product, cc, inexact;
  mp_exp_t ax;
  mp_limb_t *tmp;
  mp_limb_t b1;
  mp_prec_t bq, cq;
  mp_size_t bn, cn, tn, k;
  MPFR_TMP_DECL(marker);
  /* deal with special cases */
  if (MPFR_ARE_SINGULAR(b,c))
    {
      if (MPFR_IS_NAN(b) || MPFR_IS_NAN(c))
        {
          MPFR_SET_NAN(a);
          MPFR_RET_NAN;
        }
      sign_product = MPFR_MULT_SIGN( MPFR_SIGN(b) , MPFR_SIGN(c) );
      if (MPFR_IS_INF(b))
        {
          if (MPFR_IS_INF(c) || MPFR_NOTZERO(c))
            {
              MPFR_SET_SIGN(a, sign_product);
              MPFR_SET_INF(a);
              MPFR_RET(0); /* exact */
            }
          else
            {
              MPFR_SET_NAN(a);
              MPFR_RET_NAN;
            }
        }
      else if (MPFR_IS_INF(c))
        {
          if (MPFR_NOTZERO(b))
            {
              MPFR_SET_SIGN(a, sign_product);
              MPFR_SET_INF(a);
              MPFR_RET(0); /* exact */
            }
          else
            {
              MPFR_SET_NAN(a);
              MPFR_RET_NAN;
            }
        }
      else
        {
          MPFR_ASSERTD(MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
          MPFR_SET_SIGN(a, sign_product);
          MPFR_SET_ZERO(a);
          MPFR_RET(0); /* 0 * 0 is exact */
        }
    }
  MPFR_CLEAR_FLAGS(a);
  sign_product = MPFR_MULT_SIGN( MPFR_SIGN(b) , MPFR_SIGN(c) );

  ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c);

  bq = MPFR_PREC(b);
  cq = MPFR_PREC(c);

  MPFR_ASSERTD(bq+cq > bq); /* PREC_MAX is /2 so no integer overflow */

  bn = (bq+BITS_PER_MP_LIMB-1)/BITS_PER_MP_LIMB; /* number of limbs of b */
  cn = (cq+BITS_PER_MP_LIMB-1)/BITS_PER_MP_LIMB; /* number of limbs of c */
  k = bn + cn; /* effective nb of limbs used by b*c (= tn or tn+1) below */
  tn = (bq + cq + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
  /* <= k, thus no int overflow */
  MPFR_ASSERTD(tn <= k);

  /* Check for no size_t overflow */
  MPFR_ASSERTD((size_t) k <= ((size_t) -1) / BYTES_PER_MP_LIMB);
  MPFR_TMP_MARK(marker);
  tmp = (mp_limb_t *) MPFR_TMP_ALLOC((size_t) k * BYTES_PER_MP_LIMB);
  /* multiply the two mantissas in temporarily allocated space */
  b1 = (MPFR_LIKELY(bn >= cn)) ?
    mpn_mul (tmp, MPFR_MANT(b), bn, MPFR_MANT(c), cn)
    : mpn_mul (tmp, MPFR_MANT(c), cn, MPFR_MANT(b), bn);

  /* now tmp[0]..tmp[k-1] contain the product of both mantissas,
     with tmp[k-1]>=2^(BITS_PER_MP_LIMB-2) */
  b1 >>= BITS_PER_MP_LIMB - 1; /* msb from the product */

  /* if the mantissas of b and c are uniformly distributed in ]1/2, 1],
     then their product is in ]1/4, 1/2] with probability 2*ln(2)-1 ~ 0.386
     and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
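  /* Sanity check of the 0.386 figure above (a sketch, assuming x and y are
     independent and uniform on ]1/2, 1]):
       P(x*y <= 1/2) = 4 * int_{1/2}^{1} (1/(2x) - 1/2) dx
                     = 4 * ((ln 2)/2 - 1/4)
                     = 2*ln(2) - 1 ~ 0.386,
     so the complementary case x*y in ]1/2, 1] has probability
     2 - 2*ln(2) ~ 0.614. */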
  tmp += k - tn;
  if (MPFR_UNLIKELY(b1 == 0))
    mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
  cc = mpfr_round_raw (MPFR_MANT (a), tmp, bq + cq,
                       MPFR_IS_NEG_SIGN(sign_product),
                       MPFR_PREC (a), rnd_mode, &inexact);

  /* cc = 1 ==> result is a power of two */
  if (MPFR_UNLIKELY(cc))
    MPFR_MANT(a)[MPFR_LIMB_SIZE(a)-1] = MPFR_LIMB_HIGHBIT;

  MPFR_TMP_FREE(marker);
  {
    mp_exp_t ax2 = ax + (mp_exp_t) (b1 - 1 + cc);
    if (MPFR_UNLIKELY( ax2 > __gmpfr_emax))
      return mpfr_overflow (a, rnd_mode, sign_product);
    if (MPFR_UNLIKELY( ax2 < __gmpfr_emin))
      {
        /* In the rounding to the nearest mode, if the exponent of the exact
           result (i.e. before rounding, i.e. without taking cc into account)
           is < __gmpfr_emin - 1 or the exact result is a power of 2 (i.e. if
           both arguments are powers of 2), then round to zero. */
        if (rnd_mode == GMP_RNDN &&
            (ax + (mp_exp_t) b1 < __gmpfr_emin ||
             (mpfr_powerof2_raw (b) && mpfr_powerof2_raw (c))))
          rnd_mode = GMP_RNDZ;
        return mpfr_underflow (a, rnd_mode, sign_product);
      }
    MPFR_SET_EXP (a, ax2);
    MPFR_SET_SIGN(a, sign_product);
  }
  MPFR_RET (inexact);
}
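/* Under WANT_ASSERT >= 3, the mpfr_mul below is a debug wrapper: it runs
   both the reference implementation above (mpfr_mul3) and the optimized one
   (mpfr_mul2, i.e. the real mpfr_mul further down, renamed by the #define at
   the end of this block), and aborts if their results or the signs of their
   ternary (inexact) values disagree. */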
int
mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mp_rnd_t rnd_mode)
{
  mpfr_t ta, tb, tc;
  int inexact1, inexact2;

  mpfr_init2 (ta, MPFR_PREC (a));
  mpfr_init2 (tb, MPFR_PREC (b));
  mpfr_init2 (tc, MPFR_PREC (c));
  MPFR_ASSERTN (mpfr_set (tb, b, GMP_RNDN) == 0);
  MPFR_ASSERTN (mpfr_set (tc, c, GMP_RNDN) == 0);

  inexact2 = mpfr_mul3 (ta, tb, tc, rnd_mode);
  inexact1 = mpfr_mul2 (a, b, c, rnd_mode);
  if (mpfr_cmp (ta, a) || inexact1*inexact2 < 0
      || (inexact1*inexact2 == 0 && (inexact1|inexact2) != 0))
    {
      fprintf (stderr, "mpfr_mul returns different values for %s\n"
               "Prec_a = %lu, Prec_b = %lu, Prec_c = %lu\nB = ",
               mpfr_print_rnd_mode (rnd_mode),
               MPFR_PREC (a), MPFR_PREC (b), MPFR_PREC (c));
      mpfr_out_str (stderr, 16, 0, tb, GMP_RNDN);
      fprintf (stderr, "\nC = ");
      mpfr_out_str (stderr, 16, 0, tc, GMP_RNDN);
      fprintf (stderr, "\nOldMul: ");
      mpfr_out_str (stderr, 16, 0, ta, GMP_RNDN);
      fprintf (stderr, "\nNewMul: ");
      mpfr_out_str (stderr, 16, 0, a, GMP_RNDN);
      fprintf (stderr, "\nNewInexact = %d | OldInexact = %d\n",
               inexact1, inexact2);
      MPFR_ASSERTN(0);
    }

  mpfr_clears (ta, tb, tc, (mpfr_ptr) 0);
  return inexact1;
}
# define mpfr_mul mpfr_mul2
# endif
#endif

/****** END OF CHECK *******/

/* Multiply 2 mpfr_t */
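/* Overview of the strategy below (a summary of this function, for
   orientation):
   1. handle NaN/Inf/zero operands;
   2. for one- or two-limb significands, multiply directly with
      umul_ppmm/add_ssaaaa;
   3. above MPFR_MUL_THRESHOLD (and when not squaring), try Mulders' short
      product (mpfr_mulhigh_n) and keep it only if mpfr_round_p says the
      approximation is accurate enough to round correctly;
   4. otherwise fall back to a full mpn_mul, then round with MPFR_RNDRAW and
      handle exponent overflow/underflow. */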
int
mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mp_rnd_t rnd_mode)
{
  int sign, inexact;
  mp_exp_t ax, ax2;
  mp_limb_t *tmp;
  mp_limb_t b1;
  mp_prec_t bq, cq;
  mp_size_t bn, cn, tn, k;
  MPFR_TMP_DECL (marker);

  MPFR_LOG_FUNC (("b[%#R]=%R c[%#R]=%R rnd=%d", b, b, c, c, rnd_mode),
                 ("a[%#R]=%R inexact=%d", a, a, inexact));
  /* deal with special cases */
  if (MPFR_ARE_SINGULAR (b, c))
    {
      if (MPFR_IS_NAN (b) || MPFR_IS_NAN (c))
        {
          MPFR_SET_NAN (a);
          MPFR_RET_NAN;
        }
      sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
      if (MPFR_IS_INF (b))
        {
          if (!MPFR_IS_ZERO (c))
            {
              MPFR_SET_SIGN (a, sign);
              MPFR_SET_INF (a);
              MPFR_RET (0);
            }
          else
            {
              MPFR_SET_NAN (a);
              MPFR_RET_NAN;
            }
        }
      else if (MPFR_IS_INF (c))
        {
          if (!MPFR_IS_ZERO (b))
            {
              MPFR_SET_SIGN (a, sign);
              MPFR_SET_INF (a);
              MPFR_RET (0);
            }
          else
            {
              MPFR_SET_NAN (a);
              MPFR_RET_NAN;
            }
        }
      else
        {
          MPFR_ASSERTD (MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
          MPFR_SET_SIGN (a, sign);
          MPFR_SET_ZERO (a);
          MPFR_RET (0);
        }
    }
  MPFR_CLEAR_FLAGS (a);
  sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));

  ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c);
  /* Note: the exponent of the exact result will be e = bx + cx + ec with
     ec in {-1,0,1} and the following assumes that e is representable. */

  /* FIXME: Is this check useful, given the exponent check done after the
   * multiplication?  It pays off only when the precision is large, there
   * is an overflow, and we are doing further multiplications... */
#ifdef HUGE
  if (MPFR_UNLIKELY (ax > __gmpfr_emax + 1))
    return mpfr_overflow (a, rnd_mode, sign);
  if (MPFR_UNLIKELY (ax < __gmpfr_emin - 2))
    return mpfr_underflow (a, rnd_mode == GMP_RNDN ? GMP_RNDZ : rnd_mode,
                           sign);
#endif
  bq = MPFR_PREC (b);
  cq = MPFR_PREC (c);

  MPFR_ASSERTD (bq+cq > bq); /* PREC_MAX is /2 so no integer overflow */

  bn = (bq+BITS_PER_MP_LIMB-1)/BITS_PER_MP_LIMB; /* number of limbs of b */
  cn = (cq+BITS_PER_MP_LIMB-1)/BITS_PER_MP_LIMB; /* number of limbs of c */
  k = bn + cn; /* effective nb of limbs used by b*c (= tn or tn+1) below */
  tn = (bq + cq + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
  MPFR_ASSERTD (tn <= k); /* tn <= k, thus no int overflow */

  /* Check for no size_t overflow */
  MPFR_ASSERTD ((size_t) k <= ((size_t) -1) / BYTES_PER_MP_LIMB);
  MPFR_TMP_MARK (marker);
  tmp = (mp_limb_t *) MPFR_TMP_ALLOC ((size_t) k * BYTES_PER_MP_LIMB);
  /* multiply the two mantissas in temporarily allocated space */
  if (MPFR_UNLIKELY (bn < cn))
    {
      mpfr_srcptr z = b;
      mp_size_t zn = bn;
      b = c;
      bn = cn;
      c = z;
      cn = zn;
    }
  MPFR_ASSERTD (bn >= cn);
  if (MPFR_LIKELY (bn <= 2))
    {
      if (bn == 1)
        {
          /* 1 limb * 1 limb */
          umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
          b1 = tmp[1];
        }
      else if (MPFR_UNLIKELY (cn == 1))
        {
          /* 2 limbs * 1 limb */
          mp_limb_t t;
          umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
          umul_ppmm (tmp[2], t, MPFR_MANT (b)[1], MPFR_MANT (c)[0]);
          add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t);
          b1 = tmp[2];
        }
      else
        {
          /* 2 limbs * 2 limbs */
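          /* Schoolbook view of the 2x2 limb product computed below (a
             reading of the code, with beta = 2^BITS_PER_MP_LIMB,
             B = bh*beta + bl and C = ch*beta + cl):
               B*C = bh*ch*beta^2 + (bh*cl + bl*ch)*beta + bl*cl.
             The code forms the two 2x1 sub-products B*cl and B*ch and sums
             them (B*ch shifted up by one limb), propagating the final carry
             into tmp[3] by hand. */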
          mp_limb_t t1, t2, t3;
          /* First 2 limbs * 1 limb */
          umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
          umul_ppmm (tmp[2], t1, MPFR_MANT (b)[1], MPFR_MANT (c)[0]);
          add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t1);
          /* Second, the other 2 limbs * 1 limb product */
          umul_ppmm (t1, t2, MPFR_MANT (b)[0], MPFR_MANT (c)[1]);
          umul_ppmm (tmp[3], t3, MPFR_MANT (b)[1], MPFR_MANT (c)[1]);
          add_ssaaaa (tmp[3], t1, tmp[3], t1, 0, t3);
          /* Sum those two partial products */
          add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], t1, t2);
          tmp[3] += (tmp[2] < t1);
          b1 = tmp[3];
        }
      b1 >>= (BITS_PER_MP_LIMB - 1);
      tmp += k - tn;
      if (MPFR_UNLIKELY (b1 == 0))
        mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
    }
  else
    /* Mulders' mulhigh. Disable if squaring, since it is not tuned for
       such a case */
    if (MPFR_UNLIKELY (bn > MPFR_MUL_THRESHOLD && b != c))
      {
        mp_limb_t *bp, *cp;
        mp_size_t n;
        mp_prec_t p;

        /* First check if we can reduce the precision of b or c:
           exact values are a nightmare for the short product trick */
        bp = MPFR_MANT (b);
        cp = MPFR_MANT (c);
        MPFR_ASSERTN (MPFR_MUL_THRESHOLD >= 1);
        if (MPFR_UNLIKELY ((bp[0] == 0 && bp[1] == 0) ||
                           (cp[0] == 0 && cp[1] == 0)))
          {
            mpfr_t b_tmp, c_tmp;

            MPFR_TMP_FREE (marker);
            /* Check for b */
            while (*bp == 0)
              {
                bp++;
                bn--;
                MPFR_ASSERTD (bn > 0);
              } /* This must end since the MSL is != 0 */

            /* Check for c too */
            while (*cp == 0)
              {
                cp++;
                cn--;
                MPFR_ASSERTD (cn > 0);
              } /* This must end since the MSL is != 0 */

            /* It is not the fastest way, but it is safer */
            MPFR_SET_SAME_SIGN (b_tmp, b);
            MPFR_SET_EXP (b_tmp, MPFR_GET_EXP (b));
            MPFR_PREC (b_tmp) = bn * BITS_PER_MP_LIMB;
            MPFR_MANT (b_tmp) = bp;

            MPFR_SET_SAME_SIGN (c_tmp, c);
            MPFR_SET_EXP (c_tmp, MPFR_GET_EXP (c));
            MPFR_PREC (c_tmp) = cn * BITS_PER_MP_LIMB;
            MPFR_MANT (c_tmp) = cp;

            /* Call mpfr_mul again with the fixed arguments */
            return mpfr_mul (a, b_tmp, c_tmp, rnd_mode);
          }
        /* Compute estimated precision of mulhigh.
           We could use `+ (n < cn) + (n < bn)' instead of `+ 2',
           but is it worth it? */
        n = MPFR_LIMB_SIZE (a) + 1;
        n = MIN (n, cn);
        MPFR_ASSERTD (n >= 1 && 2*n <= k && n <= cn && n <= bn);
        p = n * BITS_PER_MP_LIMB - MPFR_INT_CEIL_LOG2 (n + 2);
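        /* Reading of the bound above (an informal note, not a statement
           from the MPFR documentation): the short product is treated as
           accurate to within about n+2 ulps of its last computed limb, so
           MPFR_INT_CEIL_LOG2 (n + 2) low bits are written off and only p of
           the n*BITS_PER_MP_LIMB computed bits are trusted for rounding. */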
        bp += bn - n;
        cp += cn - n;

        /* Check if MulHigh can produce a roundable result.
           We may lose 1 bit due to RNDN, 1 due to final shift. */
        if (MPFR_UNLIKELY (MPFR_PREC (a) > p - 5))
          {
            if (MPFR_UNLIKELY (MPFR_PREC (a) > p - 5 + BITS_PER_MP_LIMB
                               || bn <= MPFR_MUL_THRESHOLD+1))
              {
                /* MulHigh can't produce a roundable result. */
                MPFR_LOG_MSG (("mpfr_mulhigh can't be used (%lu VS %lu)\n",
                               MPFR_PREC (a), p));
                goto full_multiply;
              }
            /* Add one extra limb to mantissa of b and c. */
            if (bn > n)
              bp --;
            else
              {
                bp = (mp_limb_t*) MPFR_TMP_ALLOC ((n+1) * sizeof (mp_limb_t));
                bp[0] = 0;
                MPN_COPY (bp + 1, MPFR_MANT (b) + bn - n, n);
              }
            if (cn > n)
              cp --; /* FIXME: Could this happen? */
            else
              {
                cp = (mp_limb_t*) MPFR_TMP_ALLOC ((n+1) * sizeof (mp_limb_t));
                cp[0] = 0;
                MPN_COPY (cp + 1, MPFR_MANT (c) + cn - n, n);
              }
            /* We will compute with one extra limb */
            n++;
            p = n * BITS_PER_MP_LIMB - MPFR_INT_CEIL_LOG2 (n + 2);
            /* Due to some nasty reasons we can have only 4 bits */
            MPFR_ASSERTD (MPFR_PREC (a) <= p - 4);

            if (MPFR_LIKELY (k < 2*n))
              {
                tmp = (mp_limb_t*) MPFR_TMP_ALLOC (2 * n * sizeof (mp_limb_t));
                tmp += 2*n-k; /* `tmp' still points to an area of `k' limbs */
              }
          }
        MPFR_LOG_MSG (("Use mpfr_mulhigh (%lu VS %lu)\n", MPFR_PREC (a), p));
        /* Compute an approximation of the product of b and c */
        mpfr_mulhigh_n (tmp + k - 2 * n, bp, cp, n);
        /* now tmp[0]..tmp[k-1] contain the product of both mantissas,
           with tmp[k-1]>=2^(BITS_PER_MP_LIMB-2) */
        b1 = tmp[k-1] >> (BITS_PER_MP_LIMB - 1); /* msb from the product */

        /* If the mantissas of b and c are uniformly distributed in (1/2, 1],
           then their product is in (1/4, 1/2] with probability 2*ln(2)-1
           ~ 0.386 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
        if (MPFR_UNLIKELY (b1 == 0))
          /* Warning: the mpfr_mulhigh_n call above only surely affects
             tmp[k-n-1..k-1], thus we shift only those limbs */
          mpn_lshift (tmp + k - n - 1, tmp + k - n - 1, n + 1, 1);
        tmp += k - tn;
        MPFR_ASSERTD (MPFR_LIMB_MSB (tmp[tn-1]) != 0);

        if (MPFR_UNLIKELY (!mpfr_round_p (tmp, tn, p+b1-1, MPFR_PREC(a)
                                          + (rnd_mode == GMP_RNDN))))
          {
            tmp -= k - tn; /* tmp may have changed, FIX IT!!!!! */
            goto full_multiply;
          }
      }
    else
      {
      full_multiply:
        MPFR_LOG_MSG (("Use mpn_mul\n", 0));
        b1 = mpn_mul (tmp, MPFR_MANT (b), bn, MPFR_MANT (c), cn);

        /* now tmp[0]..tmp[k-1] contain the product of both mantissas,
           with tmp[k-1]>=2^(BITS_PER_MP_LIMB-2) */
        b1 >>= BITS_PER_MP_LIMB - 1; /* msb from the product */

        /* if the mantissas of b and c are uniformly distributed in (1/2, 1],
           then their product is in (1/4, 1/2] with probability 2*ln(2)-1
           ~ 0.386 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
        tmp += k - tn;
        if (MPFR_UNLIKELY (b1 == 0))
          mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
      }
  ax2 = ax + (mp_exp_t) (b1 - 1);
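  /* Note (an informal reading of how MPFR_RNDRAW is invoked here, not a
     normative description of the macro): the last argument, ax2++, is the
     statement executed when rounding carries out of the significand and the
     result becomes a power of 2, in which case the exponent must be bumped
     by one. */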
  MPFR_RNDRAW (inexact, a, tmp, bq+cq, rnd_mode, sign, ax2++);
  MPFR_TMP_FREE (marker);
  MPFR_EXP (a) = ax2; /* Can't use MPFR_SET_EXP: Expo may be out of range */
  MPFR_SET_SIGN (a, sign);
  if (MPFR_UNLIKELY (ax2 > __gmpfr_emax))
    return mpfr_overflow (a, rnd_mode, sign);
  if (MPFR_UNLIKELY (ax2 < __gmpfr_emin))
    {
      /* In the rounding to the nearest mode, if the exponent of the exact
         result (i.e. before rounding, i.e. without taking cc into account)
         is < __gmpfr_emin - 1 or the exact result is a power of 2 (i.e. if
         both arguments are powers of 2), then round to zero. */
      if (rnd_mode == GMP_RNDN
          && (ax + (mp_exp_t) b1 < __gmpfr_emin
              || (mpfr_powerof2_raw (b) && mpfr_powerof2_raw (c))))
        rnd_mode = GMP_RNDZ;
      return mpfr_underflow (a, rnd_mode, sign);
    }
  MPFR_RET (inexact);
}