1 /* mpfr_mul -- multiply two floating-point numbers
3 Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramel projects, INRIA.
6 This file is part of the GNU MPFR Library.
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
23 #define MPFR_NEED_LONGLONG_H
24 #include "mpfr-impl.h"
27 /********* BEGINNING CHECK *************/
29 /* Check if we have to check the result of mpfr_mul.
30 TODO: Find a better (and faster?) check than using old implementation */
34 int mpfr_mul2 (mpfr_ptr a
, mpfr_srcptr b
, mpfr_srcptr c
, mpfr_rnd_t rnd_mode
);
36 mpfr_mul3 (mpfr_ptr a
, mpfr_srcptr b
, mpfr_srcptr c
, mpfr_rnd_t rnd_mode
)
38 /* Old implementation */
39 int sign_product
, cc
, inexact
;
44 mp_size_t bn
, cn
, tn
, k
;
45 MPFR_TMP_DECL(marker
);
47 /* deal with special cases */
48 if (MPFR_ARE_SINGULAR(b
,c
))
50 if (MPFR_IS_NAN(b
) || MPFR_IS_NAN(c
))
55 sign_product
= MPFR_MULT_SIGN( MPFR_SIGN(b
) , MPFR_SIGN(c
) );
58 if (MPFR_IS_INF(c
) || MPFR_NOTZERO(c
))
60 MPFR_SET_SIGN(a
,sign_product
);
62 MPFR_RET(0); /* exact */
70 else if (MPFR_IS_INF(c
))
74 MPFR_SET_SIGN(a
, sign_product
);
76 MPFR_RET(0); /* exact */
86 MPFR_ASSERTD(MPFR_IS_ZERO(b
) || MPFR_IS_ZERO(c
));
87 MPFR_SET_SIGN(a
, sign_product
);
89 MPFR_RET(0); /* 0 * 0 is exact */
92 sign_product
= MPFR_MULT_SIGN( MPFR_SIGN(b
) , MPFR_SIGN(c
) );
94 ax
= MPFR_GET_EXP (b
) + MPFR_GET_EXP (c
);
99 MPFR_ASSERTN ((mpfr_uprec_t
) bq
+ cq
<= MPFR_PREC_MAX
);
101 bn
= MPFR_PREC2LIMBS (bq
); /* number of limbs of b */
102 cn
= MPFR_PREC2LIMBS (cq
); /* number of limbs of c */
103 k
= bn
+ cn
; /* effective nb of limbs used by b*c (= tn or tn+1) below */
104 tn
= MPFR_PREC2LIMBS (bq
+ cq
);
105 /* <= k, thus no int overflow */
106 MPFR_ASSERTD(tn
<= k
);
108 /* Check for no size_t overflow*/
109 MPFR_ASSERTD((size_t) k
<= ((size_t) -1) / BYTES_PER_MP_LIMB
);
110 MPFR_TMP_MARK(marker
);
111 tmp
= MPFR_TMP_LIMBS_ALLOC (k
);
113 /* multiplies two mantissa in temporary allocated space */
114 b1
= (MPFR_LIKELY(bn
>= cn
)) ?
115 mpn_mul (tmp
, MPFR_MANT(b
), bn
, MPFR_MANT(c
), cn
)
116 : mpn_mul (tmp
, MPFR_MANT(c
), cn
, MPFR_MANT(b
), bn
);
118 /* now tmp[0]..tmp[k-1] contains the product of both mantissa,
119 with tmp[k-1]>=2^(GMP_NUMB_BITS-2) */
120 b1
>>= GMP_NUMB_BITS
- 1; /* msb from the product */
122 /* if the mantissas of b and c are uniformly distributed in ]1/2, 1],
123 then their product is in ]1/4, 1/2] with probability 2*ln(2)-1 ~ 0.386
124 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
126 if (MPFR_UNLIKELY(b1
== 0))
127 mpn_lshift (tmp
, tmp
, tn
, 1); /* tn <= k, so no stack corruption */
128 cc
= mpfr_round_raw (MPFR_MANT (a
), tmp
, bq
+ cq
,
129 MPFR_IS_NEG_SIGN(sign_product
),
130 MPFR_PREC (a
), rnd_mode
, &inexact
);
132 /* cc = 1 ==> result is a power of two */
133 if (MPFR_UNLIKELY(cc
))
134 MPFR_MANT(a
)[MPFR_LIMB_SIZE(a
)-1] = MPFR_LIMB_HIGHBIT
;
136 MPFR_TMP_FREE(marker
);
139 mpfr_exp_t ax2
= ax
+ (mpfr_exp_t
) (b1
- 1 + cc
);
140 if (MPFR_UNLIKELY( ax2
> __gmpfr_emax
))
141 return mpfr_overflow (a
, rnd_mode
, sign_product
);
142 if (MPFR_UNLIKELY( ax2
< __gmpfr_emin
))
144 /* In the rounding to the nearest mode, if the exponent of the exact
145 result (i.e. before rounding, i.e. without taking cc into account)
146 is < __gmpfr_emin - 1 or the exact result is a power of 2 (i.e. if
147 both arguments are powers of 2) in absolute value, then round to
149 if (rnd_mode
== MPFR_RNDN
&&
150 (ax
+ (mpfr_exp_t
) b1
< __gmpfr_emin
||
151 (mpfr_powerof2_raw (b
) && mpfr_powerof2_raw (c
))))
152 rnd_mode
= MPFR_RNDZ
;
153 return mpfr_underflow (a
, rnd_mode
, sign_product
);
155 MPFR_SET_EXP (a
, ax2
);
156 MPFR_SET_SIGN(a
, sign_product
);
162 mpfr_mul (mpfr_ptr a
, mpfr_srcptr b
, mpfr_srcptr c
, mpfr_rnd_t rnd_mode
)
165 int inexact1
, inexact2
;
167 mpfr_init2 (ta
, MPFR_PREC (a
));
168 mpfr_init2 (tb
, MPFR_PREC (b
));
169 mpfr_init2 (tc
, MPFR_PREC (c
));
170 MPFR_ASSERTN (mpfr_set (tb
, b
, MPFR_RNDN
) == 0);
171 MPFR_ASSERTN (mpfr_set (tc
, c
, MPFR_RNDN
) == 0);
173 inexact2
= mpfr_mul3 (ta
, tb
, tc
, rnd_mode
);
174 inexact1
= mpfr_mul2 (a
, b
, c
, rnd_mode
);
175 if (mpfr_cmp (ta
, a
) || inexact1
*inexact2
< 0
176 || (inexact1
*inexact2
== 0 && (inexact1
|inexact2
) != 0))
178 fprintf (stderr
, "mpfr_mul return different values for %s\n"
179 "Prec_a = %lu, Prec_b = %lu, Prec_c = %lu\nB = ",
180 mpfr_print_rnd_mode (rnd_mode
),
181 MPFR_PREC (a
), MPFR_PREC (b
), MPFR_PREC (c
));
182 mpfr_out_str (stderr
, 16, 0, tb
, MPFR_RNDN
);
183 fprintf (stderr
, "\nC = ");
184 mpfr_out_str (stderr
, 16, 0, tc
, MPFR_RNDN
);
185 fprintf (stderr
, "\nOldMul: ");
186 mpfr_out_str (stderr
, 16, 0, ta
, MPFR_RNDN
);
187 fprintf (stderr
, "\nNewMul: ");
188 mpfr_out_str (stderr
, 16, 0, a
, MPFR_RNDN
);
189 fprintf (stderr
, "\nNewInexact = %d | OldInexact = %d\n",
194 mpfr_clears (ta
, tb
, tc
, (mpfr_ptr
) 0);
198 # define mpfr_mul mpfr_mul2
202 /****** END OF CHECK *******/
204 /* Multiply 2 mpfr_t */
206 /* Note: mpfr_sqr will call mpfr_mul if bn > MPFR_SQR_THRESHOLD,
207 in order to use Mulders' mulhigh, which is handled only here
208 to avoid partial code duplication. There is some overhead due
209 to the additional tests, but slowdown should not be noticeable
210 as this code is not executed in very small precisions. */
213 mpfr_mul (mpfr_ptr a
, mpfr_srcptr b
, mpfr_srcptr c
, mpfr_rnd_t rnd_mode
)
220 mp_size_t bn
, cn
, tn
, k
, threshold
;
221 MPFR_TMP_DECL (marker
);
224 (("b[%Pu]=%.*Rg c[%Pu]=%.*Rg rnd=%d",
225 mpfr_get_prec (b
), mpfr_log_prec
, b
,
226 mpfr_get_prec (c
), mpfr_log_prec
, c
, rnd_mode
),
227 ("a[%Pu]=%.*Rg inexact=%d",
228 mpfr_get_prec (a
), mpfr_log_prec
, a
, inexact
));
230 /* deal with special cases */
231 if (MPFR_ARE_SINGULAR (b
, c
))
233 if (MPFR_IS_NAN (b
) || MPFR_IS_NAN (c
))
238 sign
= MPFR_MULT_SIGN (MPFR_SIGN (b
), MPFR_SIGN (c
));
241 if (!MPFR_IS_ZERO (c
))
243 MPFR_SET_SIGN (a
, sign
);
253 else if (MPFR_IS_INF (c
))
255 if (!MPFR_IS_ZERO (b
))
257 MPFR_SET_SIGN (a
, sign
);
269 MPFR_ASSERTD (MPFR_IS_ZERO(b
) || MPFR_IS_ZERO(c
));
270 MPFR_SET_SIGN (a
, sign
);
275 sign
= MPFR_MULT_SIGN (MPFR_SIGN (b
), MPFR_SIGN (c
));
277 ax
= MPFR_GET_EXP (b
) + MPFR_GET_EXP (c
);
278 /* Note: the exponent of the exact result will be e = bx + cx + ec with
279 ec in {-1,0,1} and the following assumes that e is representable. */
281 /* FIXME: Useful since we do an exponent check after ?
282 * It is useful iff the precision is big, there is an overflow
283 * and we are doing further mults...*/
285 if (MPFR_UNLIKELY (ax
> __gmpfr_emax
+ 1))
286 return mpfr_overflow (a
, rnd_mode
, sign
);
287 if (MPFR_UNLIKELY (ax
< __gmpfr_emin
- 2))
288 return mpfr_underflow (a
, rnd_mode
== MPFR_RNDN
? MPFR_RNDZ
: rnd_mode
,
295 MPFR_ASSERTN ((mpfr_uprec_t
) bq
+ cq
<= MPFR_PREC_MAX
);
297 bn
= MPFR_PREC2LIMBS (bq
); /* number of limbs of b */
298 cn
= MPFR_PREC2LIMBS (cq
); /* number of limbs of c */
299 k
= bn
+ cn
; /* effective nb of limbs used by b*c (= tn or tn+1) below */
300 tn
= MPFR_PREC2LIMBS (bq
+ cq
);
301 MPFR_ASSERTD (tn
<= k
); /* tn <= k, thus no int overflow */
303 /* Check for no size_t overflow*/
304 MPFR_ASSERTD ((size_t) k
<= ((size_t) -1) / BYTES_PER_MP_LIMB
);
305 MPFR_TMP_MARK (marker
);
306 tmp
= MPFR_TMP_LIMBS_ALLOC (k
);
308 /* multiplies two mantissa in temporary allocated space */
309 if (MPFR_UNLIKELY (bn
< cn
))
318 MPFR_ASSERTD (bn
>= cn
);
319 if (MPFR_LIKELY (bn
<= 2))
323 /* 1 limb * 1 limb */
324 umul_ppmm (tmp
[1], tmp
[0], MPFR_MANT (b
)[0], MPFR_MANT (c
)[0]);
327 else if (MPFR_UNLIKELY (cn
== 1))
329 /* 2 limbs * 1 limb */
331 umul_ppmm (tmp
[1], tmp
[0], MPFR_MANT (b
)[0], MPFR_MANT (c
)[0]);
332 umul_ppmm (tmp
[2], t
, MPFR_MANT (b
)[1], MPFR_MANT (c
)[0]);
333 add_ssaaaa (tmp
[2], tmp
[1], tmp
[2], tmp
[1], 0, t
);
338 /* 2 limbs * 2 limbs */
339 mp_limb_t t1
, t2
, t3
;
340 /* First 2 limbs * 1 limb */
341 umul_ppmm (tmp
[1], tmp
[0], MPFR_MANT (b
)[0], MPFR_MANT (c
)[0]);
342 umul_ppmm (tmp
[2], t1
, MPFR_MANT (b
)[1], MPFR_MANT (c
)[0]);
343 add_ssaaaa (tmp
[2], tmp
[1], tmp
[2], tmp
[1], 0, t1
);
344 /* Second, the other 2 limbs * 1 limb product */
345 umul_ppmm (t1
, t2
, MPFR_MANT (b
)[0], MPFR_MANT (c
)[1]);
346 umul_ppmm (tmp
[3], t3
, MPFR_MANT (b
)[1], MPFR_MANT (c
)[1]);
347 add_ssaaaa (tmp
[3], t1
, tmp
[3], t1
, 0, t3
);
348 /* Sum those two partial products */
349 add_ssaaaa (tmp
[2], tmp
[1], tmp
[2], tmp
[1], t1
, t2
);
350 tmp
[3] += (tmp
[2] < t1
);
353 b1
>>= (GMP_NUMB_BITS
- 1);
355 if (MPFR_UNLIKELY (b1
== 0))
356 mpn_lshift (tmp
, tmp
, tn
, 1); /* tn <= k, so no stack corruption */
359 /* Mulders' mulhigh. This code can also be used via mpfr_sqr,
360 hence the tests b != c. */
361 if (MPFR_UNLIKELY (bn
> (threshold
= b
!= c
?
362 MPFR_MUL_THRESHOLD
: MPFR_SQR_THRESHOLD
)))
368 /* First check if we can reduce the precision of b or c:
369 exact values are a nightmare for the short product trick */
372 MPFR_ASSERTN (threshold
>= 1);
373 if (MPFR_UNLIKELY ((bp
[0] == 0 && bp
[1] == 0) ||
374 (cp
[0] == 0 && cp
[1] == 0)))
378 MPFR_TMP_FREE (marker
);
384 MPFR_ASSERTD (bn
> 0);
385 } /* This must end since the most significant limb is != 0 */
387 /* Check for c too: if b ==c, will do nothing */
392 MPFR_ASSERTD (cn
> 0);
393 } /* This must end since the most significant limb is != 0 */
395 /* It is not the faster way, but it is safer */
396 MPFR_SET_SAME_SIGN (b_tmp
, b
);
397 MPFR_SET_EXP (b_tmp
, MPFR_GET_EXP (b
));
398 MPFR_PREC (b_tmp
) = bn
* GMP_NUMB_BITS
;
399 MPFR_MANT (b_tmp
) = bp
;
403 MPFR_SET_SAME_SIGN (c_tmp
, c
);
404 MPFR_SET_EXP (c_tmp
, MPFR_GET_EXP (c
));
405 MPFR_PREC (c_tmp
) = cn
* GMP_NUMB_BITS
;
406 MPFR_MANT (c_tmp
) = cp
;
408 /* Call again mpfr_mul with the fixed arguments */
409 return mpfr_mul (a
, b_tmp
, c_tmp
, rnd_mode
);
412 /* Call mpfr_mul instead of mpfr_sqr as the precision
413 is probably still high enough. */
414 return mpfr_mul (a
, b_tmp
, b_tmp
, rnd_mode
);
417 /* Compute estimated precision of mulhigh.
418 We could use `+ (n < cn) + (n < bn)' instead of `+ 2',
419 but does it worth it? */
420 n
= MPFR_LIMB_SIZE (a
) + 1;
422 MPFR_ASSERTD (n
>= 1 && 2*n
<= k
&& n
<= cn
&& n
<= bn
);
423 p
= n
* GMP_NUMB_BITS
- MPFR_INT_CEIL_LOG2 (n
+ 2);
427 /* Check if MulHigh can produce a roundable result.
428 We may lose 1 bit due to RNDN, 1 due to final shift. */
429 if (MPFR_UNLIKELY (MPFR_PREC (a
) > p
- 5))
431 if (MPFR_UNLIKELY (MPFR_PREC (a
) > p
- 5 + GMP_NUMB_BITS
432 || bn
<= threshold
+ 1))
434 /* MulHigh can't produce a roundable result. */
435 MPFR_LOG_MSG (("mpfr_mulhigh can't be used (%lu VS %lu)\n",
439 /* Add one extra limb to mantissa of b and c. */
444 bp
= MPFR_TMP_LIMBS_ALLOC (n
+ 1);
446 MPN_COPY (bp
+ 1, MPFR_MANT (b
) + bn
- n
, n
);
451 cp
--; /* FIXME: Could this happen? */
454 cp
= MPFR_TMP_LIMBS_ALLOC (n
+ 1);
456 MPN_COPY (cp
+ 1, MPFR_MANT (c
) + cn
- n
, n
);
459 /* We will compute with one extra limb */
461 /* ceil(log2(n+2)) takes into account the lost bits due to
462 Mulders' short product */
463 p
= n
* GMP_NUMB_BITS
- MPFR_INT_CEIL_LOG2 (n
+ 2);
464 /* Due to some nasty reasons we can have only 4 bits */
465 MPFR_ASSERTD (MPFR_PREC (a
) <= p
- 4);
467 if (MPFR_LIKELY (k
< 2*n
))
469 tmp
= MPFR_TMP_LIMBS_ALLOC (2 * n
);
470 tmp
+= 2*n
-k
; /* `tmp' still points to an area of `k' limbs */
473 MPFR_LOG_MSG (("Use mpfr_mulhigh (%lu VS %lu)\n", MPFR_PREC (a
), p
));
474 /* Compute an approximation of the product of b and c */
476 mpfr_mulhigh_n (tmp
+ k
- 2 * n
, bp
, cp
, n
);
478 mpfr_sqrhigh_n (tmp
+ k
- 2 * n
, bp
, n
);
479 /* now tmp[0]..tmp[k-1] contains the product of both mantissa,
480 with tmp[k-1]>=2^(GMP_NUMB_BITS-2) */
481 /* [VL] FIXME: This cannot be true: mpfr_mulhigh_n only
482 depends on pointers and n. As k can be arbitrarily larger,
483 the result cannot depend on k. And indeed, with GMP compiled
484 with --enable-alloca=debug, valgrind was complaining, at
485 least because MPFR_RNDRAW at the end tried to compute the
486 sticky bit even when not necessary; this problem is fixed,
487 but there's at least something wrong with the comment above. */
488 b1
= tmp
[k
-1] >> (GMP_NUMB_BITS
- 1); /* msb from the product */
490 /* If the mantissas of b and c are uniformly distributed in (1/2, 1],
491 then their product is in (1/4, 1/2] with probability 2*ln(2)-1
492 ~ 0.386 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
493 if (MPFR_UNLIKELY (b1
== 0))
494 /* Warning: the mpfr_mulhigh_n call above only surely affects
495 tmp[k-n-1..k-1], thus we shift only those limbs */
496 mpn_lshift (tmp
+ k
- n
- 1, tmp
+ k
- n
- 1, n
+ 1, 1);
498 MPFR_ASSERTD (MPFR_LIMB_MSB (tmp
[tn
-1]) != 0);
500 /* if the most significant bit b1 is zero, we have only p-1 correct
502 if (MPFR_UNLIKELY (!mpfr_round_p (tmp
, tn
, p
+ b1
- 1, MPFR_PREC(a
)
503 + (rnd_mode
== MPFR_RNDN
))))
505 tmp
-= k
- tn
; /* tmp may have changed, FIX IT!!!!! */
512 MPFR_LOG_MSG (("Use mpn_mul\n", 0));
513 b1
= mpn_mul (tmp
, MPFR_MANT (b
), bn
, MPFR_MANT (c
), cn
);
515 /* now tmp[0]..tmp[k-1] contains the product of both mantissa,
516 with tmp[k-1]>=2^(GMP_NUMB_BITS-2) */
517 b1
>>= GMP_NUMB_BITS
- 1; /* msb from the product */
519 /* if the mantissas of b and c are uniformly distributed in (1/2, 1],
520 then their product is in (1/4, 1/2] with probability 2*ln(2)-1
521 ~ 0.386 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
523 if (MPFR_UNLIKELY (b1
== 0))
524 mpn_lshift (tmp
, tmp
, tn
, 1); /* tn <= k, so no stack corruption */
527 ax2
= ax
+ (mpfr_exp_t
) (b1
- 1);
528 MPFR_RNDRAW (inexact
, a
, tmp
, bq
+cq
, rnd_mode
, sign
, ax2
++);
529 MPFR_TMP_FREE (marker
);
530 MPFR_EXP (a
) = ax2
; /* Can't use MPFR_SET_EXP: Expo may be out of range */
531 MPFR_SET_SIGN (a
, sign
);
532 if (MPFR_UNLIKELY (ax2
> __gmpfr_emax
))
533 return mpfr_overflow (a
, rnd_mode
, sign
);
534 if (MPFR_UNLIKELY (ax2
< __gmpfr_emin
))
536 /* In the rounding to the nearest mode, if the exponent of the exact
537 result (i.e. before rounding, i.e. without taking cc into account)
538 is < __gmpfr_emin - 1 or the exact result is a power of 2 (i.e. if
539 both arguments are powers of 2), then round to zero. */
540 if (rnd_mode
== MPFR_RNDN
541 && (ax
+ (mpfr_exp_t
) b1
< __gmpfr_emin
542 || (mpfr_powerof2_raw (b
) && mpfr_powerof2_raw (c
))))
543 rnd_mode
= MPFR_RNDZ
;
544 return mpfr_underflow (a
, rnd_mode
, sign
);