1 /* mpfr_sub1 -- internal function to perform a "real" subtraction
3 Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramel projects, INRIA.
6 This file is part of the GNU MPFR Library.
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
23 #include "mpfr-impl.h"
25 /* compute sign(b) * (|b| - |c|), with |b| > |c|, diff_exp = EXP(b) - EXP(c)
26 Returns 0 iff result is exact,
27 a negative value when the result is less than the exact value,
28 a positive value otherwise.
32 mpfr_sub1 (mpfr_ptr a
, mpfr_srcptr b
, mpfr_srcptr c
, mpfr_rnd_t rnd_mode
)
36 mpfr_prec_t cancel
, cancel1
;
37 mp_size_t cancel2
, an
, bn
, cn
, cn0
;
38 mp_limb_t
*ap
, *bp
, *cp
;
39 mp_limb_t carry
, bb
, cc
;
40 int inexact
, shift_b
, shift_c
, add_exp
= 0;
41 int cmp_low
= 0; /* used for rounding to nearest: 0 if low(b) = low(c),
42 negative if low(b) < low(c), positive if low(b)>low(c) */
44 MPFR_TMP_DECL(marker
);
46 MPFR_TMP_MARK(marker
);
48 an
= MPFR_LIMB_SIZE(a
);
50 sign
= mpfr_cmp2 (b
, c
, &cancel
);
51 if (MPFR_UNLIKELY(sign
== 0))
53 if (rnd_mode
== MPFR_RNDD
)
62 * If subtraction: sign(a) = sign * sign(b)
63 * If addition: sign(a) = sign of the larger argument in absolute value.
65 * Both cases can be simplidied in:
67 * if addition: sign(a) = sign * sign(b) = sign(b)
68 * if subtraction, b is greater, so sign(a) = sign(b)
70 * if subtraction, sign(a) = - sign(b)
71 * if addition, sign(a) = sign(c) (since c is greater)
72 * But if it is an addition, sign(b) and sign(c) are opposed!
73 * So sign(a) = - sign(b)
76 if (sign
< 0) /* swap b and c so that |b| > |c| */
79 MPFR_SET_OPPOSITE_SIGN (a
,b
);
83 MPFR_SET_SAME_SIGN (a
,b
);
85 /* Check if c is too small.
86 A more precise test is to replace 2 by
87 (rnd == MPFR_RNDN) + mpfr_power2_raw (b)
88 but it is more expensive and not very useful */
89 if (MPFR_UNLIKELY (MPFR_GET_EXP (c
) <= MPFR_GET_EXP (b
)
90 - (mpfr_exp_t
) MAX (MPFR_PREC (a
), MPFR_PREC (b
)) - 2))
92 /* Remember, we can't have an exact result! */
93 /* A.AAAAAAAAAAAAAAAAA
96 /* A = S*ABS(B) +/- ulp(a) */
97 MPFR_SET_EXP (a
, MPFR_GET_EXP (b
));
98 MPFR_RNDRAW_EVEN (inexact
, a
, MPFR_MANT (b
), MPFR_PREC (b
),
99 rnd_mode
, MPFR_SIGN (a
),
100 if (MPFR_UNLIKELY ( ++MPFR_EXP (a
) > __gmpfr_emax
))
101 inexact
= mpfr_overflow (a
, rnd_mode
, MPFR_SIGN (a
)));
102 /* inexact = mpfr_set4 (a, b, rnd_mode, MPFR_SIGN (a)); */
106 But we know it isn't (Since we have to remove `c')
107 So if we round to Zero, we have to remove one ulp.
108 Otherwise the result is correctly rounded. */
109 if (MPFR_IS_LIKE_RNDZ (rnd_mode
, MPFR_IS_NEG (a
)))
112 MPFR_RET (- MPFR_INT_SIGN (a
));
114 MPFR_RET (MPFR_INT_SIGN (a
));
121 /* It isn't exact so Prec(b) > Prec(a) and the last
122 Prec(b)-Prec(a) bits of `b' are not zeros.
123 Which means that removing c from b can't generate a carry
124 execpt in case of even rounding.
125 In all other case the result and the inexact flag should be
126 correct (We can't have an exact result).
127 In case of EVEN rounding:
130 = 1.BBBBBBBBBBBBBx01 Rounded to Prec(b)
131 = 1.BBBBBBBBBBBBBx Nearest / Rounded to Prec(a)
133 1.BBBBBBBBBBBBB0 if inexact == EVEN_INEX (x == 0)
134 1.BBBBBBBBBBBBB1+1 if inexact == -EVEN_INEX (x == 1)
135 which means we get a wrong rounded result if x==1,
136 i.e. inexact= MPFR_EVEN_INEX */
137 if (MPFR_UNLIKELY (inexact
== MPFR_EVEN_INEX
*MPFR_INT_SIGN (a
)))
140 inexact
= -MPFR_INT_SIGN (a
);
146 diff_exp
= (mpfr_uexp_t
) MPFR_GET_EXP (b
) - MPFR_GET_EXP (c
);
148 /* reserve a space to store b aligned with the result, i.e. shifted by
149 (-cancel) % GMP_NUMB_BITS to the right */
150 bn
= MPFR_LIMB_SIZE (b
);
151 MPFR_UNSIGNED_MINUS_MODULO (shift_b
, cancel
);
152 cancel1
= (cancel
+ shift_b
) / GMP_NUMB_BITS
;
154 /* the high cancel1 limbs from b should not be taken into account */
155 if (MPFR_UNLIKELY (shift_b
== 0))
157 bp
= MPFR_MANT(b
); /* no need of an extra space */
158 /* Ensure ap != bp */
159 if (MPFR_UNLIKELY (ap
== bp
))
161 bp
= MPFR_TMP_LIMBS_ALLOC (bn
);
162 MPN_COPY (bp
, ap
, bn
);
167 bp
= MPFR_TMP_LIMBS_ALLOC (bn
+ 1);
168 bp
[0] = mpn_rshift (bp
+ 1, MPFR_MANT(b
), bn
++, shift_b
);
171 /* reserve a space to store c aligned with the result, i.e. shifted by
172 (diff_exp-cancel) % GMP_NUMB_BITS to the right */
173 cn
= MPFR_LIMB_SIZE(c
);
174 if ((UINT_MAX
% GMP_NUMB_BITS
) == (GMP_NUMB_BITS
-1)
175 && ((-(unsigned) 1)%GMP_NUMB_BITS
> 0))
176 shift_c
= ((mpfr_uexp_t
) diff_exp
- cancel
) % GMP_NUMB_BITS
;
179 shift_c
= diff_exp
- (cancel
% GMP_NUMB_BITS
);
180 shift_c
= (shift_c
+ GMP_NUMB_BITS
) % GMP_NUMB_BITS
;
182 MPFR_ASSERTD( shift_c
>= 0 && shift_c
< GMP_NUMB_BITS
);
184 if (MPFR_UNLIKELY(shift_c
== 0))
187 /* Ensure ap != cp */
190 cp
= MPFR_TMP_LIMBS_ALLOC (cn
);
191 MPN_COPY(cp
, ap
, cn
);
196 cp
= MPFR_TMP_LIMBS_ALLOC (cn
+ 1);
197 cp
[0] = mpn_rshift (cp
+ 1, MPFR_MANT(c
), cn
++, shift_c
);
201 printf ("rnd=%s shift_b=%d shift_c=%d diffexp=%lu\n",
202 mpfr_print_rnd_mode (rnd_mode
), shift_b
, shift_c
,
203 (unsigned long) diff_exp
);
206 MPFR_ASSERTD (ap
!= cp
);
207 MPFR_ASSERTD (bp
!= cp
);
209 /* here we have shift_c = (diff_exp - cancel) % GMP_NUMB_BITS,
210 0 <= shift_c < GMP_NUMB_BITS
211 thus we want cancel2 = ceil((cancel - diff_exp) / GMP_NUMB_BITS) */
213 /* Possible optimization with a C99 compiler (i.e. well-defined
214 integer division): if MPFR_PREC_MAX is reduced to
215 ((mpfr_prec_t)((mpfr_uprec_t)(~(mpfr_uprec_t)0)>>1) - GMP_NUMB_BITS + 1)
216 and diff_exp is of type mpfr_exp_t (no need for mpfr_uexp_t, since
217 the sum or difference of 2 exponents must be representable, as used
218 by the multiplication code), then the computation of cancel2 could
220 cancel2 = (cancel - (diff_exp - shift_c)) / GMP_NUMB_BITS;
221 because cancel, diff_exp and shift_c are all non-negative and
222 these variables are signed. */
224 MPFR_ASSERTD (cancel
>= 0);
225 if (cancel
>= diff_exp
)
226 /* Note that cancel is signed and will be converted to mpfr_uexp_t
227 (type of diff_exp) in the expression below, so that this will
228 work even if cancel is very large and diff_exp = 0. */
229 cancel2
= (cancel
- diff_exp
+ (GMP_NUMB_BITS
- 1)) / GMP_NUMB_BITS
;
231 cancel2
= - (mp_size_t
) ((diff_exp
- cancel
) / GMP_NUMB_BITS
);
232 /* the high cancel2 limbs from b should not be taken into account */
234 printf ("cancel=%lu cancel1=%lu cancel2=%ld\n",
235 (unsigned long) cancel
, (unsigned long) cancel1
, (long) cancel2
);
239 <----------------+-----------|---->
240 <----------PREC(a)----------><-sh->
242 limbs bp[bn-cancel1-1]
243 <--...-----><----------------+-----------+----------->
245 limbs cp[cn-cancel2-1] cancel2 >= 0
246 <--...--><----------------+----------------+---------------->
247 (-cancel2) cancel2 < 0
248 limbs <----------------+---------------->
251 /* first part: put in ap[0..an-1] the value of high(b) - high(c),
252 where high(b) consists of the high an+cancel1 limbs of b,
253 and high(c) consists of the high an+cancel2 limbs of c.
256 /* copy high(b) into a */
257 if (MPFR_LIKELY(an
+ (mp_size_t
) cancel1
<= bn
))
258 /* a: <----------------+-----------|---->
259 b: <-----------------------------------------> */
260 MPN_COPY (ap
, bp
+ bn
- (an
+ cancel1
), an
);
262 /* a: <----------------+-----------|---->
263 b: <-------------------------> */
264 if ((mp_size_t
) cancel1
< bn
) /* otherwise b does not overlap with a */
266 MPN_ZERO (ap
, an
+ cancel1
- bn
);
267 MPN_COPY (ap
+ (an
+ cancel1
- bn
), bp
, bn
- cancel1
);
273 printf("after copying high(b), a="); mpfr_print_binary(a
); putchar('\n');
276 /* subtract high(c) */
277 if (MPFR_LIKELY(an
+ cancel2
> 0)) /* otherwise c does not overlap with a */
283 if (an
+ cancel2
<= cn
)
284 /* a: <----------------------------->
285 c: <-----------------------------------------> */
286 mpn_sub_n (ap
, ap
, cp
+ cn
- (an
+ cancel2
), an
);
288 /* a: <---------------------------->
289 c: <-------------------------> */
291 ap2
= ap
+ an
+ (cancel2
- cn
);
293 mpn_sub_n (ap2
, ap2
, cp
, cn
- cancel2
);
296 else /* cancel2 < 0 */
300 if (an
+ cancel2
<= cn
)
301 /* a: <----------------------------->
302 c: <-----------------------------> */
303 borrow
= mpn_sub_n (ap
, ap
, cp
+ cn
- (an
+ cancel2
),
306 /* a: <---------------------------->
307 c: <----------------> */
309 ap2
= ap
+ an
+ cancel2
- cn
;
310 borrow
= mpn_sub_n (ap2
, ap2
, cp
, cn
);
312 ap2
= ap
+ an
+ cancel2
;
313 mpn_sub_1 (ap2
, ap2
, -cancel2
, borrow
);
318 printf("after subtracting high(c), a=");
319 mpfr_print_binary(a
);
323 /* now perform rounding */
324 sh
= (mpfr_prec_t
) an
* GMP_NUMB_BITS
- MPFR_PREC(a
);
325 /* last unused bits from a */
326 carry
= ap
[0] & MPFR_LIMB_MASK (sh
);
329 if (MPFR_LIKELY(rnd_mode
== MPFR_RNDN
))
333 /* can decide except when carry = 2^(sh-1) [middle]
334 or carry = 0 [truncate, but cannot decide inexact flag] */
335 if (carry
> (MPFR_LIMB_ONE
<< (sh
- 1)))
337 else if ((0 < carry
) && (carry
< (MPFR_LIMB_ONE
<< (sh
- 1))))
339 inexact
= -1; /* result if smaller than exact value */
342 /* now carry = 2^(sh-1), in which case cmp_low=2,
343 or carry = 0, in which case cmp_low=0 */
344 cmp_low
= (carry
== 0) ? 0 : 2;
347 else /* directed rounding: set rnd_mode to RNDZ iff toward zero */
349 if (MPFR_IS_RNDUTEST_OR_RNDDNOTTEST(rnd_mode
, MPFR_IS_NEG(a
)))
350 rnd_mode
= MPFR_RNDZ
;
354 if (rnd_mode
== MPFR_RNDZ
)
359 else /* round away */
364 /* we have to consider the low (bn - (an+cancel1)) limbs from b,
365 and the (cn - (an+cancel2)) limbs from c. */
371 printf ("last sh=%d bits from a are %lu, bn=%ld, cn=%ld\n",
372 sh
, (unsigned long) carry
, (long) bn
, (long) cn
);
375 /* for rounding to nearest, we couldn't conclude up to here in the following
377 1. sh = 0, then cmp_low=0: we can either truncate, subtract one ulp
378 or add one ulp: -1 ulp < low(b)-low(c) < 1 ulp
379 2. sh > 0 but the low sh bits from high(b)-high(c) equal 2^(sh-1):
380 -0.5 ulp <= -1/2^sh < low(b)-low(c)-0.5 < 1/2^sh <= 0.5 ulp
381 we can't decide the rounding, in that case cmp_low=2:
382 either we truncate and flag=-1, or we add one ulp and flag=1
383 3. the low sh>0 bits from high(b)-high(c) equal 0: we know we have to
384 truncate but we can't decide the ternary value, here cmp_low=0:
385 -0.5 ulp <= -1/2^sh < low(b)-low(c) < 1/2^sh <= 0.5 ulp
386 we always truncate and inexact can be any of -1,0,1
389 /* note: here cn might exceed cn0, in which case we consider a zero limb */
390 for (k
= 0; (bn
> 0) || (cn
> 0); k
= 1)
392 /* if cmp_low < 0, we know low(b) - low(c) < 0
393 if cmp_low > 0, we know low(b) - low(c) > 0
394 (more precisely if cmp_low = 2, low(b) - low(c) = 0.5 ulp so far)
395 if cmp_low = 0, so far low(b) - low(c) = 0 */
398 bb
= (bn
> 0) ? bp
[--bn
] : 0;
399 if ((cn
> 0) && (cn
-- <= cn0
))
404 /* cmp_low compares low(b) and low(c) */
405 if (cmp_low
== 0) /* case 1 or 3 */
406 cmp_low
= (bb
< cc
) ? -2+k
: (bb
> cc
) ? 1 : 0;
408 /* Case 1 for k=0 splits into 7 subcases:
411 1c: 0 < bb - cc < half
413 1e: -half < bb - cc < 0
417 Case 2 splits into 3 subcases:
422 Case 3 splits into 3 subcases:
428 /* the case rounding to nearest with sh=0 is special since one couldn't
429 subtract above 1/2 ulp in the trailing limb of the result */
430 if (rnd_mode
== MPFR_RNDN
&& sh
== 0 && k
== 0) /* case 1 for k=0 */
432 mp_limb_t half
= MPFR_LIMB_HIGHBIT
;
434 /* add one ulp if bb > cc + half
435 truncate if cc - half < bb < cc + half
436 sub one ulp if bb < cc - half
439 if (cmp_low
< 0) /* bb < cc: -1 ulp < low(b) - low(c) < 0,
440 cases 1e, 1f and 1g */
444 else /* since bb < cc < half, bb+half < 2*half */
446 /* now we have bb < cc + half:
447 we have to subtract one ulp if bb < cc,
448 and truncate if bb > cc */
450 else if (cmp_low
>= 0) /* bb >= cc, cases 1a to 1d */
454 else /* since bb >= cc >= half, bb - half >= 0 */
456 /* now we have bb > cc - half: we have to add one ulp if bb > cc,
457 and truncate if bb < cc */
464 printf ("k=%u bb=%lu cc=%lu cmp_low=%d\n", k
,
465 (unsigned long) bb
, (unsigned long) cc
, cmp_low
);
467 if (cmp_low
< 0) /* low(b) - low(c) < 0: either truncate or subtract
470 if (rnd_mode
== MPFR_RNDZ
)
471 goto sub_one_ulp
; /* set inexact=-1 */
472 else if (rnd_mode
!= MPFR_RNDN
) /* round away */
477 else /* round to nearest */
479 /* If cmp_low < 0 and bb > cc, then -0.5 ulp < low(b)-low(c) < 0,
480 whatever the value of sh.
481 If sh>0, then cmp_low < 0 implies that the initial neglected
482 sh bits were 0 (otherwise cmp_low=2 initially), thus the
483 weight of the new bits is less than 0.5 ulp too.
484 If k > 0 (and sh=0) this means that either the first neglected
485 limbs bb and cc were equal (thus cmp_low was 0 for k=0),
486 or we had bb - cc = -0.5 ulp or 0.5 ulp.
487 The last case is not possible here since we would have
488 cmp_low > 0 which is sticky.
489 In the first case (where we have cmp_low = -1), we truncate,
490 whereas in the 2nd case we have cmp_low = -2 and we subtract
493 if (bb
> cc
|| sh
> 0 || cmp_low
== -1)
494 { /* -0.5 ulp < low(b)-low(c) < 0,
495 bb > cc corresponds to cases 1e and 1f1
496 sh > 0 corresponds to cases 3c and 3b3
497 cmp_low = -1 corresponds to case 1d3 (also 3b3) */
501 else if (bb
< cc
) /* here sh = 0 and low(b)-low(c) < -0.5 ulp,
502 this corresponds to cases 1g and 1f3 */
504 /* the only case where we can't conclude is sh=0 and bb=cc,
505 i.e., we have low(b) - low(c) = -0.5 ulp (up to now), thus
506 we don't know if we must truncate or subtract one ulp.
507 Note: for sh=0 we can't have low(b) - low(c) = -0.5 ulp up to
508 now, since low(b) - low(c) > 1/2^sh */
511 else if (cmp_low
> 0) /* 0 < low(b) - low(c): either truncate or
514 if (rnd_mode
== MPFR_RNDZ
)
519 else if (rnd_mode
!= MPFR_RNDN
) /* round away */
521 else /* round to nearest */
525 /* if sh=0, then bb>cc means that low(b)-low(c) > 0.5 ulp,
526 and similarly when cmp_low=2 */
527 if (cmp_low
== 2) /* cases 1a, 1b1, 2a and 2b1 */
529 /* sh > 0 and cmp_low > 0: this implies that the sh initial
530 neglected bits were 0, and the remaining low(b)-low(c)>0,
531 but its weight is less than 0.5 ulp */
532 else /* 0 < low(b) - low(c) < 0.5 ulp, this corresponds to
533 cases 3a, 1d1 and 3b1 */
539 else if (bb
< cc
) /* 0 < low(b) - low(c) < 0.5 ulp, cases 1c,
545 /* the only case where we can't conclude is bb=cc, i.e.,
546 low(b) - low(c) = 0.5 ulp (up to now), thus we don't know
547 if we must truncate or add one ulp. */
550 /* after k=0, we cannot conclude in the following cases, we split them
551 according to the values of bb and cc for k=1:
552 1b. sh=0 and cmp_low = 1 and bb-cc = half [around 0.5 ulp]
553 1b1. bb > cc: add one ulp, inex = 1
554 1b2: bb = cc: cannot conclude
555 1b3: bb < cc: truncate, inex = -1
556 1d. sh=0 and cmp_low = 0 and bb-cc = 0 [around 0]
557 1d1: bb > cc: truncate, inex = -1
558 1d2: bb = cc: cannot conclude
559 1d3: bb < cc: truncate, inex = +1
560 1f. sh=0 and cmp_low = -1 and bb-cc = -half [around -0.5 ulp]
561 1f1: bb > cc: truncate, inex = +1
562 1f2: bb = cc: cannot conclude
563 1f3: bb < cc: sub one ulp, inex = -1
564 2b. sh > 0 and cmp_low = 2 and bb=cc [around 0.5 ulp]
565 2b1. bb > cc: add one ulp, inex = 1
566 2b2: bb = cc: cannot conclude
567 2b3: bb < cc: truncate, inex = -1
568 3b. sh > 0 and cmp_low = 0 [around 0]
569 3b1. bb > cc: truncate, inex = -1
570 3b2: bb = cc: cannot conclude
571 3b3: bb < cc: truncate, inex = +1
575 if ((rnd_mode
== MPFR_RNDN
) && cmp_low
!= 0)
577 /* even rounding rule */
578 if ((ap
[0] >> sh
) & 1)
586 inexact
= (cmp_low
> 0) ? -1 : 1;
592 sub_one_ulp
: /* sub one unit in last place to a */
593 mpn_sub_1 (ap
, ap
, an
, MPFR_LIMB_ONE
<< sh
);
597 add_one_ulp
: /* add one unit in last place to a */
598 if (MPFR_UNLIKELY(mpn_add_1 (ap
, ap
, an
, MPFR_LIMB_ONE
<< sh
)))
599 /* result is a power of 2: 11111111111111 + 1 = 1000000000000000 */
601 ap
[an
-1] = MPFR_LIMB_HIGHBIT
;
604 inexact
= 1; /* result larger than exact value */
607 if (MPFR_UNLIKELY((ap
[an
-1] >> (GMP_NUMB_BITS
- 1)) == 0))
608 /* case 1 - epsilon */
610 ap
[an
-1] = MPFR_LIMB_HIGHBIT
;
615 /* we have to set MPFR_EXP(a) to MPFR_EXP(b) - cancel + add_exp, taking
616 care of underflows/overflows in that computation, and of the allowed
618 if (MPFR_LIKELY(cancel
))
622 cancel
-= add_exp
; /* OK: add_exp is an int equal to 0 or 1 */
623 exp_a
= MPFR_GET_EXP (b
) - cancel
;
624 if (MPFR_UNLIKELY(exp_a
< __gmpfr_emin
))
626 MPFR_TMP_FREE(marker
);
627 if (rnd_mode
== MPFR_RNDN
&&
628 (exp_a
< __gmpfr_emin
- 1 ||
629 (inexact
>= 0 && mpfr_powerof2_raw (a
))))
630 rnd_mode
= MPFR_RNDZ
;
631 return mpfr_underflow (a
, rnd_mode
, MPFR_SIGN(a
));
633 MPFR_SET_EXP (a
, exp_a
);
635 else /* cancel = 0: MPFR_EXP(a) <- MPFR_EXP(b) + add_exp */
637 /* in case cancel = 0, add_exp can still be 1, in case b is just
638 below a power of two, c is very small, prec(a) < prec(b),
639 and rnd=away or nearest */
642 exp_b
= MPFR_GET_EXP (b
);
643 if (MPFR_UNLIKELY(add_exp
&& exp_b
== __gmpfr_emax
))
645 MPFR_TMP_FREE(marker
);
646 return mpfr_overflow (a
, rnd_mode
, MPFR_SIGN(a
));
648 MPFR_SET_EXP (a
, exp_b
+ add_exp
);
650 MPFR_TMP_FREE(marker
);
652 printf ("result is a="); mpfr_print_binary(a
); putchar('\n');
654 /* check that result is msb-normalized */
655 MPFR_ASSERTD(ap
[an
-1] > ~ap
[an
-1]);
656 MPFR_RET (inexact
* MPFR_INT_SIGN (a
));