1 /* mpfr_sub1 -- internal function to perform a "real" subtraction
3 Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
4 Contributed by the Arenaire and Cacao projects, INRIA.
6 This file is part of the GNU MPFR Library.
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or (at your
11 option) any later version.
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LIB. If not, write to
20 the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
21 MA 02110-1301, USA. */
23 #include "mpfr-impl.h"
25 /* compute sign(b) * (|b| - |c|), with |b| > |c|, diff_exp = EXP(b) - EXP(c)
26 Returns 0 iff result is exact,
27 a negative value when the result is less than the exact value,
28 a positive value otherwise.
32 mpfr_sub1 (mpfr_ptr a
, mpfr_srcptr b
, mpfr_srcptr c
, mp_rnd_t rnd_mode
)
35 mp_exp_unsigned_t diff_exp
;
36 mp_prec_t cancel
, cancel1
;
37 mp_size_t cancel2
, an
, bn
, cn
, cn0
;
38 mp_limb_t
*ap
, *bp
, *cp
;
39 mp_limb_t carry
, bb
, cc
, borrow
= 0;
40 int inexact
, shift_b
, shift_c
, is_exact
= 1, down
= 0, add_exp
= 0;
42 MPFR_TMP_DECL(marker
);
44 MPFR_TMP_MARK(marker
);
46 an
= MPFR_LIMB_SIZE(a
);
48 sign
= mpfr_cmp2 (b
, c
, &cancel
);
49 if (MPFR_UNLIKELY(sign
== 0))
51 if (rnd_mode
== GMP_RNDD
)
60 * If subtraction: sign(a) = sign * sign(b)
61 * If addition: sign(a) = sign of the larger argument in absolute value.
63 * Both cases can be simplidied in:
65 * if addition: sign(a) = sign * sign(b) = sign(b)
66 * if subtraction, b is greater, so sign(a) = sign(b)
68 * if subtraction, sign(a) = - sign(b)
69 * if addition, sign(a) = sign(c) (since c is greater)
70 * But if it is an addition, sign(b) and sign(c) are opposed!
71 * So sign(a) = - sign(b)
74 if (sign
< 0) /* swap b and c so that |b| > |c| */
77 MPFR_SET_OPPOSITE_SIGN (a
,b
);
81 MPFR_SET_SAME_SIGN (a
,b
);
83 /* Check if c is too small.
84 A more precise test is to replace 2 by
85 (rnd == GMP_RNDN) + mpfr_power2_raw (b)
86 but it is more expensive and not very useful */
87 if (MPFR_UNLIKELY (MPFR_GET_EXP (c
) <= MPFR_GET_EXP (b
)
88 - (mp_exp_t
) MAX (MPFR_PREC (a
), MPFR_PREC (b
)) - 2))
90 /* Remember, we can't have an exact result! */
91 /* A.AAAAAAAAAAAAAAAAA
94 /* A = S*ABS(B) +/- ulp(a) */
95 MPFR_SET_EXP (a
, MPFR_GET_EXP (b
));
96 MPFR_RNDRAW_EVEN (inexact
, a
, MPFR_MANT (b
), MPFR_PREC (b
),
97 rnd_mode
, MPFR_SIGN (a
),
98 if (MPFR_UNLIKELY ( ++MPFR_EXP (a
) > __gmpfr_emax
))
99 inexact
= mpfr_overflow (a
, rnd_mode
, MPFR_SIGN (a
)));
100 /* inexact = mpfr_set4 (a, b, rnd_mode, MPFR_SIGN (a)); */
104 But we know it isn't (Since we have to remove `c')
105 So if we round to Zero, we have to remove one ulp.
106 Otherwise the result is correctly rounded. */
107 if (MPFR_IS_LIKE_RNDZ (rnd_mode
, MPFR_IS_NEG (a
)))
110 MPFR_RET (- MPFR_INT_SIGN (a
));
112 MPFR_RET (MPFR_INT_SIGN (a
));
119 /* It isn't exact so Prec(b) > Prec(a) and the last
120 Prec(b)-Prec(a) bits of `b' are not zeros.
121 Which means that removing c from b can't generate a carry
122 execpt in case of even rounding.
123 In all other case the result and the inexact flag should be
124 correct (We can't have an exact result).
125 In case of EVEN rounding:
128 = 1.BBBBBBBBBBBBBx01 Rounded to Prec(b)
129 = 1.BBBBBBBBBBBBBx Nearest / Rounded to Prec(a)
131 1.BBBBBBBBBBBBB0 if inexact == EVEN_INEX (x == 0)
132 1.BBBBBBBBBBBBB1+1 if inexact == -EVEN_INEX (x == 1)
133 which means we get a wrong rounded result if x==1,
134 i.e. inexact= MPFR_EVEN_INEX */
135 if (MPFR_UNLIKELY (inexact
== MPFR_EVEN_INEX
*MPFR_INT_SIGN (a
)))
138 inexact
= -MPFR_INT_SIGN (a
);
144 diff_exp
= (mp_exp_unsigned_t
) MPFR_GET_EXP (b
) - MPFR_GET_EXP (c
);
146 /* reserve a space to store b aligned with the result, i.e. shifted by
147 (-cancel) % BITS_PER_MP_LIMB to the right */
148 bn
= MPFR_LIMB_SIZE (b
);
149 MPFR_UNSIGNED_MINUS_MODULO (shift_b
, cancel
);
150 cancel1
= (cancel
+ shift_b
) / BITS_PER_MP_LIMB
;
152 /* the high cancel1 limbs from b should not be taken into account */
153 if (MPFR_UNLIKELY (shift_b
== 0))
155 bp
= MPFR_MANT(b
); /* no need of an extra space */
156 /* Ensure ap != bp */
157 if (MPFR_UNLIKELY (ap
== bp
))
159 bp
= (mp_ptr
) MPFR_TMP_ALLOC(bn
* BYTES_PER_MP_LIMB
);
160 MPN_COPY (bp
, ap
, bn
);
165 bp
= (mp_ptr
) MPFR_TMP_ALLOC ((bn
+ 1) * BYTES_PER_MP_LIMB
);
166 bp
[0] = mpn_rshift (bp
+ 1, MPFR_MANT(b
), bn
++, shift_b
);
169 /* reserve a space to store c aligned with the result, i.e. shifted by
170 (diff_exp-cancel) % BITS_PER_MP_LIMB to the right */
171 cn
= MPFR_LIMB_SIZE(c
);
172 if ((UINT_MAX
% BITS_PER_MP_LIMB
) == (BITS_PER_MP_LIMB
-1)
173 && ((-(unsigned) 1)%BITS_PER_MP_LIMB
> 0))
174 shift_c
= (diff_exp
- cancel
) % BITS_PER_MP_LIMB
;
177 shift_c
= diff_exp
- (cancel
% BITS_PER_MP_LIMB
);
178 shift_c
= (shift_c
+ BITS_PER_MP_LIMB
) % BITS_PER_MP_LIMB
;
180 MPFR_ASSERTD( shift_c
>= 0 && shift_c
< BITS_PER_MP_LIMB
);
182 if (MPFR_UNLIKELY(shift_c
== 0))
185 /* Ensure ap != cp */
188 cp
= (mp_ptr
) MPFR_TMP_ALLOC (cn
* BYTES_PER_MP_LIMB
);
189 MPN_COPY(cp
, ap
, cn
);
194 cp
= (mp_ptr
) MPFR_TMP_ALLOC ((cn
+ 1) * BYTES_PER_MP_LIMB
);
195 cp
[0] = mpn_rshift (cp
+ 1, MPFR_MANT(c
), cn
++, shift_c
);
199 printf ("shift_b=%d shift_c=%d diffexp=%lu\n", shift_b
, shift_c
,
200 (unsigned long) diff_exp
);
203 MPFR_ASSERTD (ap
!= cp
);
204 MPFR_ASSERTD (bp
!= cp
);
206 /* here we have shift_c = (diff_exp - cancel) % BITS_PER_MP_LIMB,
207 0 <= shift_c < BITS_PER_MP_LIMB
208 thus we want cancel2 = ceil((cancel - diff_exp) / BITS_PER_MP_LIMB) */
210 cancel2
= (long int) (cancel
- (diff_exp
- shift_c
)) / BITS_PER_MP_LIMB
;
211 /* the high cancel2 limbs from b should not be taken into account */
213 printf ("cancel=%lu cancel1=%lu cancel2=%ld\n",
214 (unsigned long) cancel
, (unsigned long) cancel1
, (long) cancel2
);
218 <----------------+-----------|---->
219 <----------PREC(a)----------><-sh->
221 limbs bp[bn-cancel1-1]
222 <--...-----><----------------+-----------+----------->
224 limbs cp[cn-cancel2-1] cancel2 >= 0
225 <--...--><----------------+----------------+---------------->
226 (-cancel2) cancel2 < 0
227 limbs <----------------+---------------->
230 /* first part: put in ap[0..an-1] the value of high(b) - high(c),
231 where high(b) consists of the high an+cancel1 limbs of b,
232 and high(c) consists of the high an+cancel2 limbs of c.
235 /* copy high(b) into a */
236 if (MPFR_LIKELY(an
+ (mp_size_t
) cancel1
<= bn
))
237 /* a: <----------------+-----------|---->
238 b: <-----------------------------------------> */
239 MPN_COPY (ap
, bp
+ bn
- (an
+ cancel1
), an
);
241 /* a: <----------------+-----------|---->
242 b: <-------------------------> */
243 if ((mp_size_t
) cancel1
< bn
) /* otherwise b does not overlap with a */
245 MPN_ZERO (ap
, an
+ cancel1
- bn
);
246 MPN_COPY (ap
+ an
+ cancel1
- bn
, bp
, bn
- cancel1
);
252 printf("after copying high(b), a="); mpfr_print_binary(a
); putchar('\n');
255 /* subtract high(c) */
256 if (MPFR_LIKELY(an
+ cancel2
> 0)) /* otherwise c does not overlap with a */
262 if (an
+ cancel2
<= cn
)
263 /* a: <----------------------------->
264 c: <-----------------------------------------> */
265 mpn_sub_n (ap
, ap
, cp
+ cn
- (an
+ cancel2
), an
);
267 /* a: <---------------------------->
268 c: <-------------------------> */
270 ap2
= ap
+ an
+ cancel2
- cn
;
272 mpn_sub_n (ap2
, ap2
, cp
, cn
- cancel2
);
275 else /* cancel2 < 0 */
277 if (an
+ cancel2
<= cn
)
278 /* a: <----------------------------->
279 c: <-----------------------------> */
280 borrow
= mpn_sub_n (ap
, ap
, cp
+ cn
- (an
+ cancel2
),
283 /* a: <---------------------------->
284 c: <----------------> */
286 ap2
= ap
+ an
+ cancel2
- cn
;
287 borrow
= mpn_sub_n (ap2
, ap2
, cp
, cn
);
289 ap2
= ap
+ an
+ cancel2
;
290 mpn_sub_1 (ap2
, ap2
, -cancel2
, borrow
);
295 printf("after subtracting high(c), a=");
296 mpfr_print_binary(a
);
300 /* now perform rounding */
301 sh
= (mp_prec_t
) an
* BITS_PER_MP_LIMB
- MPFR_PREC(a
);
302 /* last unused bits from a */
303 carry
= ap
[0] & MPFR_LIMB_MASK (sh
);
306 if (MPFR_LIKELY(rnd_mode
== GMP_RNDN
))
310 is_exact
= (carry
== 0);
311 /* can decide except when carry = 2^(sh-1) [middle]
312 or carry = 0 [truncate, but cannot decide inexact flag] */
313 down
= (carry
< (MPFR_LIMB_ONE
<< (sh
- 1)));
314 if (carry
> (MPFR_LIMB_ONE
<< (sh
- 1)))
316 else if ((0 < carry
) && down
)
318 inexact
= -1; /* result if smaller than exact value */
323 else /* directed rounding: set rnd_mode to RNDZ iff towards zero */
325 if (MPFR_IS_RNDUTEST_OR_RNDDNOTTEST(rnd_mode
, MPFR_IS_NEG(a
)))
330 if (rnd_mode
== GMP_RNDZ
)
335 else /* round away */
340 /* we have to consider the low (bn - (an+cancel1)) limbs from b,
341 and the (cn - (an+cancel2)) limbs from c. */
344 cn
-= (long int) an
+ cancel2
;
347 printf ("last %d bits from a are %lu, bn=%ld, cn=%ld\n",
348 sh
, (unsigned long) carry
, (long) bn
, (long) cn
);
351 for (k
= 0; (bn
> 0) || (cn
> 0); k
= 1)
354 bb
= (bn
> 0) ? bp
[--bn
] : 0;
355 if ((cn
> 0) && (cn
-- <= cn0
))
360 /* down is set when low(b) < low(c) */
364 /* the case rounding to nearest with sh=0 is special since one couldn't
365 subtract above 1/2 ulp in the trailing limb of the result */
366 if ((rnd_mode
== GMP_RNDN
) && sh
== 0 && k
== 0)
368 mp_limb_t half
= MPFR_LIMB_HIGHBIT
;
370 is_exact
= (bb
== cc
);
372 /* add one ulp if bb > cc + half
373 truncate if cc - half < bb < cc + half
374 sub one ulp if bb < cc - half
394 printf (" bb=%lu cc=%lu down=%d is_exact=%d\n",
395 (unsigned long) bb
, (unsigned long) cc
, down
, is_exact
);
399 if (rnd_mode
== GMP_RNDZ
)
401 else if (rnd_mode
!= GMP_RNDN
) /* round away */
406 else /* round to nearest: special case here since for sh=k=0
407 bb = bb0 - MPFR_LIMB_HIGHBIT */
409 if (is_exact
&& sh
== 0)
411 /* For k=0 we can't decide exactness since it may depend
413 For k=1, the first low limbs matched: low(b)-low(c)<0. */
420 else if (down
&& sh
== 0)
424 inexact
= (is_exact
) ? 1 : -1;
431 if (rnd_mode
== GMP_RNDZ
)
436 else if (rnd_mode
!= GMP_RNDN
) /* round away */
438 else /* round to nearest */
456 if ((rnd_mode
== GMP_RNDN
) && !is_exact
)
458 /* even rounding rule */
459 if ((ap
[0] >> sh
) & 1)
467 inexact
= (down
) ? 1 : -1;
473 sub_one_ulp
: /* sub one unit in last place to a */
474 mpn_sub_1 (ap
, ap
, an
, MPFR_LIMB_ONE
<< sh
);
478 add_one_ulp
: /* add one unit in last place to a */
479 if (MPFR_UNLIKELY(mpn_add_1 (ap
, ap
, an
, MPFR_LIMB_ONE
<< sh
)))
480 /* result is a power of 2: 11111111111111 + 1 = 1000000000000000 */
482 ap
[an
-1] = MPFR_LIMB_HIGHBIT
;
485 inexact
= 1; /* result larger than exact value */
488 if (MPFR_UNLIKELY((ap
[an
-1] >> (BITS_PER_MP_LIMB
- 1)) == 0))
489 /* case 1 - epsilon */
491 ap
[an
-1] = MPFR_LIMB_HIGHBIT
;
496 /* we have to set MPFR_EXP(a) to MPFR_EXP(b) - cancel + add_exp, taking
497 care of underflows/overflows in that computation, and of the allowed
499 if (MPFR_LIKELY(cancel
))
503 cancel
-= add_exp
; /* still valid as unsigned long */
504 exp_a
= MPFR_GET_EXP (b
) - cancel
;
505 if (MPFR_UNLIKELY(exp_a
< __gmpfr_emin
))
507 MPFR_TMP_FREE(marker
);
508 if (rnd_mode
== GMP_RNDN
&&
509 (exp_a
< __gmpfr_emin
- 1 ||
510 (inexact
>= 0 && mpfr_powerof2_raw (a
))))
512 return mpfr_underflow (a
, rnd_mode
, MPFR_SIGN(a
));
514 MPFR_SET_EXP (a
, exp_a
);
516 else /* cancel = 0: MPFR_EXP(a) <- MPFR_EXP(b) + add_exp */
518 /* in case cancel = 0, add_exp can still be 1, in case b is just
519 below a power of two, c is very small, prec(a) < prec(b),
520 and rnd=away or nearest */
523 exp_b
= MPFR_GET_EXP (b
);
524 if (MPFR_UNLIKELY(add_exp
&& exp_b
== __gmpfr_emax
))
526 MPFR_TMP_FREE(marker
);
527 return mpfr_overflow (a
, rnd_mode
, MPFR_SIGN(a
));
529 MPFR_SET_EXP (a
, exp_b
+ add_exp
);
531 MPFR_TMP_FREE(marker
);
533 printf ("result is a="); mpfr_print_binary(a
); putchar('\n');
535 /* check that result is msb-normalized */
536 MPFR_ASSERTD(ap
[an
-1] > ~ap
[an
-1]);
537 MPFR_RET (inexact
* MPFR_INT_SIGN (a
));