/* Fused multiply-add.
   Copyright (C) 2007, 2009, 2011-2024 Free Software Foundation, Inc.

   This file is free software: you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation, either version 3 of the
   License, or (at your option) any later version.

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Written by Bruno Haible <bruno@clisp.org>, 2011.  */
19 #if ! defined USE_LONG_DOUBLE
34 #include "integer_length.h"
/* Select the precision this translation unit implements.  The same source
   is compiled three times: FUNC is the public entry point, DOUBLE the
   floating-point type, FREXP/LDEXP the matching <math.h> primitives,
   MIN_EXP/MANT_BIT the type's exponent/mantissa parameters, and L_() the
   literal-suffix macro.  */
#ifdef USE_LONG_DOUBLE
# define FUNC fmal
# define DOUBLE long double
# define FREXP frexpl
# define LDEXP ldexpl
# define MIN_EXP LDBL_MIN_EXP
# define MANT_BIT LDBL_MANT_BIT
# define L_(literal) literal##L
#elif ! defined USE_FLOAT
# define FUNC fma
# define DOUBLE double
# define FREXP frexp
# define LDEXP ldexp
# define MIN_EXP DBL_MIN_EXP
# define MANT_BIT DBL_MANT_BIT
# define L_(literal) literal
#else /* defined USE_FLOAT */
# define FUNC fmaf
# define DOUBLE float
# define FREXP frexpf
# define LDEXP ldexpf
# define MIN_EXP FLT_MIN_EXP
# define MANT_BIT FLT_MANT_BIT
# define L_(literal) literal##f
#endif
/* Maximum of two values.
   NOTE: arguments are evaluated more than once; both uses below pass only
   constants or plain variables.  Must remain a macro (not a function),
   because it is used in constant expressions ('enum' initializers).  */
#define MAX(a,b) ((a) > (b) ? (a) : (b))

/* Minimum of two values.  Same caveats as MAX.  */
#define MIN(a,b) ((a) < (b) ? (a) : (b))
/* MSVC with option -fp:strict refuses to compile constant initializers that
   contain floating-point operations.  Pacify this compiler.  */
#if defined _MSC_VER && !defined __clang__
# pragma fenv_access (off)
#endif
/* Work around GCC 4.2.1 bug on OpenBSD 5.1/SPARC64: force intermediate
   stores through a volatile qualifier there; expand to nothing elsewhere
   (VOLATILE is used unconditionally below).  */
#if defined __GNUC__ && defined __sparc__
# define VOLATILE volatile
#else
# define VOLATILE
#endif
/* It is possible to write an implementation of fused multiply-add with
   floating-point operations alone.  See
     Sylvie Boldo, Guillaume Melquiond:
     Emulation of FMA and correctly-rounded sums: proved algorithms using
     rounding to odd.
     <https://www.lri.fr/~melquion/doc/08-tc.pdf>
   But it is complicated.
   Here we take the simpler (and probably slower) approach of doing
   multi-precision arithmetic.  */
/* We use the naming conventions of GNU gmp, but with a vastly simpler (and
   slower) implementation.  */
/* A limb: the basic digit of the multi-precision arithmetic.
   The static_asserts pin the exact widths the shift counts below rely on.  */
typedef unsigned int mp_limb_t;
#define GMP_LIMB_BITS 32
static_assert (sizeof (mp_limb_t) * CHAR_BIT == GMP_LIMB_BITS);

/* A double-limb: holds the full product of two limbs plus carries.  */
typedef unsigned long long mp_twolimb_t;
#define GMP_TWOLIMB_BITS 64
static_assert (sizeof (mp_twolimb_t) * CHAR_BIT == GMP_TWOLIMB_BITS);
/* Number of limbs needed for a single DOUBLE mantissa.  */
#define NLIMBS1 ((MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS)

/* Number of limbs needed for the accumulator: a 2*NLIMBS1-limb product,
   shifted by up to 2*MANT_BIT bits against the addend, plus one carry
   limb.  */
#define NLIMBS3 (3 * NLIMBS1 + 1)
108 /* Assuming 0.5 <= x < 1.0:
109 Convert the mantissa (x * 2^DBL_MANT_BIT) to a sequence of limbs. */
111 decode (DOUBLE x
, mp_limb_t limbs
[NLIMBS1
])
113 /* I'm not sure whether it's safe to cast a 'double' value between
114 2^31 and 2^32 to 'unsigned int', therefore play safe and cast only
115 'double' values between 0 and 2^31 (to 'unsigned int' or 'int',
117 So, we split the MANT_BIT bits of x into a number of chunks of
119 enum { chunk_count
= (MANT_BIT
- 1) / 31 + 1 };
120 /* Variables used for storing the bits limb after limb. */
121 mp_limb_t
*p
= limbs
+ NLIMBS1
- 1;
123 unsigned int bits_needed
= MANT_BIT
- (NLIMBS1
- 1) * GMP_LIMB_BITS
;
124 /* The bits bits_needed-1...0 need to be ORed into the accu.
125 1 <= bits_needed <= GMP_LIMB_BITS. */
126 /* Unroll the first 4 loop rounds. */
129 /* Here we still have MANT_BIT-0*31 bits to extract from x. */
130 enum { chunk_bits
= MIN (31, MANT_BIT
- 0 * 31) }; /* > 0, <= 31 */
132 x
*= (mp_limb_t
) 1 << chunk_bits
;
133 d
= (int) x
; /* 0 <= d < 2^chunk_bits. */
135 if (!(x
>= L_(0.0) && x
< L_(1.0)))
137 if (bits_needed
< chunk_bits
)
139 /* store bits_needed bits */
140 accu
|= d
>> (chunk_bits
- bits_needed
);
145 /* hold (chunk_bits - bits_needed) bits */
146 accu
= d
<< (GMP_LIMB_BITS
- (chunk_bits
- bits_needed
));
147 bits_needed
= GMP_LIMB_BITS
- (chunk_bits
- bits_needed
);
151 /* store chunk_bits bits */
152 accu
|= d
<< (bits_needed
- chunk_bits
);
153 bits_needed
-= chunk_bits
;
154 if (bits_needed
== 0)
161 bits_needed
= GMP_LIMB_BITS
;
167 /* Here we still have MANT_BIT-1*31 bits to extract from x. */
168 enum { chunk_bits
= MIN (31, MAX (MANT_BIT
- 1 * 31, 0)) }; /* > 0, <= 31 */
170 x
*= (mp_limb_t
) 1 << chunk_bits
;
171 d
= (int) x
; /* 0 <= d < 2^chunk_bits. */
173 if (!(x
>= L_(0.0) && x
< L_(1.0)))
175 if (bits_needed
< chunk_bits
)
177 /* store bits_needed bits */
178 accu
|= d
>> (chunk_bits
- bits_needed
);
183 /* hold (chunk_bits - bits_needed) bits */
184 accu
= d
<< (GMP_LIMB_BITS
- (chunk_bits
- bits_needed
));
185 bits_needed
= GMP_LIMB_BITS
- (chunk_bits
- bits_needed
);
189 /* store chunk_bits bits */
190 accu
|= d
<< (bits_needed
- chunk_bits
);
191 bits_needed
-= chunk_bits
;
192 if (bits_needed
== 0)
199 bits_needed
= GMP_LIMB_BITS
;
205 /* Here we still have MANT_BIT-2*31 bits to extract from x. */
206 enum { chunk_bits
= MIN (31, MAX (MANT_BIT
- 2 * 31, 0)) }; /* > 0, <= 31 */
208 x
*= (mp_limb_t
) 1 << chunk_bits
;
209 d
= (int) x
; /* 0 <= d < 2^chunk_bits. */
211 if (!(x
>= L_(0.0) && x
< L_(1.0)))
213 if (bits_needed
< chunk_bits
)
215 /* store bits_needed bits */
216 accu
|= d
>> (chunk_bits
- bits_needed
);
221 /* hold (chunk_bits - bits_needed) bits */
222 accu
= d
<< (GMP_LIMB_BITS
- (chunk_bits
- bits_needed
));
223 bits_needed
= GMP_LIMB_BITS
- (chunk_bits
- bits_needed
);
227 /* store chunk_bits bits */
228 accu
|= d
<< (bits_needed
- chunk_bits
);
229 bits_needed
-= chunk_bits
;
230 if (bits_needed
== 0)
237 bits_needed
= GMP_LIMB_BITS
;
243 /* Here we still have MANT_BIT-3*31 bits to extract from x. */
244 enum { chunk_bits
= MIN (31, MAX (MANT_BIT
- 3 * 31, 0)) }; /* > 0, <= 31 */
246 x
*= (mp_limb_t
) 1 << chunk_bits
;
247 d
= (int) x
; /* 0 <= d < 2^chunk_bits. */
249 if (!(x
>= L_(0.0) && x
< L_(1.0)))
251 if (bits_needed
< chunk_bits
)
253 /* store bits_needed bits */
254 accu
|= d
>> (chunk_bits
- bits_needed
);
259 /* hold (chunk_bits - bits_needed) bits */
260 accu
= d
<< (GMP_LIMB_BITS
- (chunk_bits
- bits_needed
));
261 bits_needed
= GMP_LIMB_BITS
- (chunk_bits
- bits_needed
);
265 /* store chunk_bits bits */
266 accu
|= d
<< (bits_needed
- chunk_bits
);
267 bits_needed
-= chunk_bits
;
268 if (bits_needed
== 0)
275 bits_needed
= GMP_LIMB_BITS
;
281 /* Here we still have MANT_BIT-4*31 bits to extract from x. */
284 for (k
= 4; k
< chunk_count
; k
++)
286 size_t chunk_bits
= MIN (31, MANT_BIT
- k
* 31); /* > 0, <= 31 */
288 x
*= (mp_limb_t
) 1 << chunk_bits
;
289 d
= (int) x
; /* 0 <= d < 2^chunk_bits. */
291 if (!(x
>= L_(0.0) && x
< L_(1.0)))
293 if (bits_needed
< chunk_bits
)
295 /* store bits_needed bits */
296 accu
|= d
>> (chunk_bits
- bits_needed
);
301 /* hold (chunk_bits - bits_needed) bits */
302 accu
= d
<< (GMP_LIMB_BITS
- (chunk_bits
- bits_needed
));
303 bits_needed
= GMP_LIMB_BITS
- (chunk_bits
- bits_needed
);
307 /* store chunk_bits bits */
308 accu
|= d
<< (bits_needed
- chunk_bits
);
309 bits_needed
-= chunk_bits
;
310 if (bits_needed
== 0)
317 bits_needed
= GMP_LIMB_BITS
;
322 /* We shouldn't get here. */
326 #ifndef USE_LONG_DOUBLE /* On FreeBSD 6.1/x86, 'long double' numbers sometimes
327 have excess precision. */
333 /* Multiply two sequences of limbs. */
335 multiply (mp_limb_t xlimbs
[NLIMBS1
], mp_limb_t ylimbs
[NLIMBS1
],
336 mp_limb_t prod_limbs
[2 * NLIMBS1
])
339 enum { len1
= NLIMBS1
};
340 enum { len2
= NLIMBS1
};
342 for (k
= len2
; k
> 0; )
344 for (i
= 0; i
< len1
; i
++)
346 mp_limb_t digit1
= xlimbs
[i
];
347 mp_twolimb_t carry
= 0;
348 for (j
= 0; j
< len2
; j
++)
350 mp_limb_t digit2
= ylimbs
[j
];
351 carry
+= (mp_twolimb_t
) digit1
* (mp_twolimb_t
) digit2
;
352 carry
+= prod_limbs
[i
+ j
];
353 prod_limbs
[i
+ j
] = (mp_limb_t
) carry
;
354 carry
= carry
>> GMP_LIMB_BITS
;
356 prod_limbs
[i
+ len2
] = (mp_limb_t
) carry
;
361 FUNC (DOUBLE x
, DOUBLE y
, DOUBLE z
)
363 if (isfinite (x
) && isfinite (y
))
367 /* x, y, z are all finite. */
368 if (x
== L_(0.0) || y
== L_(0.0))
372 /* x, y, z are all non-zero.
373 The result is x * y + z. */
375 int e
; /* exponent of x * y + z */
377 mp_limb_t sum
[NLIMBS3
];
381 int xys
; /* sign of x * y */
382 int zs
; /* sign of z */
383 int xye
; /* sum of exponents of x and y */
384 int ze
; /* exponent of z */
385 mp_limb_t summand1
[NLIMBS3
];
387 mp_limb_t summand2
[NLIMBS3
];
391 mp_limb_t zlimbs
[NLIMBS1
];
392 mp_limb_t xylimbs
[2 * NLIMBS1
];
395 DOUBLE xn
; /* normalized part of x */
396 DOUBLE yn
; /* normalized part of y */
397 DOUBLE zn
; /* normalized part of z */
398 int xe
; /* exponent of x */
399 int ye
; /* exponent of y */
400 mp_limb_t xlimbs
[NLIMBS1
];
401 mp_limb_t ylimbs
[NLIMBS1
];
425 /* xn, yn, zn are all positive.
426 The result is (-1)^xys * xn * yn + (-1)^zs * zn. */
427 xn
= FREXP (xn
, &xe
);
428 yn
= FREXP (yn
, &ye
);
429 zn
= FREXP (zn
, &ze
);
431 /* xn, yn, zn are all < 1.0 and >= 0.5.
433 (-1)^xys * 2^xye * xn * yn + (-1)^zs * 2^ze * zn. */
434 if (xye
< ze
- MANT_BIT
)
436 /* 2^xye * xn * yn < 2^xye <= 2^(ze-MANT_BIT-1) */
439 if (xye
- 2 * MANT_BIT
> ze
)
441 /* 2^ze * zn < 2^ze <= 2^(xye-2*MANT_BIT-1).
444 because it would round differently: A round-to-even
445 in the multiplication can be a round-up or round-down
446 here, due to z. So replace z with a value that doesn't
447 require the use of long bignums but that rounds the
450 ze
= xye
- 2 * MANT_BIT
- 1;
452 /* Convert mantissas of xn, yn, zn to limb sequences:
453 xlimbs = 2^MANT_BIT * xn
454 ylimbs = 2^MANT_BIT * yn
455 zlimbs = 2^MANT_BIT * zn */
459 /* Multiply the mantissas of xn and yn:
460 xylimbs = xlimbs * ylimbs */
461 multiply (xlimbs
, ylimbs
, xylimbs
);
464 (-1)^xys * 2^(xye-2*MANT_BIT) * xylimbs
465 + (-1)^zs * 2^(ze-MANT_BIT) * zlimbs.
467 e = min (xye-2*MANT_BIT, ze-MANT_BIT)
469 summand1 = 2^(xye-2*MANT_BIT-e) * xylimbs
470 summand2 = 2^(ze-MANT_BIT-e) * zlimbs */
471 e
= MIN (xye
- 2 * MANT_BIT
, ze
- MANT_BIT
);
472 if (e
== xye
- 2 * MANT_BIT
)
474 /* Simply copy the limbs of xylimbs. */
476 for (i
= 0; i
< 2 * NLIMBS1
; i
++)
477 summand1
[i
] = xylimbs
[i
];
478 summand1_len
= 2 * NLIMBS1
;
482 size_t ediff
= xye
- 2 * MANT_BIT
- e
;
483 /* Left shift the limbs of xylimbs by ediff bits. */
484 size_t ldiff
= ediff
/ GMP_LIMB_BITS
;
485 size_t shift
= ediff
% GMP_LIMB_BITS
;
487 for (i
= 0; i
< ldiff
; i
++)
492 for (i
= 0; i
< 2 * NLIMBS1
; i
++)
494 summand1
[ldiff
+ i
] = (xylimbs
[i
] << shift
) | carry
;
495 carry
= xylimbs
[i
] >> (GMP_LIMB_BITS
- shift
);
497 summand1
[ldiff
+ 2 * NLIMBS1
] = carry
;
498 summand1_len
= ldiff
+ 2 * NLIMBS1
+ 1;
502 for (i
= 0; i
< 2 * NLIMBS1
; i
++)
503 summand1
[ldiff
+ i
] = xylimbs
[i
];
504 summand1_len
= ldiff
+ 2 * NLIMBS1
;
506 /* Estimation of needed array size:
507 ediff = (xye - 2 * MANT_BIT) - (ze - MANT_BIT) <= MANT_BIT + 1
510 = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + 2 * NLIMBS1
511 <= (MANT_BIT + GMP_LIMB_BITS) / GMP_LIMB_BITS + 2 * NLIMBS1
514 if (!(summand1_len
<= NLIMBS3
))
517 if (e
== ze
- MANT_BIT
)
519 /* Simply copy the limbs of zlimbs. */
521 for (i
= 0; i
< NLIMBS1
; i
++)
522 summand2
[i
] = zlimbs
[i
];
523 summand2_len
= NLIMBS1
;
527 size_t ediff
= ze
- MANT_BIT
- e
;
528 /* Left shift the limbs of zlimbs by ediff bits. */
529 size_t ldiff
= ediff
/ GMP_LIMB_BITS
;
530 size_t shift
= ediff
% GMP_LIMB_BITS
;
532 for (i
= 0; i
< ldiff
; i
++)
537 for (i
= 0; i
< NLIMBS1
; i
++)
539 summand2
[ldiff
+ i
] = (zlimbs
[i
] << shift
) | carry
;
540 carry
= zlimbs
[i
] >> (GMP_LIMB_BITS
- shift
);
542 summand2
[ldiff
+ NLIMBS1
] = carry
;
543 summand2_len
= ldiff
+ NLIMBS1
+ 1;
547 for (i
= 0; i
< NLIMBS1
; i
++)
548 summand2
[ldiff
+ i
] = zlimbs
[i
];
549 summand2_len
= ldiff
+ NLIMBS1
;
551 /* Estimation of needed array size:
552 ediff = (ze - MANT_BIT) - (xye - 2 * MANT_BIT) <= 2 * MANT_BIT
555 = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1
556 <= (2 * MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1
559 if (!(summand2_len
<= NLIMBS3
))
564 (-1)^xys * 2^e * summand1 + (-1)^zs * 2^e * summand2. */
567 /* Perform an addition. */
573 for (i
= 0; i
< MIN (summand1_len
, summand2_len
); i
++)
575 mp_limb_t digit1
= summand1
[i
];
576 mp_limb_t digit2
= summand2
[i
];
577 sum
[i
] = digit1
+ digit2
+ carry
;
579 ? digit1
>= (mp_limb_t
)-1 - digit2
580 : digit1
> (mp_limb_t
)-1 - digit2
);
582 if (summand1_len
> summand2_len
)
583 for (; i
< summand1_len
; i
++)
585 mp_limb_t digit1
= summand1
[i
];
586 sum
[i
] = carry
+ digit1
;
587 carry
= carry
&& digit1
== (mp_limb_t
)-1;
590 for (; i
< summand2_len
; i
++)
592 mp_limb_t digit2
= summand2
[i
];
593 sum
[i
] = carry
+ digit2
;
594 carry
= carry
&& digit2
== (mp_limb_t
)-1;
602 /* Perform a subtraction. */
603 /* Compare summand1 and summand2 by magnitude. */
604 while (summand1
[summand1_len
- 1] == 0)
606 while (summand2
[summand2_len
- 1] == 0)
608 if (summand1_len
> summand2_len
)
610 else if (summand1_len
< summand2_len
)
614 size_t i
= summand1_len
;
618 if (summand1
[i
] > summand2
[i
])
623 if (summand1
[i
] < summand2
[i
])
629 /* summand1 and summand2 are equal. */
635 /* Compute summand1 - summand2. */
640 for (i
= 0; i
< summand2_len
; i
++)
642 mp_limb_t digit1
= summand1
[i
];
643 mp_limb_t digit2
= summand2
[i
];
644 sum
[i
] = digit1
- digit2
- carry
;
645 carry
= (carry
? digit1
<= digit2
: digit1
< digit2
);
647 for (; i
< summand1_len
; i
++)
649 mp_limb_t digit1
= summand1
[i
];
650 sum
[i
] = digit1
- carry
;
651 carry
= carry
&& digit1
== 0;
655 sum_len
= summand1_len
;
659 /* Compute summand2 - summand1. */
664 for (i
= 0; i
< summand1_len
; i
++)
666 mp_limb_t digit1
= summand1
[i
];
667 mp_limb_t digit2
= summand2
[i
];
668 sum
[i
] = digit2
- digit1
- carry
;
669 carry
= (carry
? digit2
<= digit1
: digit2
< digit1
);
671 for (; i
< summand2_len
; i
++)
673 mp_limb_t digit2
= summand2
[i
];
674 sum
[i
] = digit2
- carry
;
675 carry
= carry
&& digit2
== 0;
679 sum_len
= summand2_len
;
684 (-1)^sign * 2^e * sum. */
685 /* Now perform the rounding to MANT_BIT mantissa bits. */
686 while (sum
[sum_len
- 1] == 0)
688 /* Here we know that the most significant limb, sum[sum_len - 1], is
691 /* How many bits the sum has. */
692 unsigned int sum_bits
=
693 integer_length (sum
[sum_len
- 1]) + (sum_len
- 1) * GMP_LIMB_BITS
;
694 /* How many bits to keep when rounding. */
695 unsigned int keep_bits
;
696 /* How many bits to round off. */
697 unsigned int roundoff_bits
;
698 if (e
+ (int) sum_bits
>= MIN_EXP
)
699 /* 2^e * sum >= 2^(MIN_EXP-1).
700 result will be a normalized number. */
701 keep_bits
= MANT_BIT
;
702 else if (e
+ (int) sum_bits
>= MIN_EXP
- MANT_BIT
)
703 /* 2^e * sum >= 2^(MIN_EXP-MANT_BIT-1).
704 result will be a denormalized number or rounded to zero. */
705 keep_bits
= e
+ (int) sum_bits
- (MIN_EXP
+ MANT_BIT
);
707 /* 2^e * sum < 2^(MIN_EXP-MANT_BIT-1). Round to zero. */
709 /* Note: 0 <= keep_bits <= MANT_BIT. */
710 if (sum_bits
<= keep_bits
)
714 keep_bits
= sum_bits
;
719 roundoff_bits
= sum_bits
- keep_bits
; /* > 0, <= sum_bits */
721 #if HAVE_FEGETROUND && defined FE_TOWARDZERO
722 /* Cf. <https://pubs.opengroup.org/onlinepubs/9699919799/functions/fegetround.html> */
723 int rounding_mode
= fegetround ();
724 if (rounding_mode
== FE_TOWARDZERO
)
726 # if defined FE_DOWNWARD /* not defined on sh4 */
727 else if (rounding_mode
== FE_DOWNWARD
)
730 # if defined FE_UPWARD /* not defined on sh4 */
731 else if (rounding_mode
== FE_UPWARD
)
735 /* Cf. <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/float.h.html> */
736 int rounding_mode
= FLT_ROUNDS
;
737 if (rounding_mode
== 0) /* toward zero */
739 else if (rounding_mode
== 3) /* toward negative infinity */
741 else if (rounding_mode
== 2) /* toward positive infinity */
746 /* Round to nearest. */
748 /* Test bit (roundoff_bits-1). */
749 if ((sum
[(roundoff_bits
- 1) / GMP_LIMB_BITS
]
750 >> ((roundoff_bits
- 1) % GMP_LIMB_BITS
)) & 1)
752 /* Test bits roundoff_bits-1 .. 0. */
754 ((sum
[(roundoff_bits
- 1) / GMP_LIMB_BITS
]
755 & (((mp_limb_t
) 1 << ((roundoff_bits
- 1) % GMP_LIMB_BITS
)) - 1))
760 for (i
= (roundoff_bits
- 1) / GMP_LIMB_BITS
- 1; i
>= 0; i
--)
768 /* Round to even. Test bit roundoff_bits. */
769 round_up
= ((sum
[roundoff_bits
/ GMP_LIMB_BITS
]
770 >> (roundoff_bits
% GMP_LIMB_BITS
)) & 1);
777 /* Perform the rounding. */
779 size_t i
= roundoff_bits
/ GMP_LIMB_BITS
;
790 | (((mp_limb_t
) 1 << (roundoff_bits
% GMP_LIMB_BITS
)) - 1))
794 /* Propagate carry. */
795 while (i
< sum_len
- 1)
803 /* sum[i] is the most significant limb that was
805 if (i
== sum_len
- 1 && (sum
[i
] & (sum
[i
] - 1)) == 0)
807 /* Through the carry, one more bit is needed. */
812 /* Instead of requiring one more limb of memory,
813 perform a shift by one bit, and adjust the
815 sum
[i
] = (mp_limb_t
) 1 << (GMP_LIMB_BITS
- 1);
818 /* The bit sequence has the form 1000...000. */
825 sum
[i
] &= ((mp_limb_t
) -1 << (roundoff_bits
% GMP_LIMB_BITS
));
826 if (i
== sum_len
- 1 && sum
[i
] == 0)
827 /* The entire sum has become zero. */
833 (-1)^sign * 2^e * sum
834 and here we know that
835 2^(sum_bits-1) <= sum < 2^sum_bits,
836 and sum is a multiple of 2^(sum_bits-keep_bits), where
837 0 < keep_bits <= MANT_BIT and keep_bits <= sum_bits.
838 (If keep_bits was initially 0, the rounding either returned zero
839 or produced a bit sequence of the form 1000...000, setting
842 /* Split the keep_bits bits into chunks of at most 32 bits. */
843 unsigned int chunk_count
= (keep_bits
- 1) / GMP_LIMB_BITS
+ 1;
844 /* 1 <= chunk_count <= ceil (sum_bits / GMP_LIMB_BITS) = sum_len. */
845 static const DOUBLE chunk_multiplier
= /* 2^-GMP_LIMB_BITS */
846 L_(1.0) / ((DOUBLE
) (1 << (GMP_LIMB_BITS
/ 2))
847 * (DOUBLE
) (1 << ((GMP_LIMB_BITS
+ 1) / 2)));
848 unsigned int shift
= sum_bits
% GMP_LIMB_BITS
;
850 if (MANT_BIT
<= GMP_LIMB_BITS
)
852 /* Since keep_bits <= MANT_BIT <= GMP_LIMB_BITS,
853 chunk_count is 1. No need for a loop. */
855 fsum
= (DOUBLE
) sum
[sum_len
- 1];
858 ((sum
[sum_len
- 1] << (GMP_LIMB_BITS
- shift
))
859 | (sum_len
>= 2 ? sum
[sum_len
- 2] >> shift
: 0));
867 /* First loop round. */
868 fsum
= (DOUBLE
) sum
[sum_len
- k
- 1];
872 fsum
*= chunk_multiplier
;
873 fsum
+= (DOUBLE
) sum
[sum_len
- k
- 1];
878 /* First loop round. */
880 VOLATILE mp_limb_t chunk
=
881 (sum
[sum_len
- k
- 1] << (GMP_LIMB_BITS
- shift
))
882 | (sum_len
>= k
+ 2 ? sum
[sum_len
- k
- 2] >> shift
: 0);
883 fsum
= (DOUBLE
) chunk
;
888 fsum
*= chunk_multiplier
;
890 VOLATILE mp_limb_t chunk
=
891 (sum
[sum_len
- k
- 1] << (GMP_LIMB_BITS
- shift
))
892 | (sum
[sum_len
- k
- 2] >> shift
);
893 fsum
+= (DOUBLE
) chunk
;
898 fsum
= LDEXP (fsum
, e
+ (int) sum_bits
- GMP_LIMB_BITS
);
899 return (sign
? - fsum
: fsum
);