1 /* Copyright (C) 2007-2024 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 /*****************************************************************************
26 * Helper add functions (for fma)
28 * __BID_INLINE__ UINT64 get_add64(
29 * UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
30 * UINT64 sign_y, int exponent_y, UINT64 coefficient_y,
33 * __BID_INLINE__ UINT64 get_add128(
34 * UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
35 * UINT64 sign_y, int final_exponent_y, UINT128 CY,
36 * int extra_digits, int rounding_mode)
38 *****************************************************************************
40 * Algorithm description:
42 * get_add64: same as BID64 add, but arguments are unpacked and there
43 * are no special case checks
45 * get_add128: add 64-bit coefficient to 128-bit product (which contains
46 * 16+extra_digits decimal digits),
48 * - the exponents are compared and the two coefficients are
49 * properly aligned for addition/subtraction
50 * - multiple paths are needed
51 * - final result exponent is calculated and the lower term is
52 * rounded first if necessary, to avoid manipulating
53 * coefficients longer than 128 bits
55 ****************************************************************************/
57 #ifndef _INLINE_BID_ADD_H_
58 #define _INLINE_BID_ADD_H_
60 #include "bid_internal.h"
62 #define MAX_FORMAT_DIGITS 16
63 #define DECIMAL_EXPONENT_BIAS 398
64 #define MASK_BINARY_EXPONENT 0x7ff0000000000000ull
65 #define BINARY_EXPONENT_BIAS 0x3ff
66 #define UPPER_EXPON_LIMIT 51
68 ///////////////////////////////////////////////////////////////////////
70 // get_add64() is essentially the same as bid_add(), except that
71 // the arguments are unpacked
73 //////////////////////////////////////////////////////////////////////
75 get_add64 (UINT64 sign_x
, int exponent_x
, UINT64 coefficient_x
,
76 UINT64 sign_y
, int exponent_y
, UINT64 coefficient_y
,
77 int rounding_mode
, unsigned *fpsc
) {
78 UINT128 CA
, CT
, CT_new
;
79 UINT64 sign_a
, sign_b
, coefficient_a
, coefficient_b
, sign_s
, sign_ab
,
81 UINT64 saved_ca
, saved_cb
, C0_64
, C64
, remainder_h
, T1
, carry
, tmp
,
84 int exponent_a
, exponent_b
, diff_dec_expon
;
85 int bin_expon_ca
, extra_digits
, amount
, scale_k
, scale_ca
;
86 unsigned rmode
, status
;
88 // sort arguments by exponent
89 if (exponent_x
<= exponent_y
) {
91 exponent_a
= exponent_y
;
92 coefficient_a
= coefficient_y
;
94 exponent_b
= exponent_x
;
95 coefficient_b
= coefficient_x
;
98 exponent_a
= exponent_x
;
99 coefficient_a
= coefficient_x
;
101 exponent_b
= exponent_y
;
102 coefficient_b
= coefficient_y
;
105 // exponent difference
106 diff_dec_expon
= exponent_a
- exponent_b
;
108 /* get binary coefficients of x and y */
110 //--- get number of bits in the coefficients of x and y ---
112 tempx
.d
= (double) coefficient_a
;
113 bin_expon_ca
= ((tempx
.i
& MASK_BINARY_EXPONENT
) >> 52) - 0x3ff;
115 if (!coefficient_a
) {
116 return get_BID64 (sign_b
, exponent_b
, coefficient_b
, rounding_mode
,
119 if (diff_dec_expon
> MAX_FORMAT_DIGITS
) {
120 // normalize a to a 16-digit coefficient
122 scale_ca
= estimate_decimal_digits
[bin_expon_ca
];
123 if (coefficient_a
>= power10_table_128
[scale_ca
].w
[0])
126 scale_k
= 16 - scale_ca
;
128 coefficient_a
*= power10_table_128
[scale_k
].w
[0];
130 diff_dec_expon
-= scale_k
;
131 exponent_a
-= scale_k
;
133 /* get binary coefficients of x and y */
135 //--- get number of bits in the coefficients of x and y ---
136 tempx
.d
= (double) coefficient_a
;
137 bin_expon_ca
= ((tempx
.i
& MASK_BINARY_EXPONENT
) >> 52) - 0x3ff;
139 if (diff_dec_expon
> MAX_FORMAT_DIGITS
) {
140 #ifdef SET_STATUS_FLAGS
142 __set_status_flags (fpsc
, INEXACT_EXCEPTION
);
146 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
147 #ifndef IEEE_ROUND_NEAREST
148 if (((rounding_mode
) & 3) && coefficient_b
) // not ROUNDING_TO_NEAREST
150 switch (rounding_mode
) {
153 coefficient_a
-= ((((SINT64
) sign_a
) >> 63) | 1);
154 if (coefficient_a
< 1000000000000000ull) {
156 coefficient_a
= 9999999999999999ull;
157 } else if (coefficient_a
>= 10000000000000000ull) {
159 coefficient_a
= 1000000000000000ull;
165 coefficient_a
+= ((((SINT64
) sign_a
) >> 63) | 1);
166 if (coefficient_a
< 1000000000000000ull) {
168 coefficient_a
= 9999999999999999ull;
169 } else if (coefficient_a
>= 10000000000000000ull) {
171 coefficient_a
= 1000000000000000ull;
176 if (sign_a
!= sign_b
) {
178 if (coefficient_a
< 1000000000000000ull) {
180 coefficient_a
= 9999999999999999ull;
188 // check special case here
189 if ((coefficient_a
== 1000000000000000ull)
190 && (diff_dec_expon
== MAX_FORMAT_DIGITS
+ 1)
192 && (coefficient_b
> 5000000000000000ull)) {
193 coefficient_a
= 9999999999999999ull;
197 return get_BID64 (sign_a
, exponent_a
, coefficient_a
,
198 rounding_mode
, fpsc
);
201 // test whether coefficient_a*10^(exponent_a-exponent_b) may exceed 2^62
202 if (bin_expon_ca
+ estimate_bin_expon
[diff_dec_expon
] < 60) {
203 // coefficient_a*10^(exponent_a-exponent_b)<2^63
205 // multiply by 10^(exponent_a-exponent_b)
206 coefficient_a
*= power10_table_128
[diff_dec_expon
].w
[0];
209 sign_b
= ((SINT64
) sign_b
) >> 63;
210 // apply sign to coeff. of b
211 coefficient_b
= (coefficient_b
+ sign_b
) ^ sign_b
;
213 // apply sign to coefficient a
214 sign_a
= ((SINT64
) sign_a
) >> 63;
215 coefficient_a
= (coefficient_a
+ sign_a
) ^ sign_a
;
217 coefficient_a
+= coefficient_b
;
219 sign_s
= ((SINT64
) coefficient_a
) >> 63;
220 coefficient_a
= (coefficient_a
+ sign_s
) ^ sign_s
;
221 sign_s
&= 0x8000000000000000ull
;
223 // coefficient_a < 10^16 ?
224 if (coefficient_a
< power10_table_128
[MAX_FORMAT_DIGITS
].w
[0]) {
225 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
226 #ifndef IEEE_ROUND_NEAREST
227 if (rounding_mode
== ROUNDING_DOWN
&& (!coefficient_a
)
229 sign_s
= 0x8000000000000000ull
;
232 return get_BID64 (sign_s
, exponent_b
, coefficient_a
,
233 rounding_mode
, fpsc
);
235 // otherwise rounding is necessary
237 // already know coefficient_a<10^19
238 // coefficient_a < 10^17 ?
239 if (coefficient_a
< power10_table_128
[17].w
[0])
241 else if (coefficient_a
< power10_table_128
[18].w
[0])
246 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
247 #ifndef IEEE_ROUND_NEAREST
248 rmode
= rounding_mode
;
249 if (sign_s
&& (unsigned) (rmode
- 1) < 2)
257 coefficient_a
+= round_const_table
[rmode
][extra_digits
];
259 // get P*(2^M[extra_digits])/10^extra_digits
260 __mul_64x64_to_128 (CT
, coefficient_a
,
261 reciprocals10_64
[extra_digits
]);
263 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
264 amount
= short_recip_scale
[extra_digits
];
265 C64
= CT
.w
[1] >> amount
;
268 // coefficient_a*10^(exponent_a-exponent_b) is large
271 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
272 #ifndef IEEE_ROUND_NEAREST
273 rmode
= rounding_mode
;
274 if (sign_s
&& (unsigned) (rmode
- 1) < 2)
283 // check whether we can take faster path
284 scale_ca
= estimate_decimal_digits
[bin_expon_ca
];
286 sign_ab
= sign_a
^ sign_b
;
287 sign_ab
= ((SINT64
) sign_ab
) >> 63;
289 // T1 = 10^(16-diff_dec_expon)
290 T1
= power10_table_128
[16 - diff_dec_expon
].w
[0];
292 // get number of digits in coefficient_a
293 //P_ca = power10_table_128[scale_ca].w[0];
294 //P_ca_m1 = power10_table_128[scale_ca-1].w[0];
295 if (coefficient_a
>= power10_table_128
[scale_ca
].w
[0]) {
298 //P_ca = power10_table_128[scale_ca].w[0];
301 scale_k
= 16 - scale_ca
;
304 //Ts = (T1 + sign_ab) ^ sign_ab;
307 //X = coefficient_a + Ts - P_ca_m1;
310 saved_ca
= coefficient_a
- T1
;
312 (SINT64
) saved_ca
*(SINT64
) power10_table_128
[scale_k
].w
[0];
313 extra_digits
= diff_dec_expon
- scale_k
;
316 saved_cb
= (coefficient_b
+ sign_ab
) ^ sign_ab
;
317 // add 10^16 and rounding constant
319 saved_cb
+ 10000000000000000ull +
320 round_const_table
[rmode
][extra_digits
];
322 // get P*(2^M[extra_digits])/10^extra_digits
323 __mul_64x64_to_128 (CT
, coefficient_b
,
324 reciprocals10_64
[extra_digits
]);
326 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
327 amount
= short_recip_scale
[extra_digits
];
328 C0_64
= CT
.w
[1] >> amount
;
330 // result coefficient
331 C64
= C0_64
+ coefficient_a
;
332 // filter out difficult (corner) cases
333 // the following test is equivalent to
334 // ( (initial_coefficient_a + Ts) < P_ca &&
335 // (initial_coefficient_a + Ts) > P_ca_m1 ),
336 // which ensures the number of digits in coefficient_a does not change
337 // after adding (the appropriately scaled and rounded) coefficient_b
338 if ((UINT64
) (C64
- 1000000000000000ull - 1) >
339 9000000000000000ull - 2) {
340 if (C64
>= 10000000000000000ull) {
341 // result has more than 16 digits
343 // must divide coeff_a by 10
344 saved_ca
= saved_ca
+ T1
;
345 __mul_64x64_to_128 (CA
, saved_ca
, 0x3333333333333334ull
);
346 //reciprocals10_64[1]);
347 coefficient_a
= CA
.w
[1] >> 1;
349 saved_ca
- (coefficient_a
<< 3) - (coefficient_a
<< 1);
350 coefficient_a
= coefficient_a
- T1
;
353 /*90000000000000000 */ +rem_a
*
354 power10_table_128
[diff_dec_expon
].w
[0];
357 (SINT64
) (saved_ca
- T1
-
358 (T1
<< 3)) * (SINT64
) power10_table_128
[scale_k
-
363 saved_cb
+ 100000000000000000ull +
364 round_const_table
[rmode
][extra_digits
];
366 // get P*(2^M[extra_digits])/10^extra_digits
367 __mul_64x64_to_128 (CT
, coefficient_b
,
368 reciprocals10_64
[extra_digits
]);
370 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
371 amount
= short_recip_scale
[extra_digits
];
372 C0_64
= CT
.w
[1] >> amount
;
374 // result coefficient
375 C64
= C0_64
+ coefficient_a
;
376 } else if (C64
<= 1000000000000000ull) {
377 // less than 16 digits in result
379 (SINT64
) saved_ca
*(SINT64
) power10_table_128
[scale_k
+
384 (saved_cb
<< 3) + (saved_cb
<< 1) + 100000000000000000ull +
385 round_const_table
[rmode
][extra_digits
];
387 // get P*(2^M[extra_digits])/10^extra_digits
388 __mul_64x64_to_128 (CT_new
, coefficient_b
,
389 reciprocals10_64
[extra_digits
]);
391 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
392 amount
= short_recip_scale
[extra_digits
];
393 C0_64
= CT_new
.w
[1] >> amount
;
395 // result coefficient
396 C64_new
= C0_64
+ coefficient_a
;
397 if (C64_new
< 10000000000000000ull) {
399 #ifdef SET_STATUS_FLAGS
410 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
411 #ifndef IEEE_ROUND_NEAREST
412 if (rmode
== 0) //ROUNDING_TO_NEAREST
415 // check whether fractional part of initial_P/10^extra_digits
417 // this is the same as fractional part of
418 // (initial_P + 0.5*10^extra_digits)/10^extra_digits is exactly zero
421 remainder_h
= CT
.w
[1] << (64 - amount
);
423 // test whether fractional part is 0
424 if (!remainder_h
&& (CT
.w
[0] < reciprocals10_64
[extra_digits
])) {
430 #ifdef SET_STATUS_FLAGS
431 status
= INEXACT_EXCEPTION
;
434 remainder_h
= CT
.w
[1] << (64 - amount
);
437 case ROUNDING_TO_NEAREST
:
438 case ROUNDING_TIES_AWAY
:
439 // test whether fractional part is 0
440 if ((remainder_h
== 0x8000000000000000ull
)
441 && (CT
.w
[0] < reciprocals10_64
[extra_digits
]))
442 status
= EXACT_STATUS
;
445 case ROUNDING_TO_ZERO
:
446 if (!remainder_h
&& (CT
.w
[0] < reciprocals10_64
[extra_digits
]))
447 status
= EXACT_STATUS
;
451 __add_carry_out (tmp
, carry
, CT
.w
[0],
452 reciprocals10_64
[extra_digits
]);
453 if ((remainder_h
>> (64 - amount
)) + carry
>=
454 (((UINT64
) 1) << amount
))
455 status
= EXACT_STATUS
;
458 __set_status_flags (fpsc
, status
);
462 return get_BID64 (sign_s
, exponent_b
+ extra_digits
, C64
,
463 rounding_mode
, fpsc
);
467 ///////////////////////////////////////////////////////////////////
468 // round 128-bit coefficient and return result in BID64 format
469 // do not worry about midpoint cases
470 //////////////////////////////////////////////////////////////////
472 __bid_simple_round64_sticky (UINT64 sign
, int exponent
, UINT128 P
,
473 int extra_digits
, int rounding_mode
,
475 UINT128 Q_high
, Q_low
, C128
;
479 rmode
= rounding_mode
;
480 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
481 #ifndef IEEE_ROUND_NEAREST
482 if (sign
&& (unsigned) (rmode
- 1) < 2)
486 __add_128_64 (P
, P
, round_const_table
[rmode
][extra_digits
]);
488 // get P*(2^M[extra_digits])/10^extra_digits
489 __mul_128x128_full (Q_high
, Q_low
, P
,
490 reciprocals10_128
[extra_digits
]);
492 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
493 amount
= recip_scale
[extra_digits
];
494 __shr_128 (C128
, Q_high
, amount
);
496 C64
= __low_64 (C128
);
498 #ifdef SET_STATUS_FLAGS
500 __set_status_flags (fpsc
, INEXACT_EXCEPTION
);
504 return get_BID64 (sign
, exponent
, C64
, rounding_mode
, fpsc
);
507 ///////////////////////////////////////////////////////////////////
508 // round 128-bit coefficient and return result in BID64 format
509 ///////////////////////////////////////////////////////////////////
511 __bid_full_round64 (UINT64 sign
, int exponent
, UINT128 P
,
512 int extra_digits
, int rounding_mode
,
514 UINT128 Q_high
, Q_low
, C128
, Stemp
, PU
;
515 UINT64 remainder_h
, C64
, carry
, CY
;
516 int amount
, amount2
, rmode
, status
= 0;
519 if (exponent
>= -16 && (extra_digits
+ exponent
< 0)) {
520 extra_digits
= -exponent
;
521 #ifdef SET_STATUS_FLAGS
522 if (extra_digits
> 0) {
523 rmode
= rounding_mode
;
524 if (sign
&& (unsigned) (rmode
- 1) < 2)
526 __add_128_128 (PU
, P
,
527 round_const_table_128
[rmode
][extra_digits
]);
528 if (__unsigned_compare_gt_128
529 (power10_table_128
[extra_digits
+ 15], PU
))
530 status
= UNDERFLOW_EXCEPTION
;
536 if (extra_digits
> 0) {
537 exponent
+= extra_digits
;
538 rmode
= rounding_mode
;
539 if (sign
&& (unsigned) (rmode
- 1) < 2)
541 __add_128_128 (P
, P
, round_const_table_128
[rmode
][extra_digits
]);
543 // get P*(2^M[extra_digits])/10^extra_digits
544 __mul_128x128_full (Q_high
, Q_low
, P
,
545 reciprocals10_128
[extra_digits
]);
547 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
548 amount
= recip_scale
[extra_digits
];
549 __shr_128_long (C128
, Q_high
, amount
);
551 C64
= __low_64 (C128
);
553 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
554 #ifndef IEEE_ROUND_NEAREST
555 if (rmode
== 0) //ROUNDING_TO_NEAREST
558 // check whether fractional part of initial_P/10^extra_digits
562 amount2
= 64 - amount
;
565 remainder_h
>>= amount2
;
566 remainder_h
= remainder_h
& Q_high
.w
[0];
569 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
570 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
572 reciprocals10_128
[extra_digits
].w
[0]))) {
578 #ifdef SET_STATUS_FLAGS
579 status
|= INEXACT_EXCEPTION
;
582 remainder_h
= Q_high
.w
[0] << (64 - amount
);
585 case ROUNDING_TO_NEAREST
:
586 case ROUNDING_TIES_AWAY
:
587 // test whether fractional part is 0
588 if (remainder_h
== 0x8000000000000000ull
589 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
590 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
592 reciprocals10_128
[extra_digits
].w
[0])))
593 status
= EXACT_STATUS
;
596 case ROUNDING_TO_ZERO
:
598 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
599 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
601 reciprocals10_128
[extra_digits
].w
[0])))
602 status
= EXACT_STATUS
;
606 __add_carry_out (Stemp
.w
[0], CY
, Q_low
.w
[0],
607 reciprocals10_128
[extra_digits
].w
[0]);
608 __add_carry_in_out (Stemp
.w
[1], carry
, Q_low
.w
[1],
609 reciprocals10_128
[extra_digits
].w
[1], CY
);
610 if ((remainder_h
>> (64 - amount
)) + carry
>=
611 (((UINT64
) 1) << amount
))
612 status
= EXACT_STATUS
;
615 __set_status_flags (fpsc
, status
);
622 if (rounding_mode
== ROUNDING_DOWN
)
623 sign
= 0x8000000000000000ull
;
626 return get_BID64 (sign
, exponent
, C64
, rounding_mode
, fpsc
);
629 /////////////////////////////////////////////////////////////////////////////////
630 // round 192-bit coefficient (P, remainder_P) and return result in BID64 format
631 // the lowest 64 bits (remainder_P) are used for midpoint checking only
632 ////////////////////////////////////////////////////////////////////////////////
634 __bid_full_round64_remainder (UINT64 sign
, int exponent
, UINT128 P
,
635 int extra_digits
, UINT64 remainder_P
,
636 int rounding_mode
, unsigned *fpsc
,
637 unsigned uf_status
) {
638 UINT128 Q_high
, Q_low
, C128
, Stemp
;
639 UINT64 remainder_h
, C64
, carry
, CY
;
640 int amount
, amount2
, rmode
, status
= uf_status
;
642 rmode
= rounding_mode
;
643 if (sign
&& (unsigned) (rmode
- 1) < 2)
645 if (rmode
== ROUNDING_UP
&& remainder_P
) {
652 __add_128_64 (P
, P
, round_const_table
[rmode
][extra_digits
]);
654 // get P*(2^M[extra_digits])/10^extra_digits
655 __mul_128x128_full (Q_high
, Q_low
, P
,
656 reciprocals10_128
[extra_digits
]);
658 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
659 amount
= recip_scale
[extra_digits
];
660 __shr_128 (C128
, Q_high
, amount
);
662 C64
= __low_64 (C128
);
664 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
665 #ifndef IEEE_ROUND_NEAREST
666 if (rmode
== 0) //ROUNDING_TO_NEAREST
668 if (!remainder_P
&& (C64
& 1)) {
669 // check whether fractional part of initial_P/10^extra_digits
673 amount2
= 64 - amount
;
676 remainder_h
>>= amount2
;
677 remainder_h
= remainder_h
& Q_high
.w
[0];
680 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
681 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
683 reciprocals10_128
[extra_digits
].w
[0]))) {
689 #ifdef SET_STATUS_FLAGS
690 status
|= INEXACT_EXCEPTION
;
694 remainder_h
= Q_high
.w
[0] << (64 - amount
);
697 case ROUNDING_TO_NEAREST
:
698 case ROUNDING_TIES_AWAY
:
699 // test whether fractional part is 0
700 if (remainder_h
== 0x8000000000000000ull
701 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
702 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
704 reciprocals10_128
[extra_digits
].w
[0])))
705 status
= EXACT_STATUS
;
708 case ROUNDING_TO_ZERO
:
710 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
711 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
713 reciprocals10_128
[extra_digits
].w
[0])))
714 status
= EXACT_STATUS
;
718 __add_carry_out (Stemp
.w
[0], CY
, Q_low
.w
[0],
719 reciprocals10_128
[extra_digits
].w
[0]);
720 __add_carry_in_out (Stemp
.w
[1], carry
, Q_low
.w
[1],
721 reciprocals10_128
[extra_digits
].w
[1], CY
);
722 if ((remainder_h
>> (64 - amount
)) + carry
>=
723 (((UINT64
) 1) << amount
))
724 status
= EXACT_STATUS
;
727 __set_status_flags (fpsc
, status
);
732 #ifdef SET_STATUS_FLAGS
734 __set_status_flags (fpsc
, uf_status
| INEXACT_EXCEPTION
);
739 return get_BID64 (sign
, exponent
+ extra_digits
, C64
, rounding_mode
,
744 ///////////////////////////////////////////////////////////////////
745 // get P/10^extra_digits
746 // result fits in 64 bits
747 ///////////////////////////////////////////////////////////////////
748 __BID_INLINE__ UINT64
749 __truncate (UINT128 P
, int extra_digits
)
750 // extra_digits <= 16
752 UINT128 Q_high
, Q_low
, C128
;
756 // get P*(2^M[extra_digits])/10^extra_digits
757 __mul_128x128_full (Q_high
, Q_low
, P
,
758 reciprocals10_128
[extra_digits
]);
760 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
761 amount
= recip_scale
[extra_digits
];
762 __shr_128 (C128
, Q_high
, amount
);
764 C64
= __low_64 (C128
);
770 ///////////////////////////////////////////////////////////////////
771 // return number of decimal digits in 128-bit value X
772 ///////////////////////////////////////////////////////////////////
774 __get_dec_digits64 (UINT128 X
) {
776 int digits_x
, bin_expon_cx
;
779 //--- get number of bits in the coefficients of x and y ---
780 tempx
.d
= (double) X
.w
[0];
781 bin_expon_cx
= ((tempx
.i
& MASK_BINARY_EXPONENT
) >> 52) - 0x3ff;
782 // get number of decimal digits in the coeff_x
783 digits_x
= estimate_decimal_digits
[bin_expon_cx
];
784 if (X
.w
[0] >= power10_table_128
[digits_x
].w
[0])
788 tempx
.d
= (double) X
.w
[1];
789 bin_expon_cx
= ((tempx
.i
& MASK_BINARY_EXPONENT
) >> 52) - 0x3ff;
790 // get number of decimal digits in the coeff_x
791 digits_x
= estimate_decimal_digits
[bin_expon_cx
+ 64];
792 if (__unsigned_compare_ge_128 (X
, power10_table_128
[digits_x
]))
799 ////////////////////////////////////////////////////////////////////////////////
801 // add 64-bit coefficient to 128-bit coefficient, return result in BID64 format
803 ////////////////////////////////////////////////////////////////////////////////
804 __BID_INLINE__ UINT64
805 get_add128 (UINT64 sign_x
, int exponent_x
, UINT64 coefficient_x
,
806 UINT64 sign_y
, int final_exponent_y
, UINT128 CY
,
807 int extra_digits
, int rounding_mode
, unsigned *fpsc
) {
808 UINT128 CY_L
, CX
, FS
, F
, CT
, ST
, T2
;
809 UINT64 CYh
, CY0L
, T
, S
, coefficient_y
, remainder_y
;
812 int diff_dec_expon
, extra_digits2
, exponent_y
, status
;
813 int extra_dx
, diff_dec2
, bin_expon_cx
, digits_x
, rmode
;
815 // CY has more than 16 decimal digits
817 exponent_y
= final_exponent_y
- extra_digits
;
819 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
822 #ifdef IEEE_ROUND_NEAREST
826 if (exponent_x
> exponent_y
) {
828 //--- get number of bits in the coefficients of x and y ---
829 tempx
.d
= (double) coefficient_x
;
830 bin_expon_cx
= ((tempx
.i
& MASK_BINARY_EXPONENT
) >> 52) - 0x3ff;
831 // get number of decimal digits in the coeff_x
832 digits_x
= estimate_decimal_digits
[bin_expon_cx
];
833 if (coefficient_x
>= power10_table_128
[digits_x
].w
[0])
836 extra_dx
= 16 - digits_x
;
837 coefficient_x
*= power10_table_128
[extra_dx
].w
[0];
838 if ((sign_x
^ sign_y
) && (coefficient_x
== 1000000000000000ull)) {
840 coefficient_x
= 10000000000000000ull;
842 exponent_x
-= extra_dx
;
844 if (exponent_x
> exponent_y
) {
846 // exponent_x > exponent_y
847 diff_dec_expon
= exponent_x
- exponent_y
;
849 if (exponent_x
<= final_exponent_y
+ 1) {
850 __mul_64x64_to_128 (CX
, coefficient_x
,
851 power10_table_128
[diff_dec_expon
].w
[0]);
853 if (sign_x
== sign_y
) {
854 __add_128_128 (CT
, CY
, CX
);
856 final_exponent_y
) /*&& (final_exponent_y>0) */ )
858 if (__unsigned_compare_ge_128
859 (CT
, power10_table_128
[16 + extra_digits
]))
862 __sub_128_128 (CT
, CY
, CX
);
863 if (((SINT64
) CT
.w
[1]) < 0) {
864 CT
.w
[0] = 0 - CT
.w
[0];
865 CT
.w
[1] = 0 - CT
.w
[1];
869 } else if (!(CT
.w
[1] | CT
.w
[0])) {
872 ROUNDING_DOWN
) ? 0 : 0x8000000000000000ull
;
874 if ((exponent_x
+ 1 >=
875 final_exponent_y
) /*&& (final_exponent_y>=0) */ ) {
876 extra_digits
= __get_dec_digits64 (CT
) - 16;
877 if (extra_digits
<= 0) {
878 if (!CT
.w
[0] && rounding_mode
== ROUNDING_DOWN
)
879 sign_y
= 0x8000000000000000ull
;
880 return get_BID64 (sign_y
, exponent_y
, CT
.w
[0],
881 rounding_mode
, fpsc
);
884 if (__unsigned_compare_gt_128
885 (power10_table_128
[15 + extra_digits
], CT
))
889 return __bid_full_round64 (sign_y
, exponent_y
, CT
, extra_digits
,
890 rounding_mode
, fpsc
);
892 // diff_dec2+extra_digits is the number of digits to eliminate from
894 diff_dec2
= exponent_x
- final_exponent_y
;
896 if (diff_dec2
>= 17) {
897 #ifndef IEEE_ROUND_NEAREST
898 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
899 if ((rounding_mode
) & 3) {
900 switch (rounding_mode
) {
903 D
= ((SINT64
) (sign_x
^ sign_y
)) >> 63;
910 D
= ((SINT64
) (sign_x
^ sign_y
)) >> 63;
915 case ROUNDING_TO_ZERO
:
916 if (sign_y
!= sign_x
) {
921 default: break; // default added to avoid compiler warning
923 if (coefficient_x
< 1000000000000000ull) {
926 D
+ (coefficient_x
<< 1) + (coefficient_x
<< 3);
932 #ifdef SET_STATUS_FLAGS
933 if (CY
.w
[1] | CY
.w
[0])
934 __set_status_flags (fpsc
, INEXACT_EXCEPTION
);
936 return get_BID64 (sign_x
, exponent_x
, coefficient_x
,
937 rounding_mode
, fpsc
);
939 // here exponent_x <= 16+final_exponent_y
941 // truncate CY to 16 dec. digits
942 CYh
= __truncate (CY
, extra_digits
);
945 T
= power10_table_128
[extra_digits
].w
[0];
946 __mul_64x64_to_64 (CY0L
, CYh
, T
);
948 remainder_y
= CY
.w
[0] - CY0L
;
950 // align coeff_x, CYh
951 __mul_64x64_to_128 (CX
, coefficient_x
,
952 power10_table_128
[diff_dec2
].w
[0]);
954 if (sign_x
== sign_y
) {
955 __add_128_64 (CT
, CX
, CYh
);
956 if (__unsigned_compare_ge_128
957 (CT
, power10_table_128
[16 + diff_dec2
]))
962 __sub_128_64 (CT
, CX
, CYh
);
963 if (__unsigned_compare_gt_128
964 (power10_table_128
[15 + diff_dec2
], CT
))
968 return __bid_full_round64_remainder (sign_x
, final_exponent_y
, CT
,
969 diff_dec2
, remainder_y
,
970 rounding_mode
, fpsc
, 0);
973 // Here (exponent_x <= exponent_y)
975 diff_dec_expon
= exponent_y
- exponent_x
;
977 if (diff_dec_expon
> MAX_FORMAT_DIGITS
) {
978 rmode
= rounding_mode
;
980 if ((sign_x
^ sign_y
)) {
984 if (__unsigned_compare_gt_128
985 (power10_table_128
[15 + extra_digits
], CY
)) {
990 CY
.w
[0] = 1000000000000000ull;
996 __scale128_10 (CY
, CY
);
1000 return __bid_simple_round64_sticky (sign_y
, final_exponent_y
, CY
,
1001 extra_digits
, rmode
, fpsc
);
1003 // apply sign to coeff_x
1005 sign_x
= ((SINT64
) sign_x
) >> 63;
1006 CX
.w
[0] = (coefficient_x
+ sign_x
) ^ sign_x
;
1009 // check whether CY (rounded to 16 digits) and CX have
1010 // any digits in the same position
1011 diff_dec2
= final_exponent_y
- exponent_x
;
1013 if (diff_dec2
<= 17) {
1014 // align CY to 10^ex
1015 S
= power10_table_128
[diff_dec_expon
].w
[0];
1016 __mul_64x128_short (CY_L
, S
, CY
);
1018 __add_128_128 (ST
, CY_L
, CX
);
1019 extra_digits2
= __get_dec_digits64 (ST
) - 16;
1020 return __bid_full_round64 (sign_y
, exponent_x
, ST
, extra_digits2
,
1021 rounding_mode
, fpsc
);
1023 // truncate CY to 16 dec. digits
1024 CYh
= __truncate (CY
, extra_digits
);
1027 T
= power10_table_128
[extra_digits
].w
[0];
1028 __mul_64x64_to_64 (CY0L
, CYh
, T
);
1030 coefficient_y
= CY
.w
[0] - CY0L
;
1031 // add rounding constant
1032 rmode
= rounding_mode
;
1033 if (sign_y
&& (unsigned) (rmode
- 1) < 2)
1035 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1036 #ifndef IEEE_ROUND_NEAREST
1037 if (!(rmode
& 3)) //ROUNDING_TO_NEAREST
1041 coefficient_y
+= round_const_table
[rmode
][extra_digits
];
1043 // align coefficient_y, coefficient_x
1044 S
= power10_table_128
[diff_dec_expon
].w
[0];
1045 __mul_64x64_to_128 (F
, coefficient_y
, S
);
1048 __add_128_128 (FS
, F
, CX
);
1050 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1051 #ifndef IEEE_ROUND_NEAREST
1052 if (rmode
== 0) //ROUNDING_TO_NEAREST
1055 // rounding code, here RN_EVEN
1056 // 10^(extra_digits+diff_dec_expon)
1057 T2
= power10_table_128
[diff_dec_expon
+ extra_digits
];
1058 if (__unsigned_compare_gt_128 (FS
, T2
)
1059 || ((CYh
& 1) && __test_equal_128 (FS
, T2
))) {
1061 __sub_128_128 (FS
, FS
, T2
);
1065 #ifndef IEEE_ROUND_NEAREST
1066 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1067 if (rmode
== 4) //ROUNDING_TO_NEAREST
1070 // rounding code, here RN_AWAY
1071 // 10^(extra_digits+diff_dec_expon)
1072 T2
= power10_table_128
[diff_dec_expon
+ extra_digits
];
1073 if (__unsigned_compare_ge_128 (FS
, T2
)) {
1075 __sub_128_128 (FS
, FS
, T2
);
1079 #ifndef IEEE_ROUND_NEAREST
1080 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1083 case ROUNDING_TO_ZERO
:
1084 if ((SINT64
) FS
.w
[1] < 0) {
1086 if (CYh
< 1000000000000000ull) {
1087 CYh
= 9999999999999999ull;
1091 T2
= power10_table_128
[diff_dec_expon
+ extra_digits
];
1092 if (__unsigned_compare_ge_128 (FS
, T2
)) {
1094 __sub_128_128 (FS
, FS
, T2
);
1099 if ((SINT64
) FS
.w
[1] < 0)
1101 T2
= power10_table_128
[diff_dec_expon
+ extra_digits
];
1102 if (__unsigned_compare_gt_128 (FS
, T2
)) {
1104 __sub_128_128 (FS
, FS
, T2
);
1105 } else if ((FS
.w
[1] == T2
.w
[1]) && (FS
.w
[0] == T2
.w
[0])) {
1107 FS
.w
[1] = FS
.w
[0] = 0;
1108 } else if (FS
.w
[1] | FS
.w
[0])
1111 default: break; // default added to avoid compiler warning
1116 #ifdef SET_STATUS_FLAGS
1117 status
= INEXACT_EXCEPTION
;
1118 #ifndef IEEE_ROUND_NEAREST
1119 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1126 round_const_table_128
[0][diff_dec_expon
+ extra_digits
].w
[1])
1128 round_const_table_128
[0][diff_dec_expon
+
1129 extra_digits
].w
[0]))
1130 status
= EXACT_STATUS
;
1132 #ifndef IEEE_ROUND_NEAREST
1133 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1134 else if (!FS
.w
[1] && !FS
.w
[0])
1135 status
= EXACT_STATUS
;
1139 __set_status_flags (fpsc
, status
);
1142 return get_BID64 (sign_y
, final_exponent_y
, CYh
, rounding_mode
,
1148 //////////////////////////////////////////////////////////////////////////
1150 // If coefficient_z is less than 16 digits long, normalize to 16 digits
1152 /////////////////////////////////////////////////////////////////////////
1154 BID_normalize (UINT64 sign_z
, int exponent_z
,
1155 UINT64 coefficient_z
, UINT64 round_dir
, int round_flag
,
1156 int rounding_mode
, unsigned *fpsc
) {
1159 int digits_z
, bin_expon
, scale
, rmode
;
1161 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1162 #ifndef IEEE_ROUND_NEAREST
1163 rmode
= rounding_mode
;
1164 if (sign_z
&& (unsigned) (rmode
- 1) < 2)
1167 if (coefficient_z
>= power10_table_128
[15].w
[0])
1172 //--- get number of bits in the coefficients of x and y ---
1173 tempx
.d
= (double) coefficient_z
;
1174 bin_expon
= ((tempx
.i
& MASK_BINARY_EXPONENT
) >> 52) - 0x3ff;
1175 // get number of decimal digits in the coeff_x
1176 digits_z
= estimate_decimal_digits
[bin_expon
];
1177 if (coefficient_z
>= power10_table_128
[digits_z
].w
[0])
1180 scale
= 16 - digits_z
;
1181 exponent_z
-= scale
;
1182 if (exponent_z
< 0) {
1183 scale
+= exponent_z
;
1186 coefficient_z
*= power10_table_128
[scale
].w
[0];
1188 #ifdef SET_STATUS_FLAGS
1190 __set_status_flags (fpsc
, INEXACT_EXCEPTION
);
1191 if (coefficient_z
< 1000000000000000ull)
1192 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
);
1193 else if ((coefficient_z
== 1000000000000000ull) && !exponent_z
1194 && ((SINT64
) (round_dir
^ sign_z
) < 0) && round_flag
1195 && (rmode
== ROUNDING_DOWN
|| rmode
== ROUNDING_TO_ZERO
))
1196 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
);
1200 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1201 #ifndef IEEE_ROUND_NEAREST
1202 if (round_flag
&& (rmode
& 3)) {
1203 D
= round_dir
^ sign_z
;
1205 if (rmode
== ROUNDING_UP
) {
1211 if (coefficient_z
< 1000000000000000ull && exponent_z
) {
1212 coefficient_z
= 9999999999999999ull;
1220 return get_BID64 (sign_z
, exponent_z
, coefficient_z
, rounding_mode
,
1225 //////////////////////////////////////////////////////////////////////////
1227 // 0*10^ey + cz*10^ez, ey<ez
1229 //////////////////////////////////////////////////////////////////////////
1231 __BID_INLINE__ UINT64
1232 add_zero64 (int exponent_y
, UINT64 sign_z
, int exponent_z
,
1233 UINT64 coefficient_z
, unsigned *prounding_mode
,
1236 int bin_expon
, scale_k
, scale_cz
;
1239 diff_expon
= exponent_z
- exponent_y
;
1241 tempx
.d
= (double) coefficient_z
;
1242 bin_expon
= ((tempx
.i
& MASK_BINARY_EXPONENT
) >> 52) - 0x3ff;
1243 scale_cz
= estimate_decimal_digits
[bin_expon
];
1244 if (coefficient_z
>= power10_table_128
[scale_cz
].w
[0])
1247 scale_k
= 16 - scale_cz
;
1248 if (diff_expon
< scale_k
)
1249 scale_k
= diff_expon
;
1250 coefficient_z
*= power10_table_128
[scale_k
].w
[0];
1252 return get_BID64 (sign_z
, exponent_z
- scale_k
, coefficient_z
,
1253 *prounding_mode
, fpsc
);