1 /* Copyright (C) 2007 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
29 /*****************************************************************************
31 * Helper add functions (for fma)
33 * __BID_INLINE__ UINT64 get_add64(
34 * UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
35 * UINT64 sign_y, int exponent_y, UINT64 coefficient_y,
38 * __BID_INLINE__ UINT64 get_add128(
39 * UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
40 * UINT64 sign_y, int final_exponent_y, UINT128 CY,
41 * int extra_digits, int rounding_mode)
43 *****************************************************************************
45 * Algorithm description:
47 * get_add64: same as BID64 add, but arguments are unpacked and there
48 * are no special case checks
50 * get_add128: add 64-bit coefficient to 128-bit product (which contains
51 * 16+extra_digits decimal digits),
53 * - the exponents are compared and the two coefficients are
54 * properly aligned for addition/subtraction
55 * - multiple paths are needed
56 * - final result exponent is calculated and the lower term is
57 * rounded first if necessary, to avoid manipulating
58 * coefficients longer than 128 bits
60 ****************************************************************************/
62 #ifndef _INLINE_BID_ADD_H_
63 #define _INLINE_BID_ADD_H_
65 #include "bid_internal.h"
67 #define MAX_FORMAT_DIGITS 16
68 #define DECIMAL_EXPONENT_BIAS 398
69 #define MASK_BINARY_EXPONENT 0x7ff0000000000000ull
70 #define BINARY_EXPONENT_BIAS 0x3ff
71 #define UPPER_EXPON_LIMIT 51
73 ///////////////////////////////////////////////////////////////////////
75 // get_add64() is essentially the same as bid_add(), except that
76 // the arguments are unpacked
78 //////////////////////////////////////////////////////////////////////
80 get_add64 (UINT64 sign_x
, int exponent_x
, UINT64 coefficient_x
,
81 UINT64 sign_y
, int exponent_y
, UINT64 coefficient_y
,
82 int rounding_mode
, unsigned *fpsc
) {
83 UINT128 CA
, CT
, CT_new
;
84 UINT64 sign_a
, sign_b
, coefficient_a
, coefficient_b
, sign_s
, sign_ab
,
86 UINT64 saved_ca
, saved_cb
, C0_64
, C64
, remainder_h
, T1
, carry
, tmp
,
89 int exponent_a
, exponent_b
, diff_dec_expon
;
90 int bin_expon_ca
, extra_digits
, amount
, scale_k
, scale_ca
;
91 unsigned rmode
, status
;
93 // sort arguments by exponent
94 if (exponent_x
<= exponent_y
) {
96 exponent_a
= exponent_y
;
97 coefficient_a
= coefficient_y
;
99 exponent_b
= exponent_x
;
100 coefficient_b
= coefficient_x
;
103 exponent_a
= exponent_x
;
104 coefficient_a
= coefficient_x
;
106 exponent_b
= exponent_y
;
107 coefficient_b
= coefficient_y
;
110 // exponent difference
111 diff_dec_expon
= exponent_a
- exponent_b
;
113 /* get binary coefficients of x and y */
115 //--- get number of bits in the coefficients of x and y ---
117 tempx
.d
= (double) coefficient_a
;
118 bin_expon_ca
= ((tempx
.i
& MASK_BINARY_EXPONENT
) >> 52) - 0x3ff;
120 if (!coefficient_a
) {
121 return get_BID64 (sign_b
, exponent_b
, coefficient_b
, rounding_mode
,
124 if (diff_dec_expon
> MAX_FORMAT_DIGITS
) {
125 // normalize a to a 16-digit coefficient
127 scale_ca
= estimate_decimal_digits
[bin_expon_ca
];
128 if (coefficient_a
>= power10_table_128
[scale_ca
].w
[0])
131 scale_k
= 16 - scale_ca
;
133 coefficient_a
*= power10_table_128
[scale_k
].w
[0];
135 diff_dec_expon
-= scale_k
;
136 exponent_a
-= scale_k
;
138 /* get binary coefficients of x and y */
140 //--- get number of bits in the coefficients of x and y ---
141 tempx
.d
= (double) coefficient_a
;
142 bin_expon_ca
= ((tempx
.i
& MASK_BINARY_EXPONENT
) >> 52) - 0x3ff;
144 if (diff_dec_expon
> MAX_FORMAT_DIGITS
) {
145 #ifdef SET_STATUS_FLAGS
147 __set_status_flags (fpsc
, INEXACT_EXCEPTION
);
151 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
152 #ifndef IEEE_ROUND_NEAREST
153 if (((rounding_mode
) & 3) && coefficient_b
) // not ROUNDING_TO_NEAREST
155 switch (rounding_mode
) {
158 coefficient_a
-= ((((SINT64
) sign_a
) >> 63) | 1);
159 if (coefficient_a
< 1000000000000000ull) {
161 coefficient_a
= 9999999999999999ull;
162 } else if (coefficient_a
>= 10000000000000000ull) {
164 coefficient_a
= 1000000000000000ull;
170 coefficient_a
+= ((((SINT64
) sign_a
) >> 63) | 1);
171 if (coefficient_a
< 1000000000000000ull) {
173 coefficient_a
= 9999999999999999ull;
174 } else if (coefficient_a
>= 10000000000000000ull) {
176 coefficient_a
= 1000000000000000ull;
181 if (sign_a
!= sign_b
) {
183 if (coefficient_a
< 1000000000000000ull) {
185 coefficient_a
= 9999999999999999ull;
193 // check special case here
194 if ((coefficient_a
== 1000000000000000ull)
195 && (diff_dec_expon
== MAX_FORMAT_DIGITS
+ 1)
197 && (coefficient_b
> 5000000000000000ull)) {
198 coefficient_a
= 9999999999999999ull;
202 return get_BID64 (sign_a
, exponent_a
, coefficient_a
,
203 rounding_mode
, fpsc
);
206 // test whether coefficient_a*10^(exponent_a-exponent_b) may exceed 2^62
207 if (bin_expon_ca
+ estimate_bin_expon
[diff_dec_expon
] < 60) {
208 // coefficient_a*10^(exponent_a-exponent_b)<2^63
210 // multiply by 10^(exponent_a-exponent_b)
211 coefficient_a
*= power10_table_128
[diff_dec_expon
].w
[0];
214 sign_b
= ((SINT64
) sign_b
) >> 63;
215 // apply sign to coeff. of b
216 coefficient_b
= (coefficient_b
+ sign_b
) ^ sign_b
;
218 // apply sign to coefficient a
219 sign_a
= ((SINT64
) sign_a
) >> 63;
220 coefficient_a
= (coefficient_a
+ sign_a
) ^ sign_a
;
222 coefficient_a
+= coefficient_b
;
224 sign_s
= ((SINT64
) coefficient_a
) >> 63;
225 coefficient_a
= (coefficient_a
+ sign_s
) ^ sign_s
;
226 sign_s
&= 0x8000000000000000ull
;
228 // coefficient_a < 10^16 ?
229 if (coefficient_a
< power10_table_128
[MAX_FORMAT_DIGITS
].w
[0]) {
230 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
231 #ifndef IEEE_ROUND_NEAREST
232 if (rounding_mode
== ROUNDING_DOWN
&& (!coefficient_a
)
234 sign_s
= 0x8000000000000000ull
;
237 return get_BID64 (sign_s
, exponent_b
, coefficient_a
,
238 rounding_mode
, fpsc
);
240 // otherwise rounding is necessary
242 // already know coefficient_a<10^19
243 // coefficient_a < 10^17 ?
244 if (coefficient_a
< power10_table_128
[17].w
[0])
246 else if (coefficient_a
< power10_table_128
[18].w
[0])
251 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
252 #ifndef IEEE_ROUND_NEAREST
253 rmode
= rounding_mode
;
254 if (sign_s
&& (unsigned) (rmode
- 1) < 2)
262 coefficient_a
+= round_const_table
[rmode
][extra_digits
];
264 // get P*(2^M[extra_digits])/10^extra_digits
265 __mul_64x64_to_128 (CT
, coefficient_a
,
266 reciprocals10_64
[extra_digits
]);
268 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
269 amount
= short_recip_scale
[extra_digits
];
270 C64
= CT
.w
[1] >> amount
;
273 // coefficient_a*10^(exponent_a-exponent_b) is large
276 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
277 #ifndef IEEE_ROUND_NEAREST
278 rmode
= rounding_mode
;
279 if (sign_s
&& (unsigned) (rmode
- 1) < 2)
288 // check whether we can take faster path
289 scale_ca
= estimate_decimal_digits
[bin_expon_ca
];
291 sign_ab
= sign_a
^ sign_b
;
292 sign_ab
= ((SINT64
) sign_ab
) >> 63;
294 // T1 = 10^(16-diff_dec_expon)
295 T1
= power10_table_128
[16 - diff_dec_expon
].w
[0];
297 // get number of digits in coefficient_a
298 //P_ca = power10_table_128[scale_ca].w[0];
299 //P_ca_m1 = power10_table_128[scale_ca-1].w[0];
300 if (coefficient_a
>= power10_table_128
[scale_ca
].w
[0]) {
303 //P_ca = power10_table_128[scale_ca].w[0];
306 scale_k
= 16 - scale_ca
;
309 //Ts = (T1 + sign_ab) ^ sign_ab;
312 //X = coefficient_a + Ts - P_ca_m1;
315 saved_ca
= coefficient_a
- T1
;
317 (SINT64
) saved_ca
*(SINT64
) power10_table_128
[scale_k
].w
[0];
318 extra_digits
= diff_dec_expon
- scale_k
;
321 saved_cb
= (coefficient_b
+ sign_ab
) ^ sign_ab
;
322 // add 10^16 and rounding constant
324 saved_cb
+ 10000000000000000ull +
325 round_const_table
[rmode
][extra_digits
];
327 // get P*(2^M[extra_digits])/10^extra_digits
328 __mul_64x64_to_128 (CT
, coefficient_b
,
329 reciprocals10_64
[extra_digits
]);
331 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
332 amount
= short_recip_scale
[extra_digits
];
333 C0_64
= CT
.w
[1] >> amount
;
335 // result coefficient
336 C64
= C0_64
+ coefficient_a
;
337 // filter out difficult (corner) cases
338 // the following test is equivalent to
339 // ( (initial_coefficient_a + Ts) < P_ca &&
340 // (initial_coefficient_a + Ts) > P_ca_m1 ),
341 // which ensures the number of digits in coefficient_a does not change
342 // after adding (the appropriately scaled and rounded) coefficient_b
343 if ((UINT64
) (C64
- 1000000000000000ull - 1) >
344 9000000000000000ull - 2) {
345 if (C64
>= 10000000000000000ull) {
346 // result has more than 16 digits
348 // must divide coeff_a by 10
349 saved_ca
= saved_ca
+ T1
;
350 __mul_64x64_to_128 (CA
, saved_ca
, 0x3333333333333334ull
);
351 //reciprocals10_64[1]);
352 coefficient_a
= CA
.w
[1] >> 1;
354 saved_ca
- (coefficient_a
<< 3) - (coefficient_a
<< 1);
355 coefficient_a
= coefficient_a
- T1
;
358 /*90000000000000000 */ +rem_a
*
359 power10_table_128
[diff_dec_expon
].w
[0];
362 (SINT64
) (saved_ca
- T1
-
363 (T1
<< 3)) * (SINT64
) power10_table_128
[scale_k
-
368 saved_cb
+ 100000000000000000ull +
369 round_const_table
[rmode
][extra_digits
];
371 // get P*(2^M[extra_digits])/10^extra_digits
372 __mul_64x64_to_128 (CT
, coefficient_b
,
373 reciprocals10_64
[extra_digits
]);
375 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
376 amount
= short_recip_scale
[extra_digits
];
377 C0_64
= CT
.w
[1] >> amount
;
379 // result coefficient
380 C64
= C0_64
+ coefficient_a
;
381 } else if (C64
<= 1000000000000000ull) {
382 // less than 16 digits in result
384 (SINT64
) saved_ca
*(SINT64
) power10_table_128
[scale_k
+
389 (saved_cb
<< 3) + (saved_cb
<< 1) + 100000000000000000ull +
390 round_const_table
[rmode
][extra_digits
];
392 // get P*(2^M[extra_digits])/10^extra_digits
393 __mul_64x64_to_128 (CT_new
, coefficient_b
,
394 reciprocals10_64
[extra_digits
]);
396 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
397 amount
= short_recip_scale
[extra_digits
];
398 C0_64
= CT_new
.w
[1] >> amount
;
400 // result coefficient
401 C64_new
= C0_64
+ coefficient_a
;
402 if (C64_new
< 10000000000000000ull) {
404 #ifdef SET_STATUS_FLAGS
415 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
416 #ifndef IEEE_ROUND_NEAREST
417 if (rmode
== 0) //ROUNDING_TO_NEAREST
420 // check whether fractional part of initial_P/10^extra_digits
422 // this is the same as fractional part of
423 // (initial_P + 0.5*10^extra_digits)/10^extra_digits is exactly zero
426 remainder_h
= CT
.w
[1] << (64 - amount
);
428 // test whether fractional part is 0
429 if (!remainder_h
&& (CT
.w
[0] < reciprocals10_64
[extra_digits
])) {
435 #ifdef SET_STATUS_FLAGS
436 status
= INEXACT_EXCEPTION
;
439 remainder_h
= CT
.w
[1] << (64 - amount
);
442 case ROUNDING_TO_NEAREST
:
443 case ROUNDING_TIES_AWAY
:
444 // test whether fractional part is 0
445 if ((remainder_h
== 0x8000000000000000ull
)
446 && (CT
.w
[0] < reciprocals10_64
[extra_digits
]))
447 status
= EXACT_STATUS
;
450 case ROUNDING_TO_ZERO
:
451 if (!remainder_h
&& (CT
.w
[0] < reciprocals10_64
[extra_digits
]))
452 status
= EXACT_STATUS
;
456 __add_carry_out (tmp
, carry
, CT
.w
[0],
457 reciprocals10_64
[extra_digits
]);
458 if ((remainder_h
>> (64 - amount
)) + carry
>=
459 (((UINT64
) 1) << amount
))
460 status
= EXACT_STATUS
;
463 __set_status_flags (fpsc
, status
);
467 return get_BID64 (sign_s
, exponent_b
+ extra_digits
, C64
,
468 rounding_mode
, fpsc
);
472 ///////////////////////////////////////////////////////////////////
473 // round 128-bit coefficient and return result in BID64 format
474 // do not worry about midpoint cases
475 //////////////////////////////////////////////////////////////////
477 __bid_simple_round64_sticky (UINT64 sign
, int exponent
, UINT128 P
,
478 int extra_digits
, int rounding_mode
,
480 UINT128 Q_high
, Q_low
, C128
;
484 rmode
= rounding_mode
;
485 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
486 #ifndef IEEE_ROUND_NEAREST
487 if (sign
&& (unsigned) (rmode
- 1) < 2)
491 __add_128_64 (P
, P
, round_const_table
[rmode
][extra_digits
]);
493 // get P*(2^M[extra_digits])/10^extra_digits
494 __mul_128x128_full (Q_high
, Q_low
, P
,
495 reciprocals10_128
[extra_digits
]);
497 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
498 amount
= recip_scale
[extra_digits
];
499 __shr_128 (C128
, Q_high
, amount
);
501 C64
= __low_64 (C128
);
503 #ifdef SET_STATUS_FLAGS
505 __set_status_flags (fpsc
, INEXACT_EXCEPTION
);
509 return get_BID64 (sign
, exponent
, C64
, rounding_mode
, fpsc
);
512 ///////////////////////////////////////////////////////////////////
513 // round 128-bit coefficient and return result in BID64 format
514 ///////////////////////////////////////////////////////////////////
516 __bid_full_round64 (UINT64 sign
, int exponent
, UINT128 P
,
517 int extra_digits
, int rounding_mode
,
519 UINT128 Q_high
, Q_low
, C128
, Stemp
, PU
;
520 UINT64 remainder_h
, C64
, carry
, CY
;
521 int amount
, amount2
, rmode
, status
= 0;
524 if (exponent
>= -16 && (extra_digits
+ exponent
< 0)) {
525 extra_digits
= -exponent
;
526 #ifdef SET_STATUS_FLAGS
527 if (extra_digits
> 0) {
528 rmode
= rounding_mode
;
529 if (sign
&& (unsigned) (rmode
- 1) < 2)
531 __add_128_128 (PU
, P
,
532 round_const_table_128
[rmode
][extra_digits
]);
533 if (__unsigned_compare_gt_128
534 (power10_table_128
[extra_digits
+ 15], PU
))
535 status
= UNDERFLOW_EXCEPTION
;
541 if (extra_digits
> 0) {
542 exponent
+= extra_digits
;
543 rmode
= rounding_mode
;
544 if (sign
&& (unsigned) (rmode
- 1) < 2)
546 __add_128_128 (P
, P
, round_const_table_128
[rmode
][extra_digits
]);
548 // get P*(2^M[extra_digits])/10^extra_digits
549 __mul_128x128_full (Q_high
, Q_low
, P
,
550 reciprocals10_128
[extra_digits
]);
552 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
553 amount
= recip_scale
[extra_digits
];
554 __shr_128_long (C128
, Q_high
, amount
);
556 C64
= __low_64 (C128
);
558 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
559 #ifndef IEEE_ROUND_NEAREST
560 if (rmode
== 0) //ROUNDING_TO_NEAREST
563 // check whether fractional part of initial_P/10^extra_digits
567 amount2
= 64 - amount
;
570 remainder_h
>>= amount2
;
571 remainder_h
= remainder_h
& Q_high
.w
[0];
574 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
575 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
577 reciprocals10_128
[extra_digits
].w
[0]))) {
583 #ifdef SET_STATUS_FLAGS
584 status
|= INEXACT_EXCEPTION
;
587 remainder_h
= Q_high
.w
[0] << (64 - amount
);
590 case ROUNDING_TO_NEAREST
:
591 case ROUNDING_TIES_AWAY
:
592 // test whether fractional part is 0
593 if (remainder_h
== 0x8000000000000000ull
594 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
595 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
597 reciprocals10_128
[extra_digits
].w
[0])))
598 status
= EXACT_STATUS
;
601 case ROUNDING_TO_ZERO
:
603 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
604 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
606 reciprocals10_128
[extra_digits
].w
[0])))
607 status
= EXACT_STATUS
;
611 __add_carry_out (Stemp
.w
[0], CY
, Q_low
.w
[0],
612 reciprocals10_128
[extra_digits
].w
[0]);
613 __add_carry_in_out (Stemp
.w
[1], carry
, Q_low
.w
[1],
614 reciprocals10_128
[extra_digits
].w
[1], CY
);
615 if ((remainder_h
>> (64 - amount
)) + carry
>=
616 (((UINT64
) 1) << amount
))
617 status
= EXACT_STATUS
;
620 __set_status_flags (fpsc
, status
);
627 if (rounding_mode
== ROUNDING_DOWN
)
628 sign
= 0x8000000000000000ull
;
631 return get_BID64 (sign
, exponent
, C64
, rounding_mode
, fpsc
);
634 /////////////////////////////////////////////////////////////////////////////////
635 // round 192-bit coefficient (P, remainder_P) and return result in BID64 format
636 // the lowest 64 bits (remainder_P) are used for midpoint checking only
637 ////////////////////////////////////////////////////////////////////////////////
639 __bid_full_round64_remainder (UINT64 sign
, int exponent
, UINT128 P
,
640 int extra_digits
, UINT64 remainder_P
,
641 int rounding_mode
, unsigned *fpsc
,
642 unsigned uf_status
) {
643 UINT128 Q_high
, Q_low
, C128
, Stemp
;
644 UINT64 remainder_h
, C64
, carry
, CY
;
645 int amount
, amount2
, rmode
, status
= uf_status
;
647 rmode
= rounding_mode
;
648 if (sign
&& (unsigned) (rmode
- 1) < 2)
650 if (rmode
== ROUNDING_UP
&& remainder_P
) {
657 __add_128_64 (P
, P
, round_const_table
[rmode
][extra_digits
]);
659 // get P*(2^M[extra_digits])/10^extra_digits
660 __mul_128x128_full (Q_high
, Q_low
, P
,
661 reciprocals10_128
[extra_digits
]);
663 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
664 amount
= recip_scale
[extra_digits
];
665 __shr_128 (C128
, Q_high
, amount
);
667 C64
= __low_64 (C128
);
669 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
670 #ifndef IEEE_ROUND_NEAREST
671 if (rmode
== 0) //ROUNDING_TO_NEAREST
673 if (!remainder_P
&& (C64
& 1)) {
674 // check whether fractional part of initial_P/10^extra_digits
678 amount2
= 64 - amount
;
681 remainder_h
>>= amount2
;
682 remainder_h
= remainder_h
& Q_high
.w
[0];
685 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
686 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
688 reciprocals10_128
[extra_digits
].w
[0]))) {
694 #ifdef SET_STATUS_FLAGS
695 status
|= INEXACT_EXCEPTION
;
699 remainder_h
= Q_high
.w
[0] << (64 - amount
);
702 case ROUNDING_TO_NEAREST
:
703 case ROUNDING_TIES_AWAY
:
704 // test whether fractional part is 0
705 if (remainder_h
== 0x8000000000000000ull
706 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
707 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
709 reciprocals10_128
[extra_digits
].w
[0])))
710 status
= EXACT_STATUS
;
713 case ROUNDING_TO_ZERO
:
715 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
716 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
718 reciprocals10_128
[extra_digits
].w
[0])))
719 status
= EXACT_STATUS
;
723 __add_carry_out (Stemp
.w
[0], CY
, Q_low
.w
[0],
724 reciprocals10_128
[extra_digits
].w
[0]);
725 __add_carry_in_out (Stemp
.w
[1], carry
, Q_low
.w
[1],
726 reciprocals10_128
[extra_digits
].w
[1], CY
);
727 if ((remainder_h
>> (64 - amount
)) + carry
>=
728 (((UINT64
) 1) << amount
))
729 status
= EXACT_STATUS
;
732 __set_status_flags (fpsc
, status
);
737 #ifdef SET_STATUS_FLAGS
739 __set_status_flags (fpsc
, uf_status
| INEXACT_EXCEPTION
);
744 return get_BID64 (sign
, exponent
+ extra_digits
, C64
, rounding_mode
,
749 ///////////////////////////////////////////////////////////////////
750 // get P/10^extra_digits
751 // result fits in 64 bits
752 ///////////////////////////////////////////////////////////////////
753 __BID_INLINE__ UINT64
754 __truncate (UINT128 P
, int extra_digits
)
755 // extra_digits <= 16
757 UINT128 Q_high
, Q_low
, C128
;
761 // get P*(2^M[extra_digits])/10^extra_digits
762 __mul_128x128_full (Q_high
, Q_low
, P
,
763 reciprocals10_128
[extra_digits
]);
765 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
766 amount
= recip_scale
[extra_digits
];
767 __shr_128 (C128
, Q_high
, amount
);
769 C64
= __low_64 (C128
);
775 ///////////////////////////////////////////////////////////////////
776 // return number of decimal digits in 128-bit value X
777 ///////////////////////////////////////////////////////////////////
779 __get_dec_digits64 (UINT128 X
) {
781 int digits_x
, bin_expon_cx
;
784 //--- get number of bits in the coefficients of x and y ---
785 tempx
.d
= (double) X
.w
[0];
786 bin_expon_cx
= ((tempx
.i
& MASK_BINARY_EXPONENT
) >> 52) - 0x3ff;
787 // get number of decimal digits in the coeff_x
788 digits_x
= estimate_decimal_digits
[bin_expon_cx
];
789 if (X
.w
[0] >= power10_table_128
[digits_x
].w
[0])
793 tempx
.d
= (double) X
.w
[1];
794 bin_expon_cx
= ((tempx
.i
& MASK_BINARY_EXPONENT
) >> 52) - 0x3ff;
795 // get number of decimal digits in the coeff_x
796 digits_x
= estimate_decimal_digits
[bin_expon_cx
+ 64];
797 if (__unsigned_compare_ge_128 (X
, power10_table_128
[digits_x
]))
804 ////////////////////////////////////////////////////////////////////////////////
806 // add 64-bit coefficient to 128-bit coefficient, return result in BID64 format
808 ////////////////////////////////////////////////////////////////////////////////
809 __BID_INLINE__ UINT64
810 get_add128 (UINT64 sign_x
, int exponent_x
, UINT64 coefficient_x
,
811 UINT64 sign_y
, int final_exponent_y
, UINT128 CY
,
812 int extra_digits
, int rounding_mode
, unsigned *fpsc
) {
813 UINT128 CY_L
, CX
, FS
, F
, CT
, ST
, T2
;
814 UINT64 CYh
, CY0L
, T
, S
, coefficient_y
, remainder_y
;
817 int diff_dec_expon
, extra_digits2
, exponent_y
, status
;
818 int extra_dx
, diff_dec2
, bin_expon_cx
, digits_x
, rmode
;
820 // CY has more than 16 decimal digits
822 exponent_y
= final_exponent_y
- extra_digits
;
824 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
827 #ifdef IEEE_ROUND_NEAREST
831 if (exponent_x
> exponent_y
) {
833 //--- get number of bits in the coefficients of x and y ---
834 tempx
.d
= (double) coefficient_x
;
835 bin_expon_cx
= ((tempx
.i
& MASK_BINARY_EXPONENT
) >> 52) - 0x3ff;
836 // get number of decimal digits in the coeff_x
837 digits_x
= estimate_decimal_digits
[bin_expon_cx
];
838 if (coefficient_x
>= power10_table_128
[digits_x
].w
[0])
841 extra_dx
= 16 - digits_x
;
842 coefficient_x
*= power10_table_128
[extra_dx
].w
[0];
843 if ((sign_x
^ sign_y
) && (coefficient_x
== 1000000000000000ull)) {
845 coefficient_x
= 10000000000000000ull;
847 exponent_x
-= extra_dx
;
849 if (exponent_x
> exponent_y
) {
851 // exponent_x > exponent_y
852 diff_dec_expon
= exponent_x
- exponent_y
;
854 if (exponent_x
<= final_exponent_y
+ 1) {
855 __mul_64x64_to_128 (CX
, coefficient_x
,
856 power10_table_128
[diff_dec_expon
].w
[0]);
858 if (sign_x
== sign_y
) {
859 __add_128_128 (CT
, CY
, CX
);
861 final_exponent_y
) /*&& (final_exponent_y>0) */ )
863 if (__unsigned_compare_ge_128
864 (CT
, power10_table_128
[16 + extra_digits
]))
867 __sub_128_128 (CT
, CY
, CX
);
868 if (((SINT64
) CT
.w
[1]) < 0) {
869 CT
.w
[0] = 0 - CT
.w
[0];
870 CT
.w
[1] = 0 - CT
.w
[1];
874 } else if (!(CT
.w
[1] | CT
.w
[0])) {
877 ROUNDING_DOWN
) ? 0 : 0x8000000000000000ull
;
879 if ((exponent_x
+ 1 >=
880 final_exponent_y
) /*&& (final_exponent_y>=0) */ ) {
881 extra_digits
= __get_dec_digits64 (CT
) - 16;
882 if (extra_digits
<= 0) {
883 if (!CT
.w
[0] && rounding_mode
== ROUNDING_DOWN
)
884 sign_y
= 0x8000000000000000ull
;
885 return get_BID64 (sign_y
, exponent_y
, CT
.w
[0],
886 rounding_mode
, fpsc
);
889 if (__unsigned_compare_gt_128
890 (power10_table_128
[15 + extra_digits
], CT
))
894 return __bid_full_round64 (sign_y
, exponent_y
, CT
, extra_digits
,
895 rounding_mode
, fpsc
);
897 // diff_dec2+extra_digits is the number of digits to eliminate from
899 diff_dec2
= exponent_x
- final_exponent_y
;
901 if (diff_dec2
>= 17) {
902 #ifndef IEEE_ROUND_NEAREST
903 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
904 if ((rounding_mode
) & 3) {
905 switch (rounding_mode
) {
908 D
= ((SINT64
) (sign_x
^ sign_y
)) >> 63;
915 D
= ((SINT64
) (sign_x
^ sign_y
)) >> 63;
920 case ROUNDING_TO_ZERO
:
921 if (sign_y
!= sign_x
) {
927 if (coefficient_x
< 1000000000000000ull) {
930 D
+ (coefficient_x
<< 1) + (coefficient_x
<< 3);
936 #ifdef SET_STATUS_FLAGS
937 if (CY
.w
[1] | CY
.w
[0])
938 __set_status_flags (fpsc
, INEXACT_EXCEPTION
);
940 return get_BID64 (sign_x
, exponent_x
, coefficient_x
,
941 rounding_mode
, fpsc
);
943 // here exponent_x <= 16+final_exponent_y
945 // truncate CY to 16 dec. digits
946 CYh
= __truncate (CY
, extra_digits
);
949 T
= power10_table_128
[extra_digits
].w
[0];
950 __mul_64x64_to_64 (CY0L
, CYh
, T
);
952 remainder_y
= CY
.w
[0] - CY0L
;
954 // align coeff_x, CYh
955 __mul_64x64_to_128 (CX
, coefficient_x
,
956 power10_table_128
[diff_dec2
].w
[0]);
958 if (sign_x
== sign_y
) {
959 __add_128_64 (CT
, CX
, CYh
);
960 if (__unsigned_compare_ge_128
961 (CT
, power10_table_128
[16 + diff_dec2
]))
966 __sub_128_64 (CT
, CX
, CYh
);
967 if (__unsigned_compare_gt_128
968 (power10_table_128
[15 + diff_dec2
], CT
))
972 return __bid_full_round64_remainder (sign_x
, final_exponent_y
, CT
,
973 diff_dec2
, remainder_y
,
974 rounding_mode
, fpsc
, 0);
977 // Here (exponent_x <= exponent_y)
979 diff_dec_expon
= exponent_y
- exponent_x
;
981 if (diff_dec_expon
> MAX_FORMAT_DIGITS
) {
982 rmode
= rounding_mode
;
984 if ((sign_x
^ sign_y
)) {
988 if (__unsigned_compare_gt_128
989 (power10_table_128
[15 + extra_digits
], CY
)) {
994 CY
.w
[0] = 1000000000000000ull;
1000 __scale128_10 (CY
, CY
);
1004 return __bid_simple_round64_sticky (sign_y
, final_exponent_y
, CY
,
1005 extra_digits
, rmode
, fpsc
);
1007 // apply sign to coeff_x
1009 sign_x
= ((SINT64
) sign_x
) >> 63;
1010 CX
.w
[0] = (coefficient_x
+ sign_x
) ^ sign_x
;
1013 // check whether CY (rounded to 16 digits) and CX have
1014 // any digits in the same position
1015 diff_dec2
= final_exponent_y
- exponent_x
;
1017 if (diff_dec2
<= 17) {
1018 // align CY to 10^ex
1019 S
= power10_table_128
[diff_dec_expon
].w
[0];
1020 __mul_64x128_short (CY_L
, S
, CY
);
1022 __add_128_128 (ST
, CY_L
, CX
);
1023 extra_digits2
= __get_dec_digits64 (ST
) - 16;
1024 return __bid_full_round64 (sign_y
, exponent_x
, ST
, extra_digits2
,
1025 rounding_mode
, fpsc
);
1027 // truncate CY to 16 dec. digits
1028 CYh
= __truncate (CY
, extra_digits
);
1031 T
= power10_table_128
[extra_digits
].w
[0];
1032 __mul_64x64_to_64 (CY0L
, CYh
, T
);
1034 coefficient_y
= CY
.w
[0] - CY0L
;
1035 // add rounding constant
1036 rmode
= rounding_mode
;
1037 if (sign_y
&& (unsigned) (rmode
- 1) < 2)
1039 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1040 #ifndef IEEE_ROUND_NEAREST
1041 if (!(rmode
& 3)) //ROUNDING_TO_NEAREST
1045 coefficient_y
+= round_const_table
[rmode
][extra_digits
];
1047 // align coefficient_y, coefficient_x
1048 S
= power10_table_128
[diff_dec_expon
].w
[0];
1049 __mul_64x64_to_128 (F
, coefficient_y
, S
);
1052 __add_128_128 (FS
, F
, CX
);
1054 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1055 #ifndef IEEE_ROUND_NEAREST
1056 if (rmode
== 0) //ROUNDING_TO_NEAREST
1059 // rounding code, here RN_EVEN
1060 // 10^(extra_digits+diff_dec_expon)
1061 T2
= power10_table_128
[diff_dec_expon
+ extra_digits
];
1062 if (__unsigned_compare_gt_128 (FS
, T2
)
1063 || ((CYh
& 1) && __test_equal_128 (FS
, T2
))) {
1065 __sub_128_128 (FS
, FS
, T2
);
1069 #ifndef IEEE_ROUND_NEAREST
1070 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1071 if (rmode
== 4) //ROUNDING_TO_NEAREST
1074 // rounding code, here RN_AWAY
1075 // 10^(extra_digits+diff_dec_expon)
1076 T2
= power10_table_128
[diff_dec_expon
+ extra_digits
];
1077 if (__unsigned_compare_ge_128 (FS
, T2
)) {
1079 __sub_128_128 (FS
, FS
, T2
);
1083 #ifndef IEEE_ROUND_NEAREST
1084 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1087 case ROUNDING_TO_ZERO
:
1088 if ((SINT64
) FS
.w
[1] < 0) {
1090 if (CYh
< 1000000000000000ull) {
1091 CYh
= 9999999999999999ull;
1095 T2
= power10_table_128
[diff_dec_expon
+ extra_digits
];
1096 if (__unsigned_compare_ge_128 (FS
, T2
)) {
1098 __sub_128_128 (FS
, FS
, T2
);
1103 if ((SINT64
) FS
.w
[1] < 0)
1105 T2
= power10_table_128
[diff_dec_expon
+ extra_digits
];
1106 if (__unsigned_compare_gt_128 (FS
, T2
)) {
1108 __sub_128_128 (FS
, FS
, T2
);
1109 } else if ((FS
.w
[1] == T2
.w
[1]) && (FS
.w
[0] == T2
.w
[0])) {
1111 FS
.w
[1] = FS
.w
[0] = 0;
1112 } else if (FS
.w
[1] | FS
.w
[0])
1119 #ifdef SET_STATUS_FLAGS
1120 status
= INEXACT_EXCEPTION
;
1121 #ifndef IEEE_ROUND_NEAREST
1122 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1129 round_const_table_128
[0][diff_dec_expon
+ extra_digits
].w
[1])
1131 round_const_table_128
[0][diff_dec_expon
+
1132 extra_digits
].w
[0]))
1133 status
= EXACT_STATUS
;
1135 #ifndef IEEE_ROUND_NEAREST
1136 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1137 else if (!FS
.w
[1] && !FS
.w
[0])
1138 status
= EXACT_STATUS
;
1142 __set_status_flags (fpsc
, status
);
1145 return get_BID64 (sign_y
, final_exponent_y
, CYh
, rounding_mode
,
1151 //////////////////////////////////////////////////////////////////////////
1153 // If coefficient_z is less than 16 digits long, normalize to 16 digits
1155 /////////////////////////////////////////////////////////////////////////
1157 BID_normalize (UINT64 sign_z
, int exponent_z
,
1158 UINT64 coefficient_z
, UINT64 round_dir
, int round_flag
,
1159 int rounding_mode
, unsigned *fpsc
) {
1162 int digits_z
, bin_expon
, scale
, rmode
;
1164 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1165 #ifndef IEEE_ROUND_NEAREST
1166 rmode
= rounding_mode
;
1167 if (sign_z
&& (unsigned) (rmode
- 1) < 2)
1170 if (coefficient_z
>= power10_table_128
[15].w
[0])
1175 //--- get number of bits in the coefficients of x and y ---
1176 tempx
.d
= (double) coefficient_z
;
1177 bin_expon
= ((tempx
.i
& MASK_BINARY_EXPONENT
) >> 52) - 0x3ff;
1178 // get number of decimal digits in the coeff_x
1179 digits_z
= estimate_decimal_digits
[bin_expon
];
1180 if (coefficient_z
>= power10_table_128
[digits_z
].w
[0])
1183 scale
= 16 - digits_z
;
1184 exponent_z
-= scale
;
1185 if (exponent_z
< 0) {
1186 scale
+= exponent_z
;
1189 coefficient_z
*= power10_table_128
[scale
].w
[0];
1191 #ifdef SET_STATUS_FLAGS
1193 __set_status_flags (fpsc
, INEXACT_EXCEPTION
);
1194 if (coefficient_z
< 1000000000000000ull)
1195 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
);
1196 else if ((coefficient_z
== 1000000000000000ull) && !exponent_z
1197 && ((SINT64
) (round_dir
^ sign_z
) < 0) && round_flag
1198 && (rmode
== ROUNDING_DOWN
|| rmode
== ROUNDING_TO_ZERO
))
1199 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
);
1203 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1204 #ifndef IEEE_ROUND_NEAREST
1205 if (round_flag
&& (rmode
& 3)) {
1206 D
= round_dir
^ sign_z
;
1208 if (rmode
== ROUNDING_UP
) {
1214 if (coefficient_z
< 1000000000000000ull && exponent_z
) {
1215 coefficient_z
= 9999999999999999ull;
1223 return get_BID64 (sign_z
, exponent_z
, coefficient_z
, rounding_mode
,
1228 //////////////////////////////////////////////////////////////////////////
1230 // 0*10^ey + cz*10^ez, ey<ez
1232 //////////////////////////////////////////////////////////////////////////
1234 __BID_INLINE__ UINT64
1235 add_zero64 (int exponent_y
, UINT64 sign_z
, int exponent_z
,
1236 UINT64 coefficient_z
, unsigned *prounding_mode
,
1239 int bin_expon
, scale_k
, scale_cz
;
1242 diff_expon
= exponent_z
- exponent_y
;
1244 tempx
.d
= (double) coefficient_z
;
1245 bin_expon
= ((tempx
.i
& MASK_BINARY_EXPONENT
) >> 52) - 0x3ff;
1246 scale_cz
= estimate_decimal_digits
[bin_expon
];
1247 if (coefficient_z
>= power10_table_128
[scale_cz
].w
[0])
1250 scale_k
= 16 - scale_cz
;
1251 if (diff_expon
< scale_k
)
1252 scale_k
= diff_expon
;
1253 coefficient_z
*= power10_table_128
[scale_k
].w
[0];
1255 return get_BID64 (sign_z
, exponent_z
- scale_k
, coefficient_z
,
1256 *prounding_mode
, fpsc
);