2007-09-27 H.J. Lu <hongjiu.lu@intel.com>
[official-gcc.git] / libgcc / config / libbid / bid_inline_add.h
blob791b6e2fd8badeffed322a0918a1d703466c5bc4
1 /* Copyright (C) 2007 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
8 version.
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
17 executable.)
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
22 for more details.
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA. */
29 /*****************************************************************************
31 * Helper add functions (for fma)
33 * __BID_INLINE__ UINT64 get_add64(
34 * UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
35 * UINT64 sign_y, int exponent_y, UINT64 coefficient_y,
36 * int rounding_mode)
38 * __BID_INLINE__ UINT64 get_add128(
39 * UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
40 * UINT64 sign_y, int final_exponent_y, UINT128 CY,
41 * int extra_digits, int rounding_mode)
43 *****************************************************************************
45 * Algorithm description:
47 * get_add64: same as BID64 add, but arguments are unpacked and there
48 * are no special case checks
50 * get_add128: add 64-bit coefficient to 128-bit product (which contains
51 * 16+extra_digits decimal digits),
52 * return BID64 result
53 * - the exponents are compared and the two coefficients are
54 * properly aligned for addition/subtraction
55 * - multiple paths are needed
56 * - final result exponent is calculated and the lower term is
57 * rounded first if necessary, to avoid manipulating
58 * coefficients longer than 128 bits
60 ****************************************************************************/
62 #ifndef _INLINE_BID_ADD_H_
63 #define _INLINE_BID_ADD_H_
65 #include "bid_internal.h"
67 #define MAX_FORMAT_DIGITS 16
68 #define DECIMAL_EXPONENT_BIAS 398
69 #define MASK_BINARY_EXPONENT 0x7ff0000000000000ull
70 #define BINARY_EXPONENT_BIAS 0x3ff
71 #define UPPER_EXPON_LIMIT 51
73 ///////////////////////////////////////////////////////////////////////
75 // get_add64() is essentially the same as bid_add(), except that
76 // the arguments are unpacked
78 //////////////////////////////////////////////////////////////////////
79 __BID_INLINE__ UINT64
80 get_add64 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
81 UINT64 sign_y, int exponent_y, UINT64 coefficient_y,
82 int rounding_mode, unsigned *fpsc) {
83 UINT128 CA, CT, CT_new;
84 UINT64 sign_a, sign_b, coefficient_a, coefficient_b, sign_s, sign_ab,
85 rem_a;
86 UINT64 saved_ca, saved_cb, C0_64, C64, remainder_h, T1, carry, tmp,
87 C64_new;
88 int_double tempx;
89 int exponent_a, exponent_b, diff_dec_expon;
90 int bin_expon_ca, extra_digits, amount, scale_k, scale_ca;
91 unsigned rmode, status;
93 // sort arguments by exponent
94 if (exponent_x <= exponent_y) {
95 sign_a = sign_y;
96 exponent_a = exponent_y;
97 coefficient_a = coefficient_y;
98 sign_b = sign_x;
99 exponent_b = exponent_x;
100 coefficient_b = coefficient_x;
101 } else {
102 sign_a = sign_x;
103 exponent_a = exponent_x;
104 coefficient_a = coefficient_x;
105 sign_b = sign_y;
106 exponent_b = exponent_y;
107 coefficient_b = coefficient_y;
110 // exponent difference
111 diff_dec_expon = exponent_a - exponent_b;
113 /* get binary coefficients of x and y */
115 //--- get number of bits in the coefficients of x and y ---
117 tempx.d = (double) coefficient_a;
118 bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
120 if (!coefficient_a) {
121 return get_BID64 (sign_b, exponent_b, coefficient_b, rounding_mode,
122 fpsc);
124 if (diff_dec_expon > MAX_FORMAT_DIGITS) {
125 // normalize a to a 16-digit coefficient
127 scale_ca = estimate_decimal_digits[bin_expon_ca];
128 if (coefficient_a >= power10_table_128[scale_ca].w[0])
129 scale_ca++;
131 scale_k = 16 - scale_ca;
133 coefficient_a *= power10_table_128[scale_k].w[0];
135 diff_dec_expon -= scale_k;
136 exponent_a -= scale_k;
138 /* get binary coefficients of x and y */
140 //--- get number of bits in the coefficients of x and y ---
141 tempx.d = (double) coefficient_a;
142 bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
144 if (diff_dec_expon > MAX_FORMAT_DIGITS) {
145 #ifdef SET_STATUS_FLAGS
146 if (coefficient_b) {
147 __set_status_flags (fpsc, INEXACT_EXCEPTION);
149 #endif
151 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
152 #ifndef IEEE_ROUND_NEAREST
153 if (((rounding_mode) & 3) && coefficient_b) // not ROUNDING_TO_NEAREST
155 switch (rounding_mode) {
156 case ROUNDING_DOWN:
157 if (sign_b) {
158 coefficient_a -= ((((SINT64) sign_a) >> 63) | 1);
159 if (coefficient_a < 1000000000000000ull) {
160 exponent_a--;
161 coefficient_a = 9999999999999999ull;
162 } else if (coefficient_a >= 10000000000000000ull) {
163 exponent_a++;
164 coefficient_a = 1000000000000000ull;
167 break;
168 case ROUNDING_UP:
169 if (!sign_b) {
170 coefficient_a += ((((SINT64) sign_a) >> 63) | 1);
171 if (coefficient_a < 1000000000000000ull) {
172 exponent_a--;
173 coefficient_a = 9999999999999999ull;
174 } else if (coefficient_a >= 10000000000000000ull) {
175 exponent_a++;
176 coefficient_a = 1000000000000000ull;
179 break;
180 default: // RZ
181 if (sign_a != sign_b) {
182 coefficient_a--;
183 if (coefficient_a < 1000000000000000ull) {
184 exponent_a--;
185 coefficient_a = 9999999999999999ull;
188 break;
190 } else
191 #endif
192 #endif
193 // check special case here
194 if ((coefficient_a == 1000000000000000ull)
195 && (diff_dec_expon == MAX_FORMAT_DIGITS + 1)
196 && (sign_a ^ sign_b)
197 && (coefficient_b > 5000000000000000ull)) {
198 coefficient_a = 9999999999999999ull;
199 exponent_a--;
202 return get_BID64 (sign_a, exponent_a, coefficient_a,
203 rounding_mode, fpsc);
206 // test whether coefficient_a*10^(exponent_a-exponent_b) may exceed 2^62
207 if (bin_expon_ca + estimate_bin_expon[diff_dec_expon] < 60) {
208 // coefficient_a*10^(exponent_a-exponent_b)<2^63
210 // multiply by 10^(exponent_a-exponent_b)
211 coefficient_a *= power10_table_128[diff_dec_expon].w[0];
213 // sign mask
214 sign_b = ((SINT64) sign_b) >> 63;
215 // apply sign to coeff. of b
216 coefficient_b = (coefficient_b + sign_b) ^ sign_b;
218 // apply sign to coefficient a
219 sign_a = ((SINT64) sign_a) >> 63;
220 coefficient_a = (coefficient_a + sign_a) ^ sign_a;
222 coefficient_a += coefficient_b;
223 // get sign
224 sign_s = ((SINT64) coefficient_a) >> 63;
225 coefficient_a = (coefficient_a + sign_s) ^ sign_s;
226 sign_s &= 0x8000000000000000ull;
228 // coefficient_a < 10^16 ?
229 if (coefficient_a < power10_table_128[MAX_FORMAT_DIGITS].w[0]) {
230 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
231 #ifndef IEEE_ROUND_NEAREST
232 if (rounding_mode == ROUNDING_DOWN && (!coefficient_a)
233 && sign_a != sign_b)
234 sign_s = 0x8000000000000000ull;
235 #endif
236 #endif
237 return get_BID64 (sign_s, exponent_b, coefficient_a,
238 rounding_mode, fpsc);
240 // otherwise rounding is necessary
242 // already know coefficient_a<10^19
243 // coefficient_a < 10^17 ?
244 if (coefficient_a < power10_table_128[17].w[0])
245 extra_digits = 1;
246 else if (coefficient_a < power10_table_128[18].w[0])
247 extra_digits = 2;
248 else
249 extra_digits = 3;
251 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
252 #ifndef IEEE_ROUND_NEAREST
253 rmode = rounding_mode;
254 if (sign_s && (unsigned) (rmode - 1) < 2)
255 rmode = 3 - rmode;
256 #else
257 rmode = 0;
258 #endif
259 #else
260 rmode = 0;
261 #endif
262 coefficient_a += round_const_table[rmode][extra_digits];
264 // get P*(2^M[extra_digits])/10^extra_digits
265 __mul_64x64_to_128 (CT, coefficient_a,
266 reciprocals10_64[extra_digits]);
268 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
269 amount = short_recip_scale[extra_digits];
270 C64 = CT.w[1] >> amount;
272 } else {
273 // coefficient_a*10^(exponent_a-exponent_b) is large
274 sign_s = sign_a;
276 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
277 #ifndef IEEE_ROUND_NEAREST
278 rmode = rounding_mode;
279 if (sign_s && (unsigned) (rmode - 1) < 2)
280 rmode = 3 - rmode;
281 #else
282 rmode = 0;
283 #endif
284 #else
285 rmode = 0;
286 #endif
288 // check whether we can take faster path
289 scale_ca = estimate_decimal_digits[bin_expon_ca];
291 sign_ab = sign_a ^ sign_b;
292 sign_ab = ((SINT64) sign_ab) >> 63;
294 // T1 = 10^(16-diff_dec_expon)
295 T1 = power10_table_128[16 - diff_dec_expon].w[0];
297 // get number of digits in coefficient_a
298 //P_ca = power10_table_128[scale_ca].w[0];
299 //P_ca_m1 = power10_table_128[scale_ca-1].w[0];
300 if (coefficient_a >= power10_table_128[scale_ca].w[0]) {
301 scale_ca++;
302 //P_ca_m1 = P_ca;
303 //P_ca = power10_table_128[scale_ca].w[0];
306 scale_k = 16 - scale_ca;
308 // apply sign
309 //Ts = (T1 + sign_ab) ^ sign_ab;
311 // test range of ca
312 //X = coefficient_a + Ts - P_ca_m1;
314 // addition
315 saved_ca = coefficient_a - T1;
316 coefficient_a =
317 (SINT64) saved_ca *(SINT64) power10_table_128[scale_k].w[0];
318 extra_digits = diff_dec_expon - scale_k;
320 // apply sign
321 saved_cb = (coefficient_b + sign_ab) ^ sign_ab;
322 // add 10^16 and rounding constant
323 coefficient_b =
324 saved_cb + 10000000000000000ull +
325 round_const_table[rmode][extra_digits];
327 // get P*(2^M[extra_digits])/10^extra_digits
328 __mul_64x64_to_128 (CT, coefficient_b,
329 reciprocals10_64[extra_digits]);
331 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
332 amount = short_recip_scale[extra_digits];
333 C0_64 = CT.w[1] >> amount;
335 // result coefficient
336 C64 = C0_64 + coefficient_a;
337 // filter out difficult (corner) cases
338 // the following test is equivalent to
339 // ( (initial_coefficient_a + Ts) < P_ca &&
340 // (initial_coefficient_a + Ts) > P_ca_m1 ),
341 // which ensures the number of digits in coefficient_a does not change
342 // after adding (the appropriately scaled and rounded) coefficient_b
343 if ((UINT64) (C64 - 1000000000000000ull - 1) >
344 9000000000000000ull - 2) {
345 if (C64 >= 10000000000000000ull) {
346 // result has more than 16 digits
347 if (!scale_k) {
348 // must divide coeff_a by 10
349 saved_ca = saved_ca + T1;
350 __mul_64x64_to_128 (CA, saved_ca, 0x3333333333333334ull);
351 //reciprocals10_64[1]);
352 coefficient_a = CA.w[1] >> 1;
353 rem_a =
354 saved_ca - (coefficient_a << 3) - (coefficient_a << 1);
355 coefficient_a = coefficient_a - T1;
357 saved_cb +=
358 /*90000000000000000 */ +rem_a *
359 power10_table_128[diff_dec_expon].w[0];
360 } else
361 coefficient_a =
362 (SINT64) (saved_ca - T1 -
363 (T1 << 3)) * (SINT64) power10_table_128[scale_k -
364 1].w[0];
366 extra_digits++;
367 coefficient_b =
368 saved_cb + 100000000000000000ull +
369 round_const_table[rmode][extra_digits];
371 // get P*(2^M[extra_digits])/10^extra_digits
372 __mul_64x64_to_128 (CT, coefficient_b,
373 reciprocals10_64[extra_digits]);
375 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
376 amount = short_recip_scale[extra_digits];
377 C0_64 = CT.w[1] >> amount;
379 // result coefficient
380 C64 = C0_64 + coefficient_a;
381 } else if (C64 <= 1000000000000000ull) {
382 // less than 16 digits in result
383 coefficient_a =
384 (SINT64) saved_ca *(SINT64) power10_table_128[scale_k +
385 1].w[0];
386 //extra_digits --;
387 exponent_b--;
388 coefficient_b =
389 (saved_cb << 3) + (saved_cb << 1) + 100000000000000000ull +
390 round_const_table[rmode][extra_digits];
392 // get P*(2^M[extra_digits])/10^extra_digits
393 __mul_64x64_to_128 (CT_new, coefficient_b,
394 reciprocals10_64[extra_digits]);
396 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
397 amount = short_recip_scale[extra_digits];
398 C0_64 = CT_new.w[1] >> amount;
400 // result coefficient
401 C64_new = C0_64 + coefficient_a;
402 if (C64_new < 10000000000000000ull) {
403 C64 = C64_new;
404 #ifdef SET_STATUS_FLAGS
405 CT = CT_new;
406 #endif
407 } else
408 exponent_b++;
415 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
416 #ifndef IEEE_ROUND_NEAREST
417 if (rmode == 0) //ROUNDING_TO_NEAREST
418 #endif
419 if (C64 & 1) {
420 // check whether fractional part of initial_P/10^extra_digits
421 // is exactly .5
422 // this is the same as fractional part of
423 // (initial_P + 0.5*10^extra_digits)/10^extra_digits is exactly zero
425 // get remainder
426 remainder_h = CT.w[1] << (64 - amount);
428 // test whether fractional part is 0
429 if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits])) {
430 C64--;
433 #endif
435 #ifdef SET_STATUS_FLAGS
436 status = INEXACT_EXCEPTION;
438 // get remainder
439 remainder_h = CT.w[1] << (64 - amount);
441 switch (rmode) {
442 case ROUNDING_TO_NEAREST:
443 case ROUNDING_TIES_AWAY:
444 // test whether fractional part is 0
445 if ((remainder_h == 0x8000000000000000ull)
446 && (CT.w[0] < reciprocals10_64[extra_digits]))
447 status = EXACT_STATUS;
448 break;
449 case ROUNDING_DOWN:
450 case ROUNDING_TO_ZERO:
451 if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits]))
452 status = EXACT_STATUS;
453 break;
454 default:
455 // round up
456 __add_carry_out (tmp, carry, CT.w[0],
457 reciprocals10_64[extra_digits]);
458 if ((remainder_h >> (64 - amount)) + carry >=
459 (((UINT64) 1) << amount))
460 status = EXACT_STATUS;
461 break;
463 __set_status_flags (fpsc, status);
465 #endif
467 return get_BID64 (sign_s, exponent_b + extra_digits, C64,
468 rounding_mode, fpsc);
472 ///////////////////////////////////////////////////////////////////
473 // round 128-bit coefficient and return result in BID64 format
474 // do not worry about midpoint cases
475 //////////////////////////////////////////////////////////////////
476 static UINT64
477 __bid_simple_round64_sticky (UINT64 sign, int exponent, UINT128 P,
478 int extra_digits, int rounding_mode,
479 unsigned *fpsc) {
480 UINT128 Q_high, Q_low, C128;
481 UINT64 C64;
482 int amount, rmode;
484 rmode = rounding_mode;
485 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
486 #ifndef IEEE_ROUND_NEAREST
487 if (sign && (unsigned) (rmode - 1) < 2)
488 rmode = 3 - rmode;
489 #endif
490 #endif
491 __add_128_64 (P, P, round_const_table[rmode][extra_digits]);
493 // get P*(2^M[extra_digits])/10^extra_digits
494 __mul_128x128_full (Q_high, Q_low, P,
495 reciprocals10_128[extra_digits]);
497 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
498 amount = recip_scale[extra_digits];
499 __shr_128 (C128, Q_high, amount);
501 C64 = __low_64 (C128);
503 #ifdef SET_STATUS_FLAGS
505 __set_status_flags (fpsc, INEXACT_EXCEPTION);
507 #endif
509 return get_BID64 (sign, exponent, C64, rounding_mode, fpsc);
512 ///////////////////////////////////////////////////////////////////
513 // round 128-bit coefficient and return result in BID64 format
514 ///////////////////////////////////////////////////////////////////
515 static UINT64
516 __bid_full_round64 (UINT64 sign, int exponent, UINT128 P,
517 int extra_digits, int rounding_mode,
518 unsigned *fpsc) {
519 UINT128 Q_high, Q_low, C128, Stemp, PU;
520 UINT64 remainder_h, C64, carry, CY;
521 int amount, amount2, rmode, status = 0;
523 if (exponent < 0) {
524 if (exponent >= -16 && (extra_digits + exponent < 0)) {
525 extra_digits = -exponent;
526 #ifdef SET_STATUS_FLAGS
527 if (extra_digits > 0) {
528 rmode = rounding_mode;
529 if (sign && (unsigned) (rmode - 1) < 2)
530 rmode = 3 - rmode;
531 __add_128_128 (PU, P,
532 round_const_table_128[rmode][extra_digits]);
533 if (__unsigned_compare_gt_128
534 (power10_table_128[extra_digits + 15], PU))
535 status = UNDERFLOW_EXCEPTION;
537 #endif
541 if (extra_digits > 0) {
542 exponent += extra_digits;
543 rmode = rounding_mode;
544 if (sign && (unsigned) (rmode - 1) < 2)
545 rmode = 3 - rmode;
546 __add_128_128 (P, P, round_const_table_128[rmode][extra_digits]);
548 // get P*(2^M[extra_digits])/10^extra_digits
549 __mul_128x128_full (Q_high, Q_low, P,
550 reciprocals10_128[extra_digits]);
552 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
553 amount = recip_scale[extra_digits];
554 __shr_128_long (C128, Q_high, amount);
556 C64 = __low_64 (C128);
558 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
559 #ifndef IEEE_ROUND_NEAREST
560 if (rmode == 0) //ROUNDING_TO_NEAREST
561 #endif
562 if (C64 & 1) {
563 // check whether fractional part of initial_P/10^extra_digits
564 // is exactly .5
566 // get remainder
567 amount2 = 64 - amount;
568 remainder_h = 0;
569 remainder_h--;
570 remainder_h >>= amount2;
571 remainder_h = remainder_h & Q_high.w[0];
573 if (!remainder_h
574 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
575 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
576 && Q_low.w[0] <
577 reciprocals10_128[extra_digits].w[0]))) {
578 C64--;
581 #endif
583 #ifdef SET_STATUS_FLAGS
584 status |= INEXACT_EXCEPTION;
586 // get remainder
587 remainder_h = Q_high.w[0] << (64 - amount);
589 switch (rmode) {
590 case ROUNDING_TO_NEAREST:
591 case ROUNDING_TIES_AWAY:
592 // test whether fractional part is 0
593 if (remainder_h == 0x8000000000000000ull
594 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
595 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
596 && Q_low.w[0] <
597 reciprocals10_128[extra_digits].w[0])))
598 status = EXACT_STATUS;
599 break;
600 case ROUNDING_DOWN:
601 case ROUNDING_TO_ZERO:
602 if (!remainder_h
603 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
604 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
605 && Q_low.w[0] <
606 reciprocals10_128[extra_digits].w[0])))
607 status = EXACT_STATUS;
608 break;
609 default:
610 // round up
611 __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
612 reciprocals10_128[extra_digits].w[0]);
613 __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
614 reciprocals10_128[extra_digits].w[1], CY);
615 if ((remainder_h >> (64 - amount)) + carry >=
616 (((UINT64) 1) << amount))
617 status = EXACT_STATUS;
620 __set_status_flags (fpsc, status);
622 #endif
623 } else {
624 C64 = P.w[0];
625 if (!C64) {
626 sign = 0;
627 if (rounding_mode == ROUNDING_DOWN)
628 sign = 0x8000000000000000ull;
631 return get_BID64 (sign, exponent, C64, rounding_mode, fpsc);
634 /////////////////////////////////////////////////////////////////////////////////
635 // round 192-bit coefficient (P, remainder_P) and return result in BID64 format
636 // the lowest 64 bits (remainder_P) are used for midpoint checking only
637 ////////////////////////////////////////////////////////////////////////////////
638 static UINT64
639 __bid_full_round64_remainder (UINT64 sign, int exponent, UINT128 P,
640 int extra_digits, UINT64 remainder_P,
641 int rounding_mode, unsigned *fpsc,
642 unsigned uf_status) {
643 UINT128 Q_high, Q_low, C128, Stemp;
644 UINT64 remainder_h, C64, carry, CY;
645 int amount, amount2, rmode, status = uf_status;
647 rmode = rounding_mode;
648 if (sign && (unsigned) (rmode - 1) < 2)
649 rmode = 3 - rmode;
650 if (rmode == ROUNDING_UP && remainder_P) {
651 P.w[0]++;
652 if (!P.w[0])
653 P.w[1]++;
656 if (extra_digits) {
657 __add_128_64 (P, P, round_const_table[rmode][extra_digits]);
659 // get P*(2^M[extra_digits])/10^extra_digits
660 __mul_128x128_full (Q_high, Q_low, P,
661 reciprocals10_128[extra_digits]);
663 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
664 amount = recip_scale[extra_digits];
665 __shr_128 (C128, Q_high, amount);
667 C64 = __low_64 (C128);
669 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
670 #ifndef IEEE_ROUND_NEAREST
671 if (rmode == 0) //ROUNDING_TO_NEAREST
672 #endif
673 if (!remainder_P && (C64 & 1)) {
674 // check whether fractional part of initial_P/10^extra_digits
675 // is exactly .5
677 // get remainder
678 amount2 = 64 - amount;
679 remainder_h = 0;
680 remainder_h--;
681 remainder_h >>= amount2;
682 remainder_h = remainder_h & Q_high.w[0];
684 if (!remainder_h
685 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
686 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
687 && Q_low.w[0] <
688 reciprocals10_128[extra_digits].w[0]))) {
689 C64--;
692 #endif
694 #ifdef SET_STATUS_FLAGS
695 status |= INEXACT_EXCEPTION;
697 if (!remainder_P) {
698 // get remainder
699 remainder_h = Q_high.w[0] << (64 - amount);
701 switch (rmode) {
702 case ROUNDING_TO_NEAREST:
703 case ROUNDING_TIES_AWAY:
704 // test whether fractional part is 0
705 if (remainder_h == 0x8000000000000000ull
706 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
707 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
708 && Q_low.w[0] <
709 reciprocals10_128[extra_digits].w[0])))
710 status = EXACT_STATUS;
711 break;
712 case ROUNDING_DOWN:
713 case ROUNDING_TO_ZERO:
714 if (!remainder_h
715 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
716 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
717 && Q_low.w[0] <
718 reciprocals10_128[extra_digits].w[0])))
719 status = EXACT_STATUS;
720 break;
721 default:
722 // round up
723 __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
724 reciprocals10_128[extra_digits].w[0]);
725 __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
726 reciprocals10_128[extra_digits].w[1], CY);
727 if ((remainder_h >> (64 - amount)) + carry >=
728 (((UINT64) 1) << amount))
729 status = EXACT_STATUS;
732 __set_status_flags (fpsc, status);
734 #endif
735 } else {
736 C64 = P.w[0];
737 #ifdef SET_STATUS_FLAGS
738 if (remainder_P) {
739 __set_status_flags (fpsc, uf_status | INEXACT_EXCEPTION);
741 #endif
744 return get_BID64 (sign, exponent + extra_digits, C64, rounding_mode,
745 fpsc);
749 ///////////////////////////////////////////////////////////////////
750 // get P/10^extra_digits
751 // result fits in 64 bits
752 ///////////////////////////////////////////////////////////////////
753 __BID_INLINE__ UINT64
754 __truncate (UINT128 P, int extra_digits)
755 // extra_digits <= 16
757 UINT128 Q_high, Q_low, C128;
758 UINT64 C64;
759 int amount;
761 // get P*(2^M[extra_digits])/10^extra_digits
762 __mul_128x128_full (Q_high, Q_low, P,
763 reciprocals10_128[extra_digits]);
765 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
766 amount = recip_scale[extra_digits];
767 __shr_128 (C128, Q_high, amount);
769 C64 = __low_64 (C128);
771 return C64;
775 ///////////////////////////////////////////////////////////////////
776 // return number of decimal digits in 128-bit value X
777 ///////////////////////////////////////////////////////////////////
778 __BID_INLINE__ int
779 __get_dec_digits64 (UINT128 X) {
780 int_double tempx;
781 int digits_x, bin_expon_cx;
783 if (!X.w[1]) {
784 //--- get number of bits in the coefficients of x and y ---
785 tempx.d = (double) X.w[0];
786 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
787 // get number of decimal digits in the coeff_x
788 digits_x = estimate_decimal_digits[bin_expon_cx];
789 if (X.w[0] >= power10_table_128[digits_x].w[0])
790 digits_x++;
791 return digits_x;
793 tempx.d = (double) X.w[1];
794 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
795 // get number of decimal digits in the coeff_x
796 digits_x = estimate_decimal_digits[bin_expon_cx + 64];
797 if (__unsigned_compare_ge_128 (X, power10_table_128[digits_x]))
798 digits_x++;
800 return digits_x;
804 ////////////////////////////////////////////////////////////////////////////////
806 // add 64-bit coefficient to 128-bit coefficient, return result in BID64 format
808 ////////////////////////////////////////////////////////////////////////////////
809 __BID_INLINE__ UINT64
810 get_add128 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
811 UINT64 sign_y, int final_exponent_y, UINT128 CY,
812 int extra_digits, int rounding_mode, unsigned *fpsc) {
813 UINT128 CY_L, CX, FS, F, CT, ST, T2;
814 UINT64 CYh, CY0L, T, S, coefficient_y, remainder_y;
815 SINT64 D = 0;
816 int_double tempx;
817 int diff_dec_expon, extra_digits2, exponent_y, status;
818 int extra_dx, diff_dec2, bin_expon_cx, digits_x, rmode;
820 // CY has more than 16 decimal digits
822 exponent_y = final_exponent_y - extra_digits;
824 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
825 rounding_mode = 0;
826 #endif
827 #ifdef IEEE_ROUND_NEAREST
828 rounding_mode = 0;
829 #endif
831 if (exponent_x > exponent_y) {
832 // normalize x
833 //--- get number of bits in the coefficients of x and y ---
834 tempx.d = (double) coefficient_x;
835 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
836 // get number of decimal digits in the coeff_x
837 digits_x = estimate_decimal_digits[bin_expon_cx];
838 if (coefficient_x >= power10_table_128[digits_x].w[0])
839 digits_x++;
841 extra_dx = 16 - digits_x;
842 coefficient_x *= power10_table_128[extra_dx].w[0];
843 if ((sign_x ^ sign_y) && (coefficient_x == 1000000000000000ull)) {
844 extra_dx++;
845 coefficient_x = 10000000000000000ull;
847 exponent_x -= extra_dx;
849 if (exponent_x > exponent_y) {
851 // exponent_x > exponent_y
852 diff_dec_expon = exponent_x - exponent_y;
854 if (exponent_x <= final_exponent_y + 1) {
855 __mul_64x64_to_128 (CX, coefficient_x,
856 power10_table_128[diff_dec_expon].w[0]);
858 if (sign_x == sign_y) {
859 __add_128_128 (CT, CY, CX);
860 if ((exponent_x >
861 final_exponent_y) /*&& (final_exponent_y>0) */ )
862 extra_digits++;
863 if (__unsigned_compare_ge_128
864 (CT, power10_table_128[16 + extra_digits]))
865 extra_digits++;
866 } else {
867 __sub_128_128 (CT, CY, CX);
868 if (((SINT64) CT.w[1]) < 0) {
869 CT.w[0] = 0 - CT.w[0];
870 CT.w[1] = 0 - CT.w[1];
871 if (CT.w[0])
872 CT.w[1]--;
873 sign_y = sign_x;
874 } else if (!(CT.w[1] | CT.w[0])) {
875 sign_y =
876 (rounding_mode !=
877 ROUNDING_DOWN) ? 0 : 0x8000000000000000ull;
879 if ((exponent_x + 1 >=
880 final_exponent_y) /*&& (final_exponent_y>=0) */ ) {
881 extra_digits = __get_dec_digits64 (CT) - 16;
882 if (extra_digits <= 0) {
883 if (!CT.w[0] && rounding_mode == ROUNDING_DOWN)
884 sign_y = 0x8000000000000000ull;
885 return get_BID64 (sign_y, exponent_y, CT.w[0],
886 rounding_mode, fpsc);
888 } else
889 if (__unsigned_compare_gt_128
890 (power10_table_128[15 + extra_digits], CT))
891 extra_digits--;
894 return __bid_full_round64 (sign_y, exponent_y, CT, extra_digits,
895 rounding_mode, fpsc);
897 // diff_dec2+extra_digits is the number of digits to eliminate from
898 // argument CY
899 diff_dec2 = exponent_x - final_exponent_y;
901 if (diff_dec2 >= 17) {
902 #ifndef IEEE_ROUND_NEAREST
903 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
904 if ((rounding_mode) & 3) {
905 switch (rounding_mode) {
906 case ROUNDING_UP:
907 if (!sign_y) {
908 D = ((SINT64) (sign_x ^ sign_y)) >> 63;
909 D = D + D + 1;
910 coefficient_x += D;
912 break;
913 case ROUNDING_DOWN:
914 if (sign_y) {
915 D = ((SINT64) (sign_x ^ sign_y)) >> 63;
916 D = D + D + 1;
917 coefficient_x += D;
919 break;
920 case ROUNDING_TO_ZERO:
921 if (sign_y != sign_x) {
922 D = 0 - 1;
923 coefficient_x += D;
925 break;
927 if (coefficient_x < 1000000000000000ull) {
928 coefficient_x -= D;
929 coefficient_x =
930 D + (coefficient_x << 1) + (coefficient_x << 3);
931 exponent_x--;
934 #endif
935 #endif
936 #ifdef SET_STATUS_FLAGS
937 if (CY.w[1] | CY.w[0])
938 __set_status_flags (fpsc, INEXACT_EXCEPTION);
939 #endif
940 return get_BID64 (sign_x, exponent_x, coefficient_x,
941 rounding_mode, fpsc);
943 // here exponent_x <= 16+final_exponent_y
945 // truncate CY to 16 dec. digits
946 CYh = __truncate (CY, extra_digits);
948 // get remainder
949 T = power10_table_128[extra_digits].w[0];
950 __mul_64x64_to_64 (CY0L, CYh, T);
952 remainder_y = CY.w[0] - CY0L;
954 // align coeff_x, CYh
955 __mul_64x64_to_128 (CX, coefficient_x,
956 power10_table_128[diff_dec2].w[0]);
958 if (sign_x == sign_y) {
959 __add_128_64 (CT, CX, CYh);
960 if (__unsigned_compare_ge_128
961 (CT, power10_table_128[16 + diff_dec2]))
962 diff_dec2++;
963 } else {
964 if (remainder_y)
965 CYh++;
966 __sub_128_64 (CT, CX, CYh);
967 if (__unsigned_compare_gt_128
968 (power10_table_128[15 + diff_dec2], CT))
969 diff_dec2--;
972 return __bid_full_round64_remainder (sign_x, final_exponent_y, CT,
973 diff_dec2, remainder_y,
974 rounding_mode, fpsc, 0);
977 // Here (exponent_x <= exponent_y)
979 diff_dec_expon = exponent_y - exponent_x;
981 if (diff_dec_expon > MAX_FORMAT_DIGITS) {
982 rmode = rounding_mode;
984 if ((sign_x ^ sign_y)) {
985 if (!CY.w[0])
986 CY.w[1]--;
987 CY.w[0]--;
988 if (__unsigned_compare_gt_128
989 (power10_table_128[15 + extra_digits], CY)) {
990 if (rmode & 3) {
991 extra_digits--;
992 final_exponent_y--;
993 } else {
994 CY.w[0] = 1000000000000000ull;
995 CY.w[1] = 0;
996 extra_digits = 0;
1000 __scale128_10 (CY, CY);
1001 extra_digits++;
1002 CY.w[0] |= 1;
1004 return __bid_simple_round64_sticky (sign_y, final_exponent_y, CY,
1005 extra_digits, rmode, fpsc);
1007 // apply sign to coeff_x
1008 sign_x ^= sign_y;
1009 sign_x = ((SINT64) sign_x) >> 63;
1010 CX.w[0] = (coefficient_x + sign_x) ^ sign_x;
1011 CX.w[1] = sign_x;
1013 // check whether CY (rounded to 16 digits) and CX have
1014 // any digits in the same position
1015 diff_dec2 = final_exponent_y - exponent_x;
1017 if (diff_dec2 <= 17) {
1018 // align CY to 10^ex
1019 S = power10_table_128[diff_dec_expon].w[0];
1020 __mul_64x128_short (CY_L, S, CY);
1022 __add_128_128 (ST, CY_L, CX);
1023 extra_digits2 = __get_dec_digits64 (ST) - 16;
1024 return __bid_full_round64 (sign_y, exponent_x, ST, extra_digits2,
1025 rounding_mode, fpsc);
1027 // truncate CY to 16 dec. digits
1028 CYh = __truncate (CY, extra_digits);
1030 // get remainder
1031 T = power10_table_128[extra_digits].w[0];
1032 __mul_64x64_to_64 (CY0L, CYh, T);
1034 coefficient_y = CY.w[0] - CY0L;
1035 // add rounding constant
1036 rmode = rounding_mode;
1037 if (sign_y && (unsigned) (rmode - 1) < 2)
1038 rmode = 3 - rmode;
1039 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1040 #ifndef IEEE_ROUND_NEAREST
1041 if (!(rmode & 3)) //ROUNDING_TO_NEAREST
1042 #endif
1043 #endif
1045 coefficient_y += round_const_table[rmode][extra_digits];
1047 // align coefficient_y, coefficient_x
1048 S = power10_table_128[diff_dec_expon].w[0];
1049 __mul_64x64_to_128 (F, coefficient_y, S);
1051 // fraction
1052 __add_128_128 (FS, F, CX);
1054 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1055 #ifndef IEEE_ROUND_NEAREST
1056 if (rmode == 0) //ROUNDING_TO_NEAREST
1057 #endif
1059 // rounding code, here RN_EVEN
1060 // 10^(extra_digits+diff_dec_expon)
1061 T2 = power10_table_128[diff_dec_expon + extra_digits];
1062 if (__unsigned_compare_gt_128 (FS, T2)
1063 || ((CYh & 1) && __test_equal_128 (FS, T2))) {
1064 CYh++;
1065 __sub_128_128 (FS, FS, T2);
1068 #endif
1069 #ifndef IEEE_ROUND_NEAREST
1070 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1071 if (rmode == 4) //ROUNDING_TO_NEAREST
1072 #endif
1074 // rounding code, here RN_AWAY
1075 // 10^(extra_digits+diff_dec_expon)
1076 T2 = power10_table_128[diff_dec_expon + extra_digits];
1077 if (__unsigned_compare_ge_128 (FS, T2)) {
1078 CYh++;
1079 __sub_128_128 (FS, FS, T2);
1082 #endif
1083 #ifndef IEEE_ROUND_NEAREST
1084 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1085 switch (rmode) {
1086 case ROUNDING_DOWN:
1087 case ROUNDING_TO_ZERO:
1088 if ((SINT64) FS.w[1] < 0) {
1089 CYh--;
1090 if (CYh < 1000000000000000ull) {
1091 CYh = 9999999999999999ull;
1092 final_exponent_y--;
1094 } else {
1095 T2 = power10_table_128[diff_dec_expon + extra_digits];
1096 if (__unsigned_compare_ge_128 (FS, T2)) {
1097 CYh++;
1098 __sub_128_128 (FS, FS, T2);
1101 break;
1102 case ROUNDING_UP:
1103 if ((SINT64) FS.w[1] < 0)
1104 break;
1105 T2 = power10_table_128[diff_dec_expon + extra_digits];
1106 if (__unsigned_compare_gt_128 (FS, T2)) {
1107 CYh += 2;
1108 __sub_128_128 (FS, FS, T2);
1109 } else if ((FS.w[1] == T2.w[1]) && (FS.w[0] == T2.w[0])) {
1110 CYh++;
1111 FS.w[1] = FS.w[0] = 0;
1112 } else if (FS.w[1] | FS.w[0])
1113 CYh++;
1114 break;
1116 #endif
1117 #endif
1119 #ifdef SET_STATUS_FLAGS
1120 status = INEXACT_EXCEPTION;
1121 #ifndef IEEE_ROUND_NEAREST
1122 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1123 if (!(rmode & 3))
1124 #endif
1125 #endif
1127 // RN modes
1128 if ((FS.w[1] ==
1129 round_const_table_128[0][diff_dec_expon + extra_digits].w[1])
1130 && (FS.w[0] ==
1131 round_const_table_128[0][diff_dec_expon +
1132 extra_digits].w[0]))
1133 status = EXACT_STATUS;
1135 #ifndef IEEE_ROUND_NEAREST
1136 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1137 else if (!FS.w[1] && !FS.w[0])
1138 status = EXACT_STATUS;
1139 #endif
1140 #endif
1142 __set_status_flags (fpsc, status);
1143 #endif
1145 return get_BID64 (sign_y, final_exponent_y, CYh, rounding_mode,
1146 fpsc);
1151 //////////////////////////////////////////////////////////////////////////
1153 // If coefficient_z is less than 16 digits long, normalize to 16 digits
1155 /////////////////////////////////////////////////////////////////////////
1156 static UINT64
1157 BID_normalize (UINT64 sign_z, int exponent_z,
1158 UINT64 coefficient_z, UINT64 round_dir, int round_flag,
1159 int rounding_mode, unsigned *fpsc) {
1160 SINT64 D;
1161 int_double tempx;
1162 int digits_z, bin_expon, scale, rmode;
1164 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1165 #ifndef IEEE_ROUND_NEAREST
1166 rmode = rounding_mode;
1167 if (sign_z && (unsigned) (rmode - 1) < 2)
1168 rmode = 3 - rmode;
1169 #else
1170 if (coefficient_z >= power10_table_128[15].w[0])
1171 return z;
1172 #endif
1173 #endif
1175 //--- get number of bits in the coefficients of x and y ---
1176 tempx.d = (double) coefficient_z;
1177 bin_expon = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
1178 // get number of decimal digits in the coeff_x
1179 digits_z = estimate_decimal_digits[bin_expon];
1180 if (coefficient_z >= power10_table_128[digits_z].w[0])
1181 digits_z++;
1183 scale = 16 - digits_z;
1184 exponent_z -= scale;
1185 if (exponent_z < 0) {
1186 scale += exponent_z;
1187 exponent_z = 0;
1189 coefficient_z *= power10_table_128[scale].w[0];
1191 #ifdef SET_STATUS_FLAGS
1192 if (round_flag) {
1193 __set_status_flags (fpsc, INEXACT_EXCEPTION);
1194 if (coefficient_z < 1000000000000000ull)
1195 __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1196 else if ((coefficient_z == 1000000000000000ull) && !exponent_z
1197 && ((SINT64) (round_dir ^ sign_z) < 0) && round_flag
1198 && (rmode == ROUNDING_DOWN || rmode == ROUNDING_TO_ZERO))
1199 __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1201 #endif
1203 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1204 #ifndef IEEE_ROUND_NEAREST
1205 if (round_flag && (rmode & 3)) {
1206 D = round_dir ^ sign_z;
1208 if (rmode == ROUNDING_UP) {
1209 if (D >= 0)
1210 coefficient_z++;
1211 } else {
1212 if (D < 0)
1213 coefficient_z--;
1214 if (coefficient_z < 1000000000000000ull && exponent_z) {
1215 coefficient_z = 9999999999999999ull;
1216 exponent_z--;
1220 #endif
1221 #endif
1223 return get_BID64 (sign_z, exponent_z, coefficient_z, rounding_mode,
1224 fpsc);
1228 //////////////////////////////////////////////////////////////////////////
1230 // 0*10^ey + cz*10^ez, ey<ez
1232 //////////////////////////////////////////////////////////////////////////
1234 __BID_INLINE__ UINT64
1235 add_zero64 (int exponent_y, UINT64 sign_z, int exponent_z,
1236 UINT64 coefficient_z, unsigned *prounding_mode,
1237 unsigned *fpsc) {
1238 int_double tempx;
1239 int bin_expon, scale_k, scale_cz;
1240 int diff_expon;
1242 diff_expon = exponent_z - exponent_y;
1244 tempx.d = (double) coefficient_z;
1245 bin_expon = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
1246 scale_cz = estimate_decimal_digits[bin_expon];
1247 if (coefficient_z >= power10_table_128[scale_cz].w[0])
1248 scale_cz++;
1250 scale_k = 16 - scale_cz;
1251 if (diff_expon < scale_k)
1252 scale_k = diff_expon;
1253 coefficient_z *= power10_table_128[scale_k].w[0];
1255 return get_BID64 (sign_z, exponent_z - scale_k, coefficient_z,
1256 *prounding_mode, fpsc);
1258 #endif