/* Copyright (C) 2007  Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
33 #include "bid_functions.h"
// All helper functions in this header are given internal linkage and
// marked inline; __inline is the spelling accepted by both GCC and MSVC.
#define __BID_INLINE__ static __inline
37 /*********************************************************************
39 * Logical Shift Macros
41 *********************************************************************/
// Shift the 128-bit value A right by k bits into Q.
// Valid for 1 <= k <= 63 only: k == 0 would shift by 64 below,
// which is undefined behavior in C (C11 6.5.7).
#define __shr_128(Q, A, k)                      \
{                                               \
     (Q).w[0] = (A).w[0] >> (k);                \
     (Q).w[0] |= (A).w[1] << (64-(k));          \
     (Q).w[1] = (A).w[1] >> (k);                \
}
// Shift the 128-bit value A right by k bits into Q; handles any
// shift count in [1,127] (branches so no single shift is >= 64).
#define __shr_128_long(Q, A, k)                 \
{                                               \
     if((k)<64) {                               \
          (Q).w[0] = (A).w[0] >> (k);           \
          (Q).w[0] |= (A).w[1] << (64-(k));     \
          (Q).w[1] = (A).w[1] >> (k);           \
     }                                          \
     else {                                     \
          (Q).w[0] = (A).w[1]>>((k)-64);        \
          (Q).w[1] = 0;                         \
     }                                          \
}
// Shift the 128-bit value A left by k bits into Q; handles any
// shift count in [1,127] (branches so no single shift is >= 64).
#define __shl_128_long(Q, A, k)                 \
{                                               \
     if((k)<64) {                               \
          (Q).w[1] = (A).w[1] << (k);           \
          (Q).w[1] |= (A).w[0] >> (64-(k));     \
          (Q).w[0] = (A).w[0] << (k);           \
     }                                          \
     else {                                     \
          (Q).w[1] = (A).w[0]<<((k)-64);        \
          (Q).w[0] = 0;                         \
     }                                          \
}
// Extract the low 64 bits of a 128-bit value (expansion parenthesized
// for macro hygiene).
#define __low_64(Q)  ((Q).w[0])
77 /*********************************************************************
81 *********************************************************************/
// ASCII-only tolower: maps 'A'..'Z' to 'a'..'z' and leaves every other
// value unchanged.  The unsigned-char cast turns the range test into a
// single comparison.  NOTE: evaluates x more than once -- do not pass
// expressions with side effects.
#define tolower_macro(x) (((unsigned char)((x)-'A')<=('Z'-'A'))?((x)-'A'+'a'):(x))
83 /*********************************************************************
87 *********************************************************************/
// Unsigned 128-bit comparison: A > B.
// Fix: macro arguments are now fully parenthesized, consistent with
// __test_equal_128.
#define __unsigned_compare_gt_128(A, B)  \
    (((A).w[1]>(B).w[1]) || (((A).w[1]==(B).w[1]) && ((A).w[0]>(B).w[0])))
// Unsigned 128-bit comparison: A >= B.
// Fix: macro arguments are now fully parenthesized, consistent with
// __test_equal_128.
#define __unsigned_compare_ge_128(A, B)  \
    (((A).w[1]>(B).w[1]) || (((A).w[1]==(B).w[1]) && ((A).w[0]>=(B).w[0])))
// 128-bit equality test: true iff both 64-bit halves match.
#define __test_equal_128(A, B)  (((A).w[1]==(B).w[1]) && ((A).w[0]==(B).w[0]))
// tighten exponent range
// Set bp to bin_expon, then bump it by one if the 128-bit coefficient
// P is >= 2^(bin_expon+1), i.e. the initial binary-exponent estimate
// was one too low.
// NOTE(review): braces and the local declaration were reconstructed
// around the surviving fragment lines -- confirm against upstream libbid.
#define __tight_bin_range_128(bp, P, bin_expon) \
{                                               \
UINT64 M;                                       \
     M = 1;                                     \
     (bp) = (bin_expon);                        \
     if((bp)<63) {                              \
        M <<= ((bp)+1);                         \
        if((P).w[0] >= M) (bp)++; }             \
     else if((bp)>64) {                         \
        M <<= ((bp)+1-64);                      \
        if(((P).w[1]>M) ||((P).w[1]==M && (P).w[0]))\
          (bp)++; }                             \
     else if((P).w[1]) (bp)++;                  \
}
112 /*********************************************************************
114 * Add/Subtract Macros
116 *********************************************************************/
// add 64-bit value to 128-bit
// R128 = A128 + B64, propagating the carry out of the low word
// (detected by unsigned wraparound: sum < addend implies carry).
#define __add_128_64(R128, A128, B64)           \
{                                               \
UINT64 R64H;                                    \
     R64H = (A128).w[1];                        \
     (R128).w[0] = (B64) + (A128).w[0];         \
     if((R128).w[0] < (B64))                    \
          R64H ++;                              \
     (R128).w[1] = R64H;                        \
}
// subtract 64-bit value from 128-bit
// R128 = A128 - B64, propagating the borrow out of the low word.
// The high word is computed first so R128 may alias A128.
#define __sub_128_64(R128, A128, B64)           \
{                                               \
UINT64 R64H;                                    \
     R64H = (A128).w[1];                        \
     if((A128).w[0] < (B64))                    \
          R64H --;                              \
     (R128).w[1] = R64H;                        \
     (R128).w[0] = (A128).w[0] - (B64);         \
}
// add 128-bit value to 128-bit
// assume no carry-out
// R128 = A128 + B128; the result is staged in a temporary so R128 may
// alias either input.
#define __add_128_128(R128, A128, B128)         \
{                                               \
UINT128 Q128;                                   \
     Q128.w[1] = (A128).w[1]+(B128).w[1];       \
     Q128.w[0] = (B128).w[0] + (A128).w[0];     \
     if(Q128.w[0] < (B128).w[0])                \
          Q128.w[1] ++;                         \
     (R128).w[1] = Q128.w[1];                   \
     (R128).w[0] = Q128.w[0];                   \
}
// R128 = A128 - B128, propagating the borrow out of the low word;
// staged in a temporary so R128 may alias either input.
#define __sub_128_128(R128, A128, B128)         \
{                                               \
UINT128 Q128;                                   \
     Q128.w[1] = (A128).w[1]-(B128).w[1];       \
     Q128.w[0] = (A128).w[0] - (B128).w[0];     \
     if((A128).w[0] < (B128).w[0])              \
          Q128.w[1] --;                         \
     (R128).w[1] = Q128.w[1];                   \
     (R128).w[0] = Q128.w[0];                   \
}
// S = X + Y; CY = carry out (1 on unsigned wraparound, else 0).
// X is latched in a local first so S may alias X.
#define __add_carry_out(S, CY, X, Y)    \
{                                       \
UINT64 X1=X;                            \
     S = X + Y;                         \
     CY = (S<X1) ? 1 : 0;               \
}
// S = X + Y + CI (CI in {0,1}); CY = carry out.  A carry occurs if
// either the X+CI pre-add wraps (X1<CI) or the final add wraps (S<X1).
#define __add_carry_in_out(S, CY, X, Y, CI)    \
{                                              \
UINT64 X1;                                     \
     X1 = X + CI;                              \
     S = X1 + Y;                               \
     CY = ((S<X1) || (X1<CI)) ? 1 : 0;         \
}
// S = X - Y; CY = borrow out (1 when Y > X, detected by the
// difference wrapping above the latched minuend).
#define __sub_borrow_out(S, CY, X, Y)    \
{                                        \
UINT64 X1=X;                             \
     S = X - Y;                          \
     CY = (S>X1) ? 1 : 0;                \
}
// S = X - Y - CI (CI in {0,1}); CY = borrow out.  A borrow occurs if
// either the X-CI pre-subtract wraps (X1>X0) or the final subtract
// wraps (S>X1).
#define __sub_borrow_in_out(S, CY, X, Y, CI)    \
{                                               \
UINT64 X1, X0=X;                                \
     X1 = X - CI;                               \
     S = X1 - Y;                                \
     CY = ((S>X1) || (X1>X0)) ? 1 : 0;          \
}
// increment C128 and check for rounding overflow:
// if (C_128) = 10^34 then (C_128) = 10^33 and increment the exponent
// (10^34 = 0x0001ed09bead87c0_378d8e6400000000,
//  10^33 = 0x0000314dc6448d93_38c15b0a00000000)
#define INCREMENT(C_128, exp)                   \
{                                               \
  C_128.w[0]++;                                 \
  if (C_128.w[0] == 0) C_128.w[1]++;            \
  if (C_128.w[1] == 0x0001ed09bead87c0ull &&    \
      C_128.w[0] == 0x378d8e6400000000ull) {    \
    exp++;                                      \
    C_128.w[1] = 0x0000314dc6448d93ull;         \
    C_128.w[0] = 0x38c15b0a00000000ull;         \
  }                                             \
}
// decrement C128 and check for rounding underflow, but only at the
// boundary: if C_128 = 10^33 - 1 and exp > 0 then C_128 = 10^34 - 1
// and decrement the exponent
#define DECREMENT(C_128, exp)                   \
{                                               \
  C_128.w[0]--;                                 \
  if (C_128.w[0] == 0xffffffffffffffffull) C_128.w[1]--; \
  if (C_128.w[1] == 0x0000314dc6448d93ull &&    \
      C_128.w[0] == 0x38c15b09ffffffffull && exp > 0) { \
    exp--;                                      \
    C_128.w[1] = 0x0001ed09bead87c0ull;         \
    C_128.w[0] = 0x378d8e63ffffffffull;         \
  }                                             \
}
213 /*********************************************************************
217 *********************************************************************/
// P64 = low 64 bits of CX * CY (high half of the product discarded).
#define __mul_64x64_to_64(P64, CX, CY)   (P64) = (CX) * (CY)
219 /***************************************
220 * Signed, Full 64x64-bit Multiply
221 ***************************************/
222 #define __imul_64x64_to_128(P, CX, CY) \
225 __mul_64x64_to_128(P, CX, CY); \
227 SX = ((SINT64)(CX))>>63; \
228 SY = ((SINT64)(CY))>>63; \
229 SX &= CY; SY &= CX; \
231 (P).w[1] = (P).w[1] - SX - SY; \
233 /***************************************
234 * Signed, Full 64x128-bit Multiply
235 ***************************************/
236 #define __imul_64x128_full(Ph, Ql, A, B) \
238 UINT128 ALBL, ALBH, QM2, QM; \
240 __imul_64x64_to_128(ALBH, (A), (B).w[1]); \
241 __imul_64x64_to_128(ALBL, (A), (B).w[0]); \
243 (Ql).w[0] = ALBL.w[0]; \
244 QM.w[0] = ALBL.w[1]; \
245 QM.w[1] = ((SINT64)ALBL.w[1])>>63; \
246 __add_128_128(QM2, ALBH, QM); \
247 (Ql).w[1] = QM2.w[0]; \
250 /*****************************************************
251 * Unsigned Multiply Macros
252 *****************************************************/
// get full 64x64bit product
// P (128-bit) = CX * CY, computed from four 32x32->64 partial
// products: PL = lo*lo, PM/PM2 = cross terms, PH = hi*hi.
#define __mul_64x64_to_128(P, CX, CY)           \
{                                               \
UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;          \
     CXH = (CX) >> 32;                          \
     CXL = (UINT32)(CX);                        \
     CYH = (CY) >> 32;                          \
     CYL = (UINT32)(CY);                        \
                                                \
     PM = CXH*CYL;                              \
     PH = CXH*CYH;                              \
     PL = CXL*CYL;                              \
     PM2 = CXL*CYH;                             \
     PH += (PM>>32);                            \
     PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);    \
                                                \
     (P).w[1] = PH + (PM>>32);                  \
     (P).w[0] = (PM<<32)+(UINT32)PL;            \
}
273 // get full 64x64bit product
275 // This macro is used for CX < 2^61, CY < 2^61
277 #define __mul_64x64_to_128_fast(P, CX, CY) \
279 UINT64 CXH, CXL, CYH, CYL, PL, PH, PM; \
281 CXL = (UINT32)(CX); \
283 CYL = (UINT32)(CY); \
291 (P).w[1] = PH + (PM>>32); \
292 (P).w[0] = (PM<<32)+(UINT32)PL; \
295 #define __sqr64_fast(P, CX) \
297 UINT64 CXH, CXL, PL, PH, PM; \
299 CXL = (UINT32)(CX); \
307 (P).w[1] = PH + (PM>>32); \
308 (P).w[0] = (PM<<32)+(UINT32)PL; \
310 // get full 64x64bit product
312 // This implementation is used for CX < 2^61, CY < 2^61
314 #define __mul_64x64_to_64_high_fast(P, CX, CY) \
316 UINT64 CXH, CXL, CYH, CYL, PL, PH, PM; \
318 CXL = (UINT32)(CX); \
320 CYL = (UINT32)(CY); \
328 (P) = PH + (PM>>32); \
330 // get full 64x64bit product
332 #define __mul_64x64_to_128_full(P, CX, CY) \
334 UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;\
336 CXL = (UINT32)(CX); \
338 CYL = (UINT32)(CY); \
345 PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
347 (P).w[1] = PH + (PM>>32); \
348 (P).w[0] = (PM<<32)+(UINT32)PL; \
350 #define __mul_128x128_high(Q, A, B) \
352 UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2; \
354 __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]); \
355 __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]); \
356 __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
357 __mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]); \
359 __add_128_128(QM, ALBH, AHBL); \
360 __add_128_64(QM2, QM, ALBL.w[1]); \
361 __add_128_64((Q), AHBH, QM2.w[1]); \
363 #define __mul_128x128_full(Qh, Ql, A, B) \
365 UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2; \
367 __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]); \
368 __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]); \
369 __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
370 __mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]); \
372 __add_128_128(QM, ALBH, AHBL); \
373 (Ql).w[0] = ALBL.w[0]; \
374 __add_128_64(QM2, QM, ALBL.w[1]); \
375 __add_128_64((Qh), AHBH, QM2.w[1]); \
376 (Ql).w[1] = QM2.w[0]; \
378 #define __mul_128x128_low(Ql, A, B) \
383 __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
384 QM64 = (B).w[0]*(A).w[1] + (A).w[0]*(B).w[1]; \
386 (Ql).w[0] = ALBL.w[0]; \
387 (Ql).w[1] = QM64 + ALBL.w[1]; \
389 #define __mul_64x128_low(Ql, A, B) \
391 UINT128 ALBL, ALBH, QM2; \
392 __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
393 __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
394 (Ql).w[0] = ALBL.w[0]; \
395 __add_128_64(QM2, ALBH, ALBL.w[1]); \
396 (Ql).w[1] = QM2.w[0]; \
398 #define __mul_64x128_full(Ph, Ql, A, B) \
400 UINT128 ALBL, ALBH, QM2; \
402 __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
403 __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
405 (Ql).w[0] = ALBL.w[0]; \
406 __add_128_64(QM2, ALBH, ALBL.w[1]); \
407 (Ql).w[1] = QM2.w[0]; \
410 #define __mul_64x128_to_192(Q, A, B) \
412 UINT128 ALBL, ALBH, QM2; \
414 __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
415 __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
417 (Q).w[0] = ALBL.w[0]; \
418 __add_128_64(QM2, ALBH, ALBL.w[1]); \
419 (Q).w[1] = QM2.w[0]; \
420 (Q).w[2] = QM2.w[1]; \
422 #define __mul_64x128_to192(Q, A, B) \
424 UINT128 ALBL, ALBH, QM2; \
426 __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
427 __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
429 (Q).w[0] = ALBL.w[0]; \
430 __add_128_64(QM2, ALBH, ALBL.w[1]); \
431 (Q).w[1] = QM2.w[0]; \
432 (Q).w[2] = QM2.w[1]; \
434 #define __mul_128x128_to_256(P256, A, B) \
437 UINT64 Phl, Phh, CY1, CY2; \
439 __mul_64x128_full(Phl, Qll, A.w[0], B); \
440 __mul_64x128_full(Phh, Qlh, A.w[1], B); \
441 (P256).w[0] = Qll.w[0]; \
442 __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]); \
443 __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1); \
444 (P256).w[3] = Phh + CY2; \
447 // For better performance, will check A.w[1] against 0,
449 // Use this macro accordingly
450 #define __mul_128x128_to_256_check_A(P256, A, B) \
453 UINT64 Phl, Phh, CY1, CY2; \
455 __mul_64x128_full(Phl, Qll, A.w[0], B); \
456 (P256).w[0] = Qll.w[0]; \
458 __mul_64x128_full(Phh, Qlh, A.w[1], B); \
459 __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]); \
460 __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1); \
461 (P256).w[3] = Phh + CY2; } \
463 (P256).w[1] = Qll.w[1]; \
467 #define __mul_64x192_to_256(lP, lA, lB) \
469 UINT128 lP0,lP1,lP2; \
471 __mul_64x64_to_128(lP0, lA, (lB).w[0]); \
472 __mul_64x64_to_128(lP1, lA, (lB).w[1]); \
473 __mul_64x64_to_128(lP2, lA, (lB).w[2]); \
474 (lP).w[0] = lP0.w[0]; \
475 __add_carry_out((lP).w[1],lC,lP1.w[0],lP0.w[1]); \
476 __add_carry_in_out((lP).w[2],lC,lP2.w[0],lP1.w[1],lC); \
477 (lP).w[3] = lP2.w[1] + lC; \
479 #define __mul_64x256_to_320(P, A, B) \
481 UINT128 lP0,lP1,lP2,lP3; \
483 __mul_64x64_to_128(lP0, A, (B).w[0]); \
484 __mul_64x64_to_128(lP1, A, (B).w[1]); \
485 __mul_64x64_to_128(lP2, A, (B).w[2]); \
486 __mul_64x64_to_128(lP3, A, (B).w[3]); \
487 (P).w[0] = lP0.w[0]; \
488 __add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]); \
489 __add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC); \
490 __add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC); \
491 (P).w[4] = lP3.w[1] + lC; \
493 #define __mul_192x192_to_384(P, A, B) \
497 __mul_64x192_to_256(P0, (A).w[0], B); \
498 __mul_64x192_to_256(P1, (A).w[1], B); \
499 __mul_64x192_to_256(P2, (A).w[2], B); \
500 (P).w[0] = P0.w[0]; \
501 __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]); \
502 __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
503 __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
504 (P).w[4] = P1.w[3] + CY; \
505 __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]); \
506 __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY); \
507 __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY); \
508 (P).w[5] = P2.w[3] + CY; \
510 #define __mul_64x320_to_384(P, A, B) \
512 UINT128 lP0,lP1,lP2,lP3,lP4; \
514 __mul_64x64_to_128(lP0, A, (B).w[0]); \
515 __mul_64x64_to_128(lP1, A, (B).w[1]); \
516 __mul_64x64_to_128(lP2, A, (B).w[2]); \
517 __mul_64x64_to_128(lP3, A, (B).w[3]); \
518 __mul_64x64_to_128(lP4, A, (B).w[4]); \
519 (P).w[0] = lP0.w[0]; \
520 __add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]); \
521 __add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC); \
522 __add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC); \
523 __add_carry_in_out((P).w[4],lC,lP4.w[0],lP3.w[1],lC); \
524 (P).w[5] = lP4.w[1] + lC; \
527 // Full 128x128-bit product
528 #define __sqr128_to_256(P256, A) \
530 UINT128 Qll, Qlh, Qhh; \
531 UINT64 TMP_C1, TMP_C2; \
533 __mul_64x64_to_128(Qhh, A.w[1], A.w[1]); \
534 __mul_64x64_to_128(Qlh, A.w[0], A.w[1]); \
535 Qhh.w[1] += (Qlh.w[1]>>63); \
536 Qlh.w[1] = (Qlh.w[1]+Qlh.w[1])|(Qlh.w[0]>>63); \
537 Qlh.w[0] += Qlh.w[0]; \
538 __mul_64x64_to_128(Qll, A.w[0], A.w[0]); \
540 __add_carry_out((P256).w[1],TMP_C1, Qlh.w[0], Qll.w[1]); \
541 (P256).w[0] = Qll.w[0]; \
542 __add_carry_in_out((P256).w[2],TMP_C2, Qlh.w[1], Qhh.w[0], TMP_C1); \
543 (P256).w[3] = Qhh.w[1]+TMP_C2; \
545 #define __mul_128x128_to_256_low_high(PQh, PQl, A, B) \
548 UINT64 Phl, Phh, C1, C2; \
550 __mul_64x128_full(Phl, Qll, A.w[0], B); \
551 __mul_64x128_full(Phh, Qlh, A.w[1], B); \
552 (PQl).w[0] = Qll.w[0]; \
553 __add_carry_out((PQl).w[1],C1, Qlh.w[0], Qll.w[1]); \
554 __add_carry_in_out((PQh).w[0],C2, Qlh.w[1], Phl, C1); \
555 (PQh).w[1] = Phh + C2; \
557 #define __mul_256x256_to_512(P, A, B) \
559 UINT512 P0,P1,P2,P3; \
561 __mul_64x256_to_320(P0, (A).w[0], B); \
562 __mul_64x256_to_320(P1, (A).w[1], B); \
563 __mul_64x256_to_320(P2, (A).w[2], B); \
564 __mul_64x256_to_320(P3, (A).w[3], B); \
565 (P).w[0] = P0.w[0]; \
566 __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]); \
567 __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
568 __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
569 __add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY); \
570 (P).w[5] = P1.w[4] + CY; \
571 __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]); \
572 __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY); \
573 __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY); \
574 __add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY); \
575 (P).w[6] = P2.w[4] + CY; \
576 __add_carry_out((P).w[3],CY,P3.w[0],(P).w[3]); \
577 __add_carry_in_out((P).w[4],CY,P3.w[1],(P).w[4],CY); \
578 __add_carry_in_out((P).w[5],CY,P3.w[2],(P).w[5],CY); \
579 __add_carry_in_out((P).w[6],CY,P3.w[3],(P).w[6],CY); \
580 (P).w[7] = P3.w[4] + CY; \
582 #define __mul_192x256_to_448(P, A, B) \
586 __mul_64x256_to_320(P0, (A).w[0], B); \
587 __mul_64x256_to_320(P1, (A).w[1], B); \
588 __mul_64x256_to_320(P2, (A).w[2], B); \
589 (P).w[0] = P0.w[0]; \
590 __add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]); \
591 __add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
592 __add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
593 __add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY); \
594 (P).w[5] = P1.w[4] + CY; \
595 __add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]); \
596 __add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY); \
597 __add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY); \
598 __add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY); \
599 (P).w[6] = P2.w[4] + CY; \
601 #define __mul_320x320_to_640(P, A, B) \
603 UINT512 P0,P1,P2,P3; \
605 __mul_256x256_to_512((P), (A), B); \
606 __mul_64x256_to_320(P1, (A).w[4], B); \
607 __mul_64x256_to_320(P2, (B).w[4], A); \
608 __mul_64x64_to_128(P3, (A).w[4], (B).w[4]); \
609 __add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]); \
610 __add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY); \
611 __add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY); \
612 __add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY); \
613 __add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY); \
615 __add_carry_out((P).w[4],CY,(P).w[4],P0.w[0]); \
616 __add_carry_in_out((P).w[5],CY,(P).w[5],P0.w[1],CY); \
617 __add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[2],CY); \
618 __add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[3],CY); \
619 __add_carry_in_out((P).w[8],CY,P3.w[0],P0.w[4],CY); \
620 (P).w[9] = P3.w[1] + CY; \
622 #define __mul_384x384_to_768(P, A, B) \
624 UINT512 P0,P1,P2,P3; \
626 __mul_320x320_to_640((P), (A), B); \
627 __mul_64x320_to_384(P1, (A).w[5], B); \
628 __mul_64x320_to_384(P2, (B).w[5], A); \
629 __mul_64x64_to_128(P3, (A).w[5], (B).w[5]); \
630 __add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]); \
631 __add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY); \
632 __add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY); \
633 __add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY); \
634 __add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY); \
635 __add_carry_in_out((P0).w[5],CY,P1.w[5],P2.w[5],CY); \
637 __add_carry_out((P).w[5],CY,(P).w[5],P0.w[0]); \
638 __add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[1],CY); \
639 __add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[2],CY); \
640 __add_carry_in_out((P).w[8],CY,(P).w[8],P0.w[3],CY); \
641 __add_carry_in_out((P).w[9],CY,(P).w[9],P0.w[4],CY); \
642 __add_carry_in_out((P).w[10],CY,P3.w[0],P0.w[5],CY); \
643 (P).w[11] = P3.w[1] + CY; \
645 #define __mul_64x128_short(Ql, A, B) \
649 __mul_64x64_to_64(ALBH_L, (A),(B).w[1]); \
650 __mul_64x64_to_128((Ql), (A), (B).w[0]); \
652 (Ql).w[1] += ALBH_L; \
654 #define __scale128_10(D,_TMP) \
656 UINT128 _TMP2,_TMP8; \
657 _TMP2.w[1] = (_TMP.w[1]<<1)|(_TMP.w[0]>>63); \
658 _TMP2.w[0] = _TMP.w[0]<<1; \
659 _TMP8.w[1] = (_TMP.w[1]<<3)|(_TMP.w[0]>>61); \
660 _TMP8.w[0] = _TMP.w[0]<<3; \
661 __add_128_128(D, _TMP2, _TMP8); \
// P128 = CX64 * CY64, full 128-bit product via 32x32->64 partial
// products (machine-independent formulation).
#define __mul_64x64_to_128MACH(P128, CX64, CY64)  \
{                                                 \
  UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;            \
  CXH = (CX64) >> 32;                             \
  CXL = (UINT32)(CX64);                           \
  CYH = (CY64) >> 32;                             \
  CYL = (UINT32)(CY64);                           \
  PM = CXH*CYL;                                   \
  PH = CXH*CYH;                                   \
  PL = CXL*CYL;                                   \
  PM2 = CXL*CYH;                                  \
  PH += (PM>>32);                                 \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);         \
  (P128).w[1] = PH + (PM>>32);                    \
  (P128).w[0] = (PM<<32)+(UINT32)PL;              \
}
// P64 = high 64 bits of the full CX64 * CY64 product (low half
// discarded); same partial-product ladder as __mul_64x64_to_128MACH.
#define __mul_64x64_to_128HIGH(P64, CX64, CY64)   \
{                                                 \
  UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;            \
  CXH = (CX64) >> 32;                             \
  CXL = (UINT32)(CX64);                           \
  CYH = (CY64) >> 32;                             \
  CYL = (UINT32)(CY64);                           \
  PM = CXH*CYL;                                   \
  PH = CXH*CYH;                                   \
  PL = CXL*CYL;                                   \
  PM2 = CXL*CYH;                                  \
  PH += (PM>>32);                                 \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);         \
  P64 = PH + (PM>>32);                            \
}
// Q128 = low 128 bits of A64 * B128: full 64x64 product of the low
// words plus the (truncated) cross term A64 * B128.w[1] folded into
// the high word.
// NOTE(review): enclosing braces and the local declaration were
// reconstructed -- confirm against upstream libbid.
#define __mul_128x64_to_128(Q128, A64, B128)      \
{                                                 \
  UINT64 ALBH_L;                                  \
  ALBH_L = (A64) * (B128).w[1];                   \
  __mul_64x64_to_128MACH((Q128), (A64), (B128).w[0]); \
  (Q128).w[1] += ALBH_L;                          \
}
// might simplify by calculating just QM2.w[0]
// Ql = low 128 bits of A (64-bit) * B (128-bit).
// NOTE(review): enclosing braces were reconstructed -- confirm
// against upstream libbid.
#define __mul_64x128_to_128(Ql, A, B)             \
{                                                 \
  UINT128 ALBL, ALBH, QM2;                        \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
  (Ql).w[0] = ALBL.w[0];                          \
  __add_128_64(QM2, ALBH, ALBL.w[1]);             \
  (Ql).w[1] = QM2.w[0];                           \
}
713 /*********************************************************************
715 * BID Pack/Unpack Macros
717 *********************************************************************/
/////////////////////////////////////////
// BID64 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_64  767
#define DECIMAL_EXPONENT_BIAS 398
#define MAX_FORMAT_DIGITS     16
/////////////////////////////////////////
// BID128 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_128  12287
#define DECIMAL_EXPONENT_BIAS_128  6176
#define MAX_FORMAT_DIGITS_128      34
/////////////////////////////////////////
// BID32 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_32  191
#define DECIMAL_EXPONENT_BIAS_32  101
#define MAX_FORMAT_DIGITS_32      7
////////////////////////////////////////
//  Constant Definitions
///////////////////////////////////////
#define SPECIAL_ENCODING_MASK64 0x6000000000000000ull
#define INFINITY_MASK64         0x7800000000000000ull
#define SINFINITY_MASK64        0xf800000000000000ull
#define SSNAN_MASK64            0xfc00000000000000ull
#define NAN_MASK64              0x7c00000000000000ull
#define SNAN_MASK64             0x7e00000000000000ull
#define QUIET_MASK64            0xfdffffffffffffffull
#define LARGE_COEFF_MASK64      0x0007ffffffffffffull
#define LARGE_COEFF_HIGH_BIT64  0x0020000000000000ull
#define SMALL_COEFF_MASK64      0x001fffffffffffffull
#define EXPONENT_MASK64         0x3ff
#define EXPONENT_SHIFT_LARGE64  51
#define EXPONENT_SHIFT_SMALL64  53
#define LARGEST_BID64           0x77fb86f26fc0ffffull
#define SMALLEST_BID64          0xf7fb86f26fc0ffffull
#define SMALL_COEFF_MASK128     0x0001ffffffffffffull
#define LARGE_COEFF_MASK128     0x00007fffffffffffull
#define EXPONENT_MASK128        0x3fff
#define LARGEST_BID128_HIGH     0x5fffed09bead87c0ull
#define LARGEST_BID128_LOW      0x378d8e63ffffffffull
#define SPECIAL_ENCODING_MASK32 0x60000000ul
#define INFINITY_MASK32         0x78000000ul
#define LARGE_COEFF_MASK32      0x007ffffful
#define LARGE_COEFF_HIGH_BIT32  0x00800000ul
#define SMALL_COEFF_MASK32      0x001ffffful
#define EXPONENT_MASK32         0xff
#define LARGEST_BID32           0x77f8967f
#define NAN_MASK32              0x7c000000
#define SNAN_MASK32             0x7e000000
#define MASK_BINARY_EXPONENT    0x7ff0000000000000ull
#define BINARY_EXPONENT_BIAS    0x3ff
#define UPPER_EXPON_LIMIT       51
771 // data needed for BID pack/unpack macros
772 extern UINT64 round_const_table
[][19];
773 extern UINT128 reciprocals10_128
[];
774 extern int recip_scale
[];
775 extern UINT128 power10_table_128
[];
776 extern int estimate_decimal_digits
[];
777 extern int estimate_bin_expon
[];
778 extern UINT64 power10_index_binexp
[];
779 extern int short_recip_scale
[];
780 extern UINT64 reciprocals10_64
[];
781 extern UINT128 power10_index_binexp_128
[];
782 extern UINT128 round_const_table_128
[][36];
785 //////////////////////////////////////////////
786 // Status Flag Handling
787 /////////////////////////////////////////////
// OR the given exception bits into the status word pointed to by fpsc.
// Fix: the status argument is now parenthesized (macro hygiene).
#define __set_status_flags(fpsc, status)  *(fpsc) |= (status)
// Query (without clearing) the inexact flag in the status word.
#define is_inexact(fpsc)  ((*(fpsc))&INEXACT_EXCEPTION)
791 __BID_INLINE__ UINT64
792 unpack_BID64 (UINT64
* psign_x
, int *pexponent_x
,
793 UINT64
* pcoefficient_x
, UINT64 x
) {
796 *psign_x
= x
& 0x8000000000000000ull
;
798 if ((x
& SPECIAL_ENCODING_MASK64
) == SPECIAL_ENCODING_MASK64
) {
801 coeff
= (x
& LARGE_COEFF_MASK64
) | LARGE_COEFF_HIGH_BIT64
;
803 if ((x
& INFINITY_MASK64
) == INFINITY_MASK64
) {
805 *pcoefficient_x
= x
& 0xfe03ffffffffffffull
;
806 if ((x
& 0x0003ffffffffffffull
) >= 1000000000000000ull)
807 *pcoefficient_x
= x
& 0xfe00000000000000ull
;
808 if ((x
& NAN_MASK64
) == INFINITY_MASK64
)
809 *pcoefficient_x
= x
& SINFINITY_MASK64
;
810 return 0; // NaN or Infinity
812 // check for non-canonical values
813 if (coeff
>= 10000000000000000ull)
815 *pcoefficient_x
= coeff
;
817 tmp
= x
>> EXPONENT_SHIFT_LARGE64
;
818 *pexponent_x
= (int) (tmp
& EXPONENT_MASK64
);
822 tmp
= x
>> EXPONENT_SHIFT_SMALL64
;
823 *pexponent_x
= (int) (tmp
& EXPONENT_MASK64
);
825 *pcoefficient_x
= (x
& SMALL_COEFF_MASK64
);
827 return *pcoefficient_x
;
831 // BID64 pack macro (general form)
833 __BID_INLINE__ UINT64
834 get_BID64 (UINT64 sgn
, int expon
, UINT64 coeff
, int rmode
,
836 UINT128 Stemp
, Q_low
;
837 UINT64 QH
, r
, mask
, C64
, remainder_h
, CY
, carry
;
838 int extra_digits
, amount
, amount2
;
841 if (coeff
> 9999999999999999ull) {
843 coeff
= 1000000000000000ull;
845 // check for possible underflow/overflow
846 if (((unsigned) expon
) >= 3 * 256) {
849 if (expon
+ MAX_FORMAT_DIGITS
< 0) {
850 #ifdef SET_STATUS_FLAGS
851 __set_status_flags (fpsc
,
852 UNDERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
854 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
855 #ifndef IEEE_ROUND_NEAREST
856 if (rmode
== ROUNDING_DOWN
&& sgn
)
857 return 0x8000000000000001ull
;
858 if (rmode
== ROUNDING_UP
&& !sgn
)
865 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
866 #ifndef IEEE_ROUND_NEAREST
867 if (sgn
&& (unsigned) (rmode
- 1) < 2)
871 // get digits to be shifted out
872 extra_digits
= -expon
;
873 coeff
+= round_const_table
[rmode
][extra_digits
];
875 // get coeff*(2^M[extra_digits])/10^extra_digits
876 __mul_64x128_full (QH
, Q_low
, coeff
,
877 reciprocals10_128
[extra_digits
]);
879 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
880 amount
= recip_scale
[extra_digits
];
884 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
885 #ifndef IEEE_ROUND_NEAREST
886 if (rmode
== 0) //ROUNDING_TO_NEAREST
889 // check whether fractional part of initial_P/10^extra_digits is exactly .5
892 amount2
= 64 - amount
;
895 remainder_h
>>= amount2
;
896 remainder_h
= remainder_h
& QH
;
899 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
900 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
902 reciprocals10_128
[extra_digits
].w
[0]))) {
908 #ifdef SET_STATUS_FLAGS
910 if (is_inexact (fpsc
))
911 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
);
913 status
= INEXACT_EXCEPTION
;
915 remainder_h
= QH
<< (64 - amount
);
918 case ROUNDING_TO_NEAREST
:
919 case ROUNDING_TIES_AWAY
:
920 // test whether fractional part is 0
921 if (remainder_h
== 0x8000000000000000ull
922 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
923 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
925 reciprocals10_128
[extra_digits
].w
[0])))
926 status
= EXACT_STATUS
;
929 case ROUNDING_TO_ZERO
:
931 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
932 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
934 reciprocals10_128
[extra_digits
].w
[0])))
935 status
= EXACT_STATUS
;
939 __add_carry_out (Stemp
.w
[0], CY
, Q_low
.w
[0],
940 reciprocals10_128
[extra_digits
].w
[0]);
941 __add_carry_in_out (Stemp
.w
[1], carry
, Q_low
.w
[1],
942 reciprocals10_128
[extra_digits
].w
[1], CY
);
943 if ((remainder_h
>> (64 - amount
)) + carry
>=
944 (((UINT64
) 1) << amount
))
945 status
= EXACT_STATUS
;
948 if (status
!= EXACT_STATUS
)
949 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| status
);
956 while (coeff
< 1000000000000000ull && expon
>= 3 * 256) {
958 coeff
= (coeff
<< 3) + (coeff
<< 1);
960 if (expon
> DECIMAL_MAX_EXPON_64
) {
961 #ifdef SET_STATUS_FLAGS
962 __set_status_flags (fpsc
, OVERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
965 r
= sgn
| INFINITY_MASK64
;
971 case ROUNDING_TO_ZERO
:
972 r
= sgn
| LARGEST_BID64
;
984 mask
<<= EXPONENT_SHIFT_SMALL64
;
986 // check whether coefficient fits in 10*5+3 bits
989 r
<<= EXPONENT_SHIFT_SMALL64
;
995 // eliminate the case coeff==10^16 after rounding
996 if (coeff
== 10000000000000000ull) {
998 r
<<= EXPONENT_SHIFT_SMALL64
;
999 r
|= (1000000000000000ull | sgn
);
1004 r
<<= EXPONENT_SHIFT_LARGE64
;
1005 r
|= (sgn
| SPECIAL_ENCODING_MASK64
);
1006 // add coeff, without leading bits
1007 mask
= (mask
>> 2) - 1;
1018 // No overflow/underflow checking
1020 __BID_INLINE__ UINT64
1021 fast_get_BID64 (UINT64 sgn
, int expon
, UINT64 coeff
) {
1025 mask
<<= EXPONENT_SHIFT_SMALL64
;
1027 // check whether coefficient fits in 10*5+3 bits
1030 r
<<= EXPONENT_SHIFT_SMALL64
;
1036 // eliminate the case coeff==10^16 after rounding
1037 if (coeff
== 10000000000000000ull) {
1039 r
<<= EXPONENT_SHIFT_SMALL64
;
1040 r
|= (1000000000000000ull | sgn
);
1045 r
<<= EXPONENT_SHIFT_LARGE64
;
1046 r
|= (sgn
| SPECIAL_ENCODING_MASK64
);
1047 // add coeff, without leading bits
1048 mask
= (mask
>> 2) - 1;
1057 // no underflow checking
1059 __BID_INLINE__ UINT64
1060 fast_get_BID64_check_OF (UINT64 sgn
, int expon
, UINT64 coeff
, int rmode
,
1064 if (((unsigned) expon
) >= 3 * 256 - 1) {
1065 if ((expon
== 3 * 256 - 1) && coeff
== 10000000000000000ull) {
1067 coeff
= 1000000000000000ull;
1070 if (((unsigned) expon
) >= 3 * 256) {
1071 while (coeff
< 1000000000000000ull && expon
>= 3 * 256) {
1073 coeff
= (coeff
<< 3) + (coeff
<< 1);
1075 if (expon
> DECIMAL_MAX_EXPON_64
) {
1076 #ifdef SET_STATUS_FLAGS
1077 __set_status_flags (fpsc
,
1078 OVERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
1081 r
= sgn
| INFINITY_MASK64
;
1087 case ROUNDING_TO_ZERO
:
1088 r
= sgn
| LARGEST_BID64
;
1101 mask
<<= EXPONENT_SHIFT_SMALL64
;
1103 // check whether coefficient fits in 10*5+3 bits
1106 r
<<= EXPONENT_SHIFT_SMALL64
;
1112 // eliminate the case coeff==10^16 after rounding
1113 if (coeff
== 10000000000000000ull) {
1115 r
<<= EXPONENT_SHIFT_SMALL64
;
1116 r
|= (1000000000000000ull | sgn
);
1121 r
<<= EXPONENT_SHIFT_LARGE64
;
1122 r
|= (sgn
| SPECIAL_ENCODING_MASK64
);
1123 // add coeff, without leading bits
1124 mask
= (mask
>> 2) - 1;
1133 // No overflow/underflow checking
1134 // or checking for coefficients equal to 10^16 (after rounding)
1136 __BID_INLINE__ UINT64
1137 very_fast_get_BID64 (UINT64 sgn
, int expon
, UINT64 coeff
) {
1141 mask
<<= EXPONENT_SHIFT_SMALL64
;
1143 // check whether coefficient fits in 10*5+3 bits
1146 r
<<= EXPONENT_SHIFT_SMALL64
;
1152 r
<<= EXPONENT_SHIFT_LARGE64
;
1153 r
|= (sgn
| SPECIAL_ENCODING_MASK64
);
1154 // add coeff, without leading bits
1155 mask
= (mask
>> 2) - 1;
1163 // No overflow/underflow checking or checking for coefficients above 2^53
1165 __BID_INLINE__ UINT64
1166 very_fast_get_BID64_small_mantissa (UINT64 sgn
, int expon
, UINT64 coeff
) {
1171 r
<<= EXPONENT_SHIFT_SMALL64
;
1178 // This pack macro is used when underflow is known to occur
1180 __BID_INLINE__ UINT64
1181 get_BID64_UF (UINT64 sgn
, int expon
, UINT64 coeff
, UINT64 R
, int rmode
,
1183 UINT128 C128
, Q_low
, Stemp
;
1184 UINT64 C64
, remainder_h
, QH
, carry
, CY
;
1185 int extra_digits
, amount
, amount2
;
1189 if (expon
+ MAX_FORMAT_DIGITS
< 0) {
1190 #ifdef SET_STATUS_FLAGS
1191 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
1193 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1194 #ifndef IEEE_ROUND_NEAREST
1195 if (rmode
== ROUNDING_DOWN
&& sgn
)
1196 return 0x8000000000000001ull
;
1197 if (rmode
== ROUNDING_UP
&& !sgn
)
1205 coeff
= (coeff
<< 3) + (coeff
<< 1);
1206 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1207 #ifndef IEEE_ROUND_NEAREST
1208 if (sgn
&& (unsigned) (rmode
- 1) < 2)
1214 // get digits to be shifted out
1215 extra_digits
= 1 - expon
;
1216 C128
.w
[0] = coeff
+ round_const_table
[rmode
][extra_digits
];
1218 // get coeff*(2^M[extra_digits])/10^extra_digits
1219 __mul_64x128_full (QH
, Q_low
, C128
.w
[0],
1220 reciprocals10_128
[extra_digits
]);
1222 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1223 amount
= recip_scale
[extra_digits
];
1226 //__shr_128(C128, Q_high, amount);
1228 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1229 #ifndef IEEE_ROUND_NEAREST
1230 if (rmode
== 0) //ROUNDING_TO_NEAREST
1233 // check whether fractional part of initial_P/10^extra_digits is exactly .5
1236 amount2
= 64 - amount
;
1239 remainder_h
>>= amount2
;
1240 remainder_h
= remainder_h
& QH
;
1243 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
1244 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
1246 reciprocals10_128
[extra_digits
].w
[0]))) {
1252 #ifdef SET_STATUS_FLAGS
1254 if (is_inexact (fpsc
))
1255 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
);
1257 status
= INEXACT_EXCEPTION
;
1259 remainder_h
= QH
<< (64 - amount
);
1262 case ROUNDING_TO_NEAREST
:
1263 case ROUNDING_TIES_AWAY
:
1264 // test whether fractional part is 0
1265 if (remainder_h
== 0x8000000000000000ull
1266 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
1267 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
1269 reciprocals10_128
[extra_digits
].w
[0])))
1270 status
= EXACT_STATUS
;
1273 case ROUNDING_TO_ZERO
:
1275 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
1276 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
1278 reciprocals10_128
[extra_digits
].w
[0])))
1279 status
= EXACT_STATUS
;
1283 __add_carry_out (Stemp
.w
[0], CY
, Q_low
.w
[0],
1284 reciprocals10_128
[extra_digits
].w
[0]);
1285 __add_carry_in_out (Stemp
.w
[1], carry
, Q_low
.w
[1],
1286 reciprocals10_128
[extra_digits
].w
[1], CY
);
1287 if ((remainder_h
>> (64 - amount
)) + carry
>=
1288 (((UINT64
) 1) << amount
))
1289 status
= EXACT_STATUS
;
1292 if (status
!= EXACT_STATUS
)
1293 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| status
);
1305 // This pack macro doesnot check for coefficients above 2^53
1307 __BID_INLINE__ UINT64
1308 get_BID64_small_mantissa (UINT64 sgn
, int expon
, UINT64 coeff
,
1309 int rmode
, unsigned *fpsc
) {
1310 UINT128 C128
, Q_low
, Stemp
;
1311 UINT64 r
, mask
, C64
, remainder_h
, QH
, carry
, CY
;
1312 int extra_digits
, amount
, amount2
;
1315 // check for possible underflow/overflow
1316 if (((unsigned) expon
) >= 3 * 256) {
1319 if (expon
+ MAX_FORMAT_DIGITS
< 0) {
1320 #ifdef SET_STATUS_FLAGS
1321 __set_status_flags (fpsc
,
1322 UNDERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
1324 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1325 #ifndef IEEE_ROUND_NEAREST
1326 if (rmode
== ROUNDING_DOWN
&& sgn
)
1327 return 0x8000000000000001ull
;
1328 if (rmode
== ROUNDING_UP
&& !sgn
)
1335 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1336 #ifndef IEEE_ROUND_NEAREST
1337 if (sgn
&& (unsigned) (rmode
- 1) < 2)
1341 // get digits to be shifted out
1342 extra_digits
= -expon
;
1343 C128
.w
[0] = coeff
+ round_const_table
[rmode
][extra_digits
];
1345 // get coeff*(2^M[extra_digits])/10^extra_digits
1346 __mul_64x128_full (QH
, Q_low
, C128
.w
[0],
1347 reciprocals10_128
[extra_digits
]);
1349 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1350 amount
= recip_scale
[extra_digits
];
1354 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1355 #ifndef IEEE_ROUND_NEAREST
1356 if (rmode
== 0) //ROUNDING_TO_NEAREST
1359 // check whether fractional part of initial_P/10^extra_digits is exactly .5
1362 amount2
= 64 - amount
;
1365 remainder_h
>>= amount2
;
1366 remainder_h
= remainder_h
& QH
;
1369 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
1370 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
1372 reciprocals10_128
[extra_digits
].w
[0]))) {
1378 #ifdef SET_STATUS_FLAGS
1380 if (is_inexact (fpsc
))
1381 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
);
1383 status
= INEXACT_EXCEPTION
;
1385 remainder_h
= QH
<< (64 - amount
);
1388 case ROUNDING_TO_NEAREST
:
1389 case ROUNDING_TIES_AWAY
:
1390 // test whether fractional part is 0
1391 if (remainder_h
== 0x8000000000000000ull
1392 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
1393 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
1395 reciprocals10_128
[extra_digits
].w
[0])))
1396 status
= EXACT_STATUS
;
1399 case ROUNDING_TO_ZERO
:
1401 && (Q_low
.w
[1] < reciprocals10_128
[extra_digits
].w
[1]
1402 || (Q_low
.w
[1] == reciprocals10_128
[extra_digits
].w
[1]
1404 reciprocals10_128
[extra_digits
].w
[0])))
1405 status
= EXACT_STATUS
;
1409 __add_carry_out (Stemp
.w
[0], CY
, Q_low
.w
[0],
1410 reciprocals10_128
[extra_digits
].w
[0]);
1411 __add_carry_in_out (Stemp
.w
[1], carry
, Q_low
.w
[1],
1412 reciprocals10_128
[extra_digits
].w
[1], CY
);
1413 if ((remainder_h
>> (64 - amount
)) + carry
>=
1414 (((UINT64
) 1) << amount
))
1415 status
= EXACT_STATUS
;
1418 if (status
!= EXACT_STATUS
)
1419 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| status
);
1427 while (coeff
< 1000000000000000ull && expon
>= 3 * 256) {
1429 coeff
= (coeff
<< 3) + (coeff
<< 1);
1431 if (expon
> DECIMAL_MAX_EXPON_64
) {
1432 #ifdef SET_STATUS_FLAGS
1433 __set_status_flags (fpsc
, OVERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
1436 r
= sgn
| INFINITY_MASK64
;
1442 case ROUNDING_TO_ZERO
:
1443 r
= sgn
| LARGEST_BID64
;
1453 mask
<<= EXPONENT_SHIFT_SMALL64
;
1454 if (coeff
>= mask
) {
1456 r
<<= EXPONENT_SHIFT_LARGE64
;
1457 r
|= (sgn
| SPECIAL_ENCODING_MASK64
);
1458 // add coeff, without leading bits
1459 mask
= (mask
>> 2) - 1;
1468 r
<<= EXPONENT_SHIFT_SMALL64
;
1475 /*****************************************************************************
1477 * BID128 pack/unpack macros
1479 *****************************************************************************/
1482 // Macro for handling BID128 underflow
1483 // sticky bit given as additional argument
1485 __BID_INLINE__ UINT128
*
1486 handle_UF_128_rem (UINT128
* pres
, UINT64 sgn
, int expon
, UINT128 CQ
,
1487 UINT64 R
, unsigned *prounding_mode
, unsigned *fpsc
) {
1488 UINT128 T128
, TP128
, Qh
, Ql
, Qh1
, Stemp
, Tmp
, Tmp1
, CQ2
, CQ8
;
1491 unsigned rmode
, status
;
1494 if (expon
+ MAX_FORMAT_DIGITS_128
< 0) {
1495 #ifdef SET_STATUS_FLAGS
1496 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
1500 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1501 #ifndef IEEE_ROUND_NEAREST
1502 if ((sgn
&& *prounding_mode
== ROUNDING_DOWN
)
1503 || (!sgn
&& *prounding_mode
== ROUNDING_UP
))
1510 CQ2
.w
[1] = (CQ
.w
[1] << 1) | (CQ
.w
[0] >> 63);
1511 CQ2
.w
[0] = CQ
.w
[0] << 1;
1512 CQ8
.w
[1] = (CQ
.w
[1] << 3) | (CQ
.w
[0] >> 61);
1513 CQ8
.w
[0] = CQ
.w
[0] << 3;
1514 __add_128_128 (CQ
, CQ2
, CQ8
);
1521 // add rounding constant to CQ
1522 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1523 #ifndef IEEE_ROUND_NEAREST
1524 rmode
= *prounding_mode
;
1525 if (sgn
&& (unsigned) (rmode
- 1) < 2)
1533 T128
= round_const_table_128
[rmode
][ed2
];
1534 __add_carry_out (CQ
.w
[0], carry
, T128
.w
[0], CQ
.w
[0]);
1535 CQ
.w
[1] = CQ
.w
[1] + T128
.w
[1] + carry
;
1537 TP128
= reciprocals10_128
[ed2
];
1538 __mul_128x128_full (Qh
, Ql
, CQ
, TP128
);
1539 amount
= recip_scale
[ed2
];
1542 CQ
.w
[0] = Qh
.w
[1] >> (amount
- 64);
1545 __shr_128 (CQ
, Qh
, amount
);
1550 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1551 #ifndef IEEE_ROUND_NEAREST
1552 if (!(*prounding_mode
))
1555 // check whether fractional part of initial_P/10^ed1 is exactly .5
1558 __shl_128_long (Qh1
, Qh
, (128 - amount
));
1560 if (!Qh1
.w
[1] && !Qh1
.w
[0]
1561 && (Ql
.w
[1] < reciprocals10_128
[ed2
].w
[1]
1562 || (Ql
.w
[1] == reciprocals10_128
[ed2
].w
[1]
1563 && Ql
.w
[0] < reciprocals10_128
[ed2
].w
[0]))) {
1569 #ifdef SET_STATUS_FLAGS
1571 if (is_inexact (fpsc
))
1572 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
);
1574 status
= INEXACT_EXCEPTION
;
1576 __shl_128_long (Qh1
, Qh
, (128 - amount
));
1579 case ROUNDING_TO_NEAREST
:
1580 case ROUNDING_TIES_AWAY
:
1581 // test whether fractional part is 0
1582 if (Qh1
.w
[1] == 0x8000000000000000ull
&& (!Qh1
.w
[0])
1583 && (Ql
.w
[1] < reciprocals10_128
[ed2
].w
[1]
1584 || (Ql
.w
[1] == reciprocals10_128
[ed2
].w
[1]
1585 && Ql
.w
[0] < reciprocals10_128
[ed2
].w
[0])))
1586 status
= EXACT_STATUS
;
1589 case ROUNDING_TO_ZERO
:
1590 if ((!Qh1
.w
[1]) && (!Qh1
.w
[0])
1591 && (Ql
.w
[1] < reciprocals10_128
[ed2
].w
[1]
1592 || (Ql
.w
[1] == reciprocals10_128
[ed2
].w
[1]
1593 && Ql
.w
[0] < reciprocals10_128
[ed2
].w
[0])))
1594 status
= EXACT_STATUS
;
1598 __add_carry_out (Stemp
.w
[0], CY
, Ql
.w
[0],
1599 reciprocals10_128
[ed2
].w
[0]);
1600 __add_carry_in_out (Stemp
.w
[1], carry
, Ql
.w
[1],
1601 reciprocals10_128
[ed2
].w
[1], CY
);
1602 __shr_128_long (Qh
, Qh1
, (128 - amount
));
1605 __shl_128_long (Tmp1
, Tmp
, amount
);
1607 if (Qh
.w
[0] < carry
)
1609 if (__unsigned_compare_ge_128 (Qh
, Tmp1
))
1610 status
= EXACT_STATUS
;
1613 if (status
!= EXACT_STATUS
)
1614 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| status
);
1619 pres
->w
[1] = sgn
| CQ
.w
[1];
1620 pres
->w
[0] = CQ
.w
[0];
1628 // Macro for handling BID128 underflow
1630 __BID_INLINE__ UINT128
*
1631 handle_UF_128 (UINT128
* pres
, UINT64 sgn
, int expon
, UINT128 CQ
,
1632 unsigned *prounding_mode
, unsigned *fpsc
) {
1633 UINT128 T128
, TP128
, Qh
, Ql
, Qh1
, Stemp
, Tmp
, Tmp1
;
1636 unsigned rmode
, status
;
1639 if (expon
+ MAX_FORMAT_DIGITS_128
< 0) {
1640 #ifdef SET_STATUS_FLAGS
1641 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
1645 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1646 #ifndef IEEE_ROUND_NEAREST
1647 if ((sgn
&& *prounding_mode
== ROUNDING_DOWN
)
1648 || (!sgn
&& *prounding_mode
== ROUNDING_UP
))
1656 // add rounding constant to CQ
1657 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1658 #ifndef IEEE_ROUND_NEAREST
1659 rmode
= *prounding_mode
;
1660 if (sgn
&& (unsigned) (rmode
- 1) < 2)
1669 T128
= round_const_table_128
[rmode
][ed2
];
1670 __add_carry_out (CQ
.w
[0], carry
, T128
.w
[0], CQ
.w
[0]);
1671 CQ
.w
[1] = CQ
.w
[1] + T128
.w
[1] + carry
;
1673 TP128
= reciprocals10_128
[ed2
];
1674 __mul_128x128_full (Qh
, Ql
, CQ
, TP128
);
1675 amount
= recip_scale
[ed2
];
1678 CQ
.w
[0] = Qh
.w
[1] >> (amount
- 64);
1681 __shr_128 (CQ
, Qh
, amount
);
1686 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1687 #ifndef IEEE_ROUND_NEAREST
1688 if (!(*prounding_mode
))
1691 // check whether fractional part of initial_P/10^ed1 is exactly .5
1694 __shl_128_long (Qh1
, Qh
, (128 - amount
));
1696 if (!Qh1
.w
[1] && !Qh1
.w
[0]
1697 && (Ql
.w
[1] < reciprocals10_128
[ed2
].w
[1]
1698 || (Ql
.w
[1] == reciprocals10_128
[ed2
].w
[1]
1699 && Ql
.w
[0] < reciprocals10_128
[ed2
].w
[0]))) {
1705 #ifdef SET_STATUS_FLAGS
1707 if (is_inexact (fpsc
))
1708 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
);
1710 status
= INEXACT_EXCEPTION
;
1712 __shl_128_long (Qh1
, Qh
, (128 - amount
));
1715 case ROUNDING_TO_NEAREST
:
1716 case ROUNDING_TIES_AWAY
:
1717 // test whether fractional part is 0
1718 if (Qh1
.w
[1] == 0x8000000000000000ull
&& (!Qh1
.w
[0])
1719 && (Ql
.w
[1] < reciprocals10_128
[ed2
].w
[1]
1720 || (Ql
.w
[1] == reciprocals10_128
[ed2
].w
[1]
1721 && Ql
.w
[0] < reciprocals10_128
[ed2
].w
[0])))
1722 status
= EXACT_STATUS
;
1725 case ROUNDING_TO_ZERO
:
1726 if ((!Qh1
.w
[1]) && (!Qh1
.w
[0])
1727 && (Ql
.w
[1] < reciprocals10_128
[ed2
].w
[1]
1728 || (Ql
.w
[1] == reciprocals10_128
[ed2
].w
[1]
1729 && Ql
.w
[0] < reciprocals10_128
[ed2
].w
[0])))
1730 status
= EXACT_STATUS
;
1734 __add_carry_out (Stemp
.w
[0], CY
, Ql
.w
[0],
1735 reciprocals10_128
[ed2
].w
[0]);
1736 __add_carry_in_out (Stemp
.w
[1], carry
, Ql
.w
[1],
1737 reciprocals10_128
[ed2
].w
[1], CY
);
1738 __shr_128_long (Qh
, Qh1
, (128 - amount
));
1741 __shl_128_long (Tmp1
, Tmp
, amount
);
1743 if (Qh
.w
[0] < carry
)
1745 if (__unsigned_compare_ge_128 (Qh
, Tmp1
))
1746 status
= EXACT_STATUS
;
1749 if (status
!= EXACT_STATUS
)
1750 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| status
);
1755 pres
->w
[1] = sgn
| CQ
.w
[1];
1756 pres
->w
[0] = CQ
.w
[0];
1765 // BID128 unpack, input passed by value
1767 __BID_INLINE__ UINT64
1768 unpack_BID128_value (UINT64
* psign_x
, int *pexponent_x
,
1769 UINT128
* pcoefficient_x
, UINT128 x
) {
1770 UINT128 coeff
, T33
, T34
;
1773 *psign_x
= (x
.w
[1]) & 0x8000000000000000ull
;
1775 // special encodings
1776 if ((x
.w
[1] & INFINITY_MASK64
) >= SPECIAL_ENCODING_MASK64
) {
1777 if ((x
.w
[1] & INFINITY_MASK64
) < INFINITY_MASK64
) {
1778 // non-canonical input
1779 pcoefficient_x
->w
[0] = 0;
1780 pcoefficient_x
->w
[1] = 0;
1781 ex
= (x
.w
[1]) >> 47;
1782 *pexponent_x
= ((int) ex
) & EXPONENT_MASK128
;
1786 T33
= power10_table_128
[33];
1787 /*coeff.w[0] = x.w[0];
1788 coeff.w[1] = (x.w[1]) & LARGE_COEFF_MASK128;
1789 pcoefficient_x->w[0] = x.w[0];
1790 pcoefficient_x->w[1] = x.w[1];
1791 if (__unsigned_compare_ge_128 (coeff, T33)) // non-canonical
1792 pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128); */
1794 pcoefficient_x
->w
[0] = x
.w
[0];
1795 pcoefficient_x
->w
[1] = (x
.w
[1]) & 0x00003fffffffffffull
;
1796 if (__unsigned_compare_ge_128 ((*pcoefficient_x
), T33
)) // non-canonical
1798 pcoefficient_x
->w
[1] = (x
.w
[1]) & 0xfe00000000000000ull
;
1799 pcoefficient_x
->w
[0] = 0;
1801 pcoefficient_x
->w
[1] = (x
.w
[1]) & 0xfe003fffffffffffull
;
1802 if ((x
.w
[1] & NAN_MASK64
) == INFINITY_MASK64
) {
1803 pcoefficient_x
->w
[0] = 0;
1804 pcoefficient_x
->w
[1] = x
.w
[1] & SINFINITY_MASK64
;
1807 return 0; // NaN or Infinity
1810 coeff
.w
[0] = x
.w
[0];
1811 coeff
.w
[1] = (x
.w
[1]) & SMALL_COEFF_MASK128
;
1814 T34
= power10_table_128
[34];
1815 // check for non-canonical values
1816 if (__unsigned_compare_ge_128 (coeff
, T34
))
1817 coeff
.w
[0] = coeff
.w
[1] = 0;
1819 pcoefficient_x
->w
[0] = coeff
.w
[0];
1820 pcoefficient_x
->w
[1] = coeff
.w
[1];
1822 ex
= (x
.w
[1]) >> 49;
1823 *pexponent_x
= ((int) ex
) & EXPONENT_MASK128
;
1825 return coeff
.w
[0] | coeff
.w
[1];
1830 // BID128 unpack, input pased by reference
1832 __BID_INLINE__ UINT64
1833 unpack_BID128 (UINT64
* psign_x
, int *pexponent_x
,
1834 UINT128
* pcoefficient_x
, UINT128
* px
) {
1835 UINT128 coeff
, T33
, T34
;
1838 *psign_x
= (px
->w
[1]) & 0x8000000000000000ull
;
1840 // special encodings
1841 if ((px
->w
[1] & INFINITY_MASK64
) >= SPECIAL_ENCODING_MASK64
) {
1842 if ((px
->w
[1] & INFINITY_MASK64
) < INFINITY_MASK64
) {
1843 // non-canonical input
1844 pcoefficient_x
->w
[0] = 0;
1845 pcoefficient_x
->w
[1] = 0;
1846 ex
= (px
->w
[1]) >> 47;
1847 *pexponent_x
= ((int) ex
) & EXPONENT_MASK128
;
1851 T33
= power10_table_128
[33];
1852 coeff
.w
[0] = px
->w
[0];
1853 coeff
.w
[1] = (px
->w
[1]) & LARGE_COEFF_MASK128
;
1854 pcoefficient_x
->w
[0] = px
->w
[0];
1855 pcoefficient_x
->w
[1] = px
->w
[1];
1856 if (__unsigned_compare_ge_128 (coeff
, T33
)) { // non-canonical
1857 pcoefficient_x
->w
[1] &= (~LARGE_COEFF_MASK128
);
1858 pcoefficient_x
->w
[0] = 0;
1861 return 0; // NaN or Infinity
1864 coeff
.w
[0] = px
->w
[0];
1865 coeff
.w
[1] = (px
->w
[1]) & SMALL_COEFF_MASK128
;
1868 T34
= power10_table_128
[34];
1869 // check for non-canonical values
1870 if (__unsigned_compare_ge_128 (coeff
, T34
))
1871 coeff
.w
[0] = coeff
.w
[1] = 0;
1873 pcoefficient_x
->w
[0] = coeff
.w
[0];
1874 pcoefficient_x
->w
[1] = coeff
.w
[1];
1876 ex
= (px
->w
[1]) >> 49;
1877 *pexponent_x
= ((int) ex
) & EXPONENT_MASK128
;
1879 return coeff
.w
[0] | coeff
.w
[1];
1883 // Pack macro checks for overflow, but not underflow
1885 __BID_INLINE__ UINT128
*
1886 get_BID128_very_fast_OF (UINT128
* pres
, UINT64 sgn
, int expon
,
1887 UINT128 coeff
, unsigned *prounding_mode
,
1892 if ((unsigned) expon
> DECIMAL_MAX_EXPON_128
) {
1894 if (expon
- MAX_FORMAT_DIGITS_128
<= DECIMAL_MAX_EXPON_128
) {
1895 T
= power10_table_128
[MAX_FORMAT_DIGITS_128
- 1];
1896 while (__unsigned_compare_gt_128 (T
, coeff
)
1897 && expon
> DECIMAL_MAX_EXPON_128
) {
1899 (coeff
.w
[1] << 3) + (coeff
.w
[1] << 1) + (coeff
.w
[0] >> 61) +
1901 tmp2
= coeff
.w
[0] << 3;
1902 coeff
.w
[0] = (coeff
.w
[0] << 1) + tmp2
;
1903 if (coeff
.w
[0] < tmp2
)
1909 if ((unsigned) expon
> DECIMAL_MAX_EXPON_128
) {
1911 #ifdef SET_STATUS_FLAGS
1912 __set_status_flags (fpsc
, OVERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
1914 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1915 #ifndef IEEE_ROUND_NEAREST
1916 if (*prounding_mode
== ROUNDING_TO_ZERO
1917 || (sgn
&& *prounding_mode
== ROUNDING_UP
) || (!sgn
1923 pres
->w
[1] = sgn
| LARGEST_BID128_HIGH
;
1924 pres
->w
[0] = LARGEST_BID128_LOW
;
1929 pres
->w
[1] = sgn
| INFINITY_MASK64
;
1936 pres
->w
[0] = coeff
.w
[0];
1939 pres
->w
[1] = sgn
| tmp
| coeff
.w
[1];
1946 // No overflow/underflow checks
1947 // No checking for coefficient == 10^34 (rounding artifact)
1949 __BID_INLINE__ UINT128
*
1950 get_BID128_very_fast (UINT128
* pres
, UINT64 sgn
, int expon
,
1954 pres
->w
[0] = coeff
.w
[0];
1957 pres
->w
[1] = sgn
| tmp
| coeff
.w
[1];
1963 // No overflow/underflow checks
1965 __BID_INLINE__ UINT128
*
1966 get_BID128_fast (UINT128
* pres
, UINT64 sgn
, int expon
, UINT128 coeff
) {
1970 if (coeff
.w
[1] == 0x0001ed09bead87c0ull
1971 && coeff
.w
[0] == 0x378d8e6400000000ull
) {
1973 // set coefficient to 10^33
1974 coeff
.w
[1] = 0x0000314dc6448d93ull
;
1975 coeff
.w
[0] = 0x38c15b0a00000000ull
;
1978 pres
->w
[0] = coeff
.w
[0];
1981 pres
->w
[1] = sgn
| tmp
| coeff
.w
[1];
1987 // General BID128 pack macro
1989 __BID_INLINE__ UINT128
*
1990 get_BID128 (UINT128
* pres
, UINT64 sgn
, int expon
, UINT128 coeff
,
1991 unsigned *prounding_mode
, unsigned *fpsc
) {
1996 if (coeff
.w
[1] == 0x0001ed09bead87c0ull
1997 && coeff
.w
[0] == 0x378d8e6400000000ull
) {
1999 // set coefficient to 10^33
2000 coeff
.w
[1] = 0x0000314dc6448d93ull
;
2001 coeff
.w
[0] = 0x38c15b0a00000000ull
;
2004 if (expon
< 0 || expon
> DECIMAL_MAX_EXPON_128
) {
2007 return handle_UF_128 (pres
, sgn
, expon
, coeff
, prounding_mode
,
2011 if (expon
- MAX_FORMAT_DIGITS_128
<= DECIMAL_MAX_EXPON_128
) {
2012 T
= power10_table_128
[MAX_FORMAT_DIGITS_128
- 1];
2013 while (__unsigned_compare_gt_128 (T
, coeff
)
2014 && expon
> DECIMAL_MAX_EXPON_128
) {
2016 (coeff
.w
[1] << 3) + (coeff
.w
[1] << 1) + (coeff
.w
[0] >> 61) +
2018 tmp2
= coeff
.w
[0] << 3;
2019 coeff
.w
[0] = (coeff
.w
[0] << 1) + tmp2
;
2020 if (coeff
.w
[0] < tmp2
)
2026 if (expon
> DECIMAL_MAX_EXPON_128
) {
2027 if (!(coeff
.w
[1] | coeff
.w
[0])) {
2028 pres
->w
[1] = sgn
| (((UINT64
) DECIMAL_MAX_EXPON_128
) << 49);
2033 #ifdef SET_STATUS_FLAGS
2034 __set_status_flags (fpsc
, OVERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
2036 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2037 #ifndef IEEE_ROUND_NEAREST
2038 if (*prounding_mode
== ROUNDING_TO_ZERO
2039 || (sgn
&& *prounding_mode
== ROUNDING_UP
) || (!sgn
2045 pres
->w
[1] = sgn
| LARGEST_BID128_HIGH
;
2046 pres
->w
[0] = LARGEST_BID128_LOW
;
2051 pres
->w
[1] = sgn
| INFINITY_MASK64
;
2058 pres
->w
[0] = coeff
.w
[0];
2061 pres
->w
[1] = sgn
| tmp
| coeff
.w
[1];
2068 // Macro used for conversions from string
2069 // (no additional arguments given for rounding mode, status flags)
2071 __BID_INLINE__ UINT128
*
2072 get_BID128_string (UINT128
* pres
, UINT64 sgn
, int expon
, UINT128 coeff
) {
2075 unsigned rmode
= 0, status
;
2078 if (coeff
.w
[1] == 0x0001ed09bead87c0ull
2079 && coeff
.w
[0] == 0x378d8e6400000000ull
) {
2081 // set coefficient to 10^33
2082 coeff
.w
[1] = 0x0000314dc6448d93ull
;
2083 coeff
.w
[0] = 0x38c15b0a00000000ull
;
2086 if ((unsigned) expon
> DECIMAL_MAX_EXPON_128
) {
2089 return handle_UF_128 (pres
, sgn
, expon
, coeff
, &rmode
, &status
);
2093 if (expon
< DECIMAL_MAX_EXPON_128
+ 34) {
2094 while (expon
> DECIMAL_MAX_EXPON_128
&&
2095 (coeff
.w
[1] < power10_table_128
[33].w
[1] ||
2096 (coeff
.w
[1] == power10_table_128
[33].w
[1]
2097 && coeff
.w
[0] < power10_table_128
[33].w
[0]))) {
2098 D2
.w
[1] = (coeff
.w
[1] << 1) | (coeff
.w
[0] >> 63);
2099 D2
.w
[0] = coeff
.w
[0] << 1;
2100 D8
.w
[1] = (coeff
.w
[1] << 3) | (coeff
.w
[0] >> 61);
2101 D8
.w
[0] = coeff
.w
[0] << 3;
2103 __add_128_128 (coeff
, D2
, D8
);
2106 } else if (!(coeff
.w
[0] | coeff
.w
[1]))
2107 expon
= DECIMAL_MAX_EXPON_128
;
2109 if (expon
> DECIMAL_MAX_EXPON_128
) {
2110 pres
->w
[1] = sgn
| INFINITY_MASK64
;
2115 pres
->w
[1] = LARGEST_BID128_HIGH
;
2116 pres
->w
[0] = LARGEST_BID128_LOW
;
2119 case ROUNDING_TO_ZERO
:
2120 pres
->w
[1] = sgn
| LARGEST_BID128_HIGH
;
2121 pres
->w
[0] = LARGEST_BID128_LOW
;
2126 pres
->w
[1] = sgn
| LARGEST_BID128_HIGH
;
2127 pres
->w
[0] = LARGEST_BID128_LOW
;
2136 pres
->w
[0] = coeff
.w
[0];
2139 pres
->w
[1] = sgn
| tmp
| coeff
.w
[1];
2146 /*****************************************************************************
2148 * BID32 pack/unpack macros
2150 *****************************************************************************/
2153 __BID_INLINE__ UINT32
2154 unpack_BID32 (UINT32
* psign_x
, int *pexponent_x
,
2155 UINT32
* pcoefficient_x
, UINT32 x
) {
2158 *psign_x
= x
& 0x80000000;
2160 if ((x
& SPECIAL_ENCODING_MASK32
) == SPECIAL_ENCODING_MASK32
) {
2161 // special encodings
2162 if ((x
& INFINITY_MASK32
) == INFINITY_MASK32
) {
2163 *pcoefficient_x
= x
& 0xfe0fffff;
2164 if ((x
& 0x000fffff) >= 1000000)
2165 *pcoefficient_x
= x
& 0xfe000000;
2166 if ((x
& NAN_MASK32
) == INFINITY_MASK32
)
2167 *pcoefficient_x
= x
& 0xf8000000;
2169 return 0; // NaN or Infinity
2172 *pcoefficient_x
= (x
& SMALL_COEFF_MASK32
) | LARGE_COEFF_HIGH_BIT32
;
2173 // check for non-canonical value
2174 if (*pcoefficient_x
>= 10000000)
2175 *pcoefficient_x
= 0;
2178 *pexponent_x
= tmp
& EXPONENT_MASK32
;
2183 *pexponent_x
= tmp
& EXPONENT_MASK32
;
2185 *pcoefficient_x
= (x
& LARGE_COEFF_MASK32
);
2187 return *pcoefficient_x
;
2191 // General pack macro for BID32
2193 __BID_INLINE__ UINT32
2194 get_BID32 (UINT32 sgn
, int expon
, UINT64 coeff
, int rmode
,
2197 UINT64 C64
, remainder_h
, carry
, Stemp
;
2199 int extra_digits
, amount
, amount2
;
2202 if (coeff
> 9999999ull) {
2206 // check for possible underflow/overflow
2207 if (((unsigned) expon
) > DECIMAL_MAX_EXPON_32
) {
2210 if (expon
+ MAX_FORMAT_DIGITS_32
< 0) {
2211 #ifdef SET_STATUS_FLAGS
2212 __set_status_flags (fpsc
,
2213 UNDERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
2215 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2216 #ifndef IEEE_ROUND_NEAREST
2217 if (rmode
== ROUNDING_DOWN
&& sgn
)
2219 if (rmode
== ROUNDING_UP
&& !sgn
)
2226 // get digits to be shifted out
2227 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
2230 #ifdef IEEE_ROUND_NEAREST
2233 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2234 #ifndef IEEE_ROUND_NEAREST
2235 if (sgn
&& (unsigned) (rmode
- 1) < 2)
2240 extra_digits
= -expon
;
2241 coeff
+= round_const_table
[rmode
][extra_digits
];
2243 // get coeff*(2^M[extra_digits])/10^extra_digits
2244 __mul_64x64_to_128 (Q
, coeff
, reciprocals10_64
[extra_digits
]);
2246 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
2247 amount
= short_recip_scale
[extra_digits
];
2249 C64
= Q
.w
[1] >> amount
;
2251 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2252 #ifndef IEEE_ROUND_NEAREST
2253 if (rmode
== 0) //ROUNDING_TO_NEAREST
2256 // check whether fractional part of initial_P/10^extra_digits is exactly .5
2259 amount2
= 64 - amount
;
2262 remainder_h
>>= amount2
;
2263 remainder_h
= remainder_h
& Q
.w
[1];
2265 if (!remainder_h
&& (Q
.w
[0] < reciprocals10_64
[extra_digits
])) {
2271 #ifdef SET_STATUS_FLAGS
2273 if (is_inexact (fpsc
))
2274 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
);
2276 status
= INEXACT_EXCEPTION
;
2278 remainder_h
= Q
.w
[1] << (64 - amount
);
2281 case ROUNDING_TO_NEAREST
:
2282 case ROUNDING_TIES_AWAY
:
2283 // test whether fractional part is 0
2284 if (remainder_h
== 0x8000000000000000ull
2285 && (Q
.w
[0] < reciprocals10_64
[extra_digits
]))
2286 status
= EXACT_STATUS
;
2289 case ROUNDING_TO_ZERO
:
2290 if (!remainder_h
&& (Q
.w
[0] < reciprocals10_64
[extra_digits
]))
2291 status
= EXACT_STATUS
;
2295 __add_carry_out (Stemp
, carry
, Q
.w
[0],
2296 reciprocals10_64
[extra_digits
]);
2297 if ((remainder_h
>> (64 - amount
)) + carry
>=
2298 (((UINT64
) 1) << amount
))
2299 status
= EXACT_STATUS
;
2302 if (status
!= EXACT_STATUS
)
2303 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| status
);
2308 return sgn
| (UINT32
) C64
;
2311 while (coeff
< 1000000 && expon
> DECIMAL_MAX_EXPON_32
) {
2312 coeff
= (coeff
<< 3) + (coeff
<< 1);
2315 if (((unsigned) expon
) > DECIMAL_MAX_EXPON_32
) {
2316 __set_status_flags (fpsc
, OVERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
2318 r
= sgn
| INFINITY_MASK32
;
2324 case ROUNDING_TO_ZERO
:
2325 r
= sgn
| LARGEST_BID32
;
2330 r
= sgn
| LARGEST_BID32
;
2338 // check whether coefficient fits in DECIMAL_COEFF_FIT bits
2342 r
|= ((UINT32
) coeff
| sgn
);
2349 r
|= (sgn
| SPECIAL_ENCODING_MASK32
);
2350 // add coeff, without leading bits
2351 mask
= (1 << 21) - 1;
2352 r
|= (((UINT32
) coeff
) & mask
);
2360 // no overflow/underflow checks
2362 __BID_INLINE__ UINT32
2363 very_fast_get_BID32 (UINT32 sgn
, int expon
, UINT32 coeff
) {
2368 // check whether coefficient fits in 10*2+3 bits
2378 r
|= (sgn
| SPECIAL_ENCODING_MASK32
);
2379 // add coeff, without leading bits
2380 mask
= (1 << 21) - 1;
2389 /*************************************************************
2391 *************************************************************/
2403 #define MASK_STEERING_BITS 0x6000000000000000ull
2404 #define MASK_BINARY_EXPONENT1 0x7fe0000000000000ull
2405 #define MASK_BINARY_SIG1 0x001fffffffffffffull
2406 #define MASK_BINARY_EXPONENT2 0x1ff8000000000000ull
2407 //used to take G[2:w+3] (sec 3.3)
2408 #define MASK_BINARY_SIG2 0x0007ffffffffffffull
2409 //used to mask out G4:T0 (sec 3.3)
2410 #define MASK_BINARY_OR2 0x0020000000000000ull
2411 //used to prefix 8+G4 to T (sec 3.3)
2412 #define UPPER_EXPON_LIMIT 51
2413 #define MASK_EXP 0x7ffe000000000000ull
2414 #define MASK_SPECIAL 0x7800000000000000ull
2415 #define MASK_NAN 0x7c00000000000000ull
2416 #define MASK_SNAN 0x7e00000000000000ull
2417 #define MASK_ANY_INF 0x7c00000000000000ull
2418 #define MASK_INF 0x7800000000000000ull
2419 #define MASK_SIGN 0x8000000000000000ull
2420 #define MASK_COEFF 0x0001ffffffffffffull
2421 #define BIN_EXP_BIAS (0x1820ull << 49)
2423 #define EXP_MIN 0x0000000000000000ull
2424 // EXP_MIN = (-6176 + 6176) << 49
2425 #define EXP_MAX 0x5ffe000000000000ull
2426 // EXP_MAX = (6111 + 6176) << 49
2427 #define EXP_MAX_P1 0x6000000000000000ull
2428 // EXP_MAX + 1 = (6111 + 6176 + 1) << 49
2429 #define EXP_P1 0x0002000000000000ull
2431 #define expmin -6176
2432 // min unbiased exponent
2434 // max unbiased exponent
2435 #define expmin16 -398
2436 // min unbiased exponent
2437 #define expmax16 369
2438 // max unbiased exponent
2440 #define SIGNMASK32 0x80000000
2441 #define BID64_SIG_MAX 0x002386F26FC0ffffull
2442 #define SIGNMASK64 0x8000000000000000ull
2444 // typedef unsigned int FPSC; // floating-point status and control
2478 #define ROUNDING_MODE_MASK 0x00007000
2480 typedef struct _DEC_DIGITS
{
2481 unsigned int digits
;
2482 UINT64 threshold_hi
;
2483 UINT64 threshold_lo
;
2484 unsigned int digits1
;
2487 extern DEC_DIGITS nr_digits
[];
2488 extern UINT64 midpoint64
[];
2489 extern UINT128 midpoint128
[];
2490 extern UINT192 midpoint192
[];
2491 extern UINT256 midpoint256
[];
2492 extern UINT64 ten2k64
[];
2493 extern UINT128 ten2k128
[];
2494 extern UINT256 ten2k256
[];
2495 extern UINT128 ten2mk128
[];
2496 extern UINT64 ten2mk64
[];
2497 extern UINT128 ten2mk128trunc
[];
2498 extern int shiftright128
[];
2499 extern UINT64 maskhigh128
[];
2500 extern UINT64 maskhigh128M
[];
2501 extern UINT64 maskhigh192M
[];
2502 extern UINT64 maskhigh256M
[];
2503 extern UINT64 onehalf128
[];
2504 extern UINT64 onehalf128M
[];
2505 extern UINT64 onehalf192M
[];
2506 extern UINT64 onehalf256M
[];
2507 extern UINT128 ten2mk128M
[];
2508 extern UINT128 ten2mk128truncM
[];
2509 extern UINT192 ten2mk192truncM
[];
2510 extern UINT256 ten2mk256truncM
[];
2511 extern int shiftright128M
[];
2512 extern int shiftright192M
[];
2513 extern int shiftright256M
[];
2514 extern UINT192 ten2mk192M
[];
2515 extern UINT256 ten2mk256M
[];
2516 extern unsigned char char_table2
[];
2517 extern unsigned char char_table3
[];
2519 extern UINT64 ten2m3k64
[];
2520 extern unsigned int shift_ten2m3k64
[];
2521 extern UINT128 ten2m3k128
[];
2522 extern unsigned int shift_ten2m3k128
[];
2526 /***************************************************************************
2527 *************** TABLES FOR GENERAL ROUNDING FUNCTIONS *********************
2528 ***************************************************************************/
2530 extern UINT64 Kx64
[];
2531 extern unsigned int Ex64m64
[];
2532 extern UINT64 half64
[];
2533 extern UINT64 mask64
[];
2534 extern UINT64 ten2mxtrunc64
[];
2536 extern UINT128 Kx128
[];
2537 extern unsigned int Ex128m128
[];
2538 extern UINT64 half128
[];
2539 extern UINT64 mask128
[];
2540 extern UINT128 ten2mxtrunc128
[];
2542 extern UINT192 Kx192
[];
2543 extern unsigned int Ex192m192
[];
2544 extern UINT64 half192
[];
2545 extern UINT64 mask192
[];
2546 extern UINT192 ten2mxtrunc192
[];
2548 extern UINT256 Kx256
[];
2549 extern unsigned int Ex256m256
[];
2550 extern UINT64 half256
[];
2551 extern UINT64 mask256
[];
2552 extern UINT256 ten2mxtrunc256
[];
2554 typedef union __bid64_128
{
2559 BID64_128
bid_fma (unsigned int P0
,
2560 BID64_128 x1
, unsigned int P1
,
2561 BID64_128 y1
, unsigned int P2
,
2562 BID64_128 z1
, unsigned int P3
,
2563 unsigned int rnd_mode
, FPSC
* fpsc
);
2568 union __int_double
{
2572 typedef union __int_double int_double
;
2579 typedef union __int_float int_float
;
2581 #define SWAP(A,B,T) {\
2587 // this macro will find coefficient_x to be in [2^A, 2^(A+1) )
2588 // ie it knows that it is A bits long
2589 #define NUMBITS(A, coefficient_x, tempx){\
2590 temp_x.d=(float)coefficient_x;\
2591 A=((tempx.i >>23) & EXPONENT_MASK32) - 0x7f;\