1 /* Copyright (C) 2007 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
29 /*****************************************************************************
31 * BID128 fma x * y + z
33 ****************************************************************************/
35 #include "bid_internal.h"
// Applies a rounding-mode correction to a decimal128 result that was first
// rounded to nearest (RN) with an unbounded exponent, converting it to the
// requested mode (RA/RM/RP/RZ) and raising inexact/underflow/overflow flags
// in *ptrfpsf.  The corrected result is reassembled into *ptrres.
// NOTE(review): this text is a damaged extraction -- several original source
// lines (a parameter, local declarations, the C = C +/- 1 steps, and closing
// braces) appear to have been dropped, so the fragments below are not
// complete C on their own; the notes below mark each suspected gap.
38 rounding_correction (unsigned int rnd_mode
,
39 unsigned int is_inexact_lt_midpoint
,
40 unsigned int is_inexact_gt_midpoint
,
41 unsigned int is_midpoint_lt_even
,
42 unsigned int is_midpoint_gt_even
,
// NOTE(review): a parameter (presumably "int unbexp", which the body uses
// throughout) seems to be missing here -- original line 43 was dropped.
44 UINT128
* ptrres
, _IDEC_flags
* ptrfpsf
) {
45 // unbiased true exponent unbexp may be larger than emax
47 UINT128 res
= *ptrres
; // expected to have the correct sign and coefficient
48 // (the exponent field is ignored, as unbexp is used instead)
52 // general correction from RN to RA, RM, RP, RZ
53 // Note: if the result is negative, then is_inexact_lt_midpoint,
54 // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
55 // have to be considered as if determined for the absolute value of the
56 // result (so they seem to be reversed)
// any rounding-status indicator set means the RN result was not exact
58 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
59 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
60 *ptrfpsf
|= INEXACT_EXCEPTION
;
// NOTE(review): the declarations of sign, exp, C_hi (and C_lo, plus the
// initialization of C_lo from res.w[0]) are not visible -- original lines
// appear to have been dropped by the extraction.
62 // apply correction to result calculated with unbounded exponent
63 sign
= res
.w
[1] & MASK_SIGN
;
64 exp
= (UINT64
) (unbexp
+ 6176) << 49; // valid only if expmin<=unbexp<=expmax
65 C_hi
= res
.w
[1] & MASK_COEFF
;
// This condition selects the cases where the coefficient must be
// incremented: rounding away from zero relative to the RN result.
67 if ((!sign
&& ((rnd_mode
== ROUNDING_UP
&& is_inexact_lt_midpoint
) ||
68 ((rnd_mode
== ROUNDING_TIES_AWAY
|| rnd_mode
== ROUNDING_UP
) &&
69 is_midpoint_gt_even
))) ||
70 (sign
&& ((rnd_mode
== ROUNDING_DOWN
&& is_inexact_lt_midpoint
) ||
71 ((rnd_mode
== ROUNDING_TIES_AWAY
|| rnd_mode
== ROUNDING_DOWN
) &&
72 is_midpoint_gt_even
)))) {
// NOTE(review): the "C = C + 1" increment of (C_hi, C_lo) that should
// precede this check for reaching 10^34 is not visible (original lines
// 73-76 were dropped).
77 if (C_hi
== 0x0001ed09bead87c0ull
&& C_lo
== 0x378d8e6400000000ull
) {
78 // C = 10^34 => rounding overflow
79 C_hi
= 0x0000314dc6448d93ull
;
80 C_lo
= 0x38c15b0a00000000ull
; // 10^33
81 // exp = exp + EXP_P1;
// NOTE(review): an "unbexp = unbexp + 1" step (original line 82) is
// presumably missing before this recomputation of exp.
83 exp
= (UINT64
) (unbexp
+ 6176) << 49;
// Cases where the coefficient must be decremented: rounding toward zero
// relative to the RN result.
85 } else if ((is_midpoint_lt_even
|| is_inexact_gt_midpoint
) &&
86 ((sign
&& (rnd_mode
== ROUNDING_UP
|| rnd_mode
== ROUNDING_TO_ZERO
)) ||
87 (!sign
&& (rnd_mode
== ROUNDING_DOWN
|| rnd_mode
== ROUNDING_TO_ZERO
)))) {
// NOTE(review): the "C = C - 1" step is only partly visible; the borrow
// into C_hi controlled by this wrap-around test is not shown (original
// line 91 dropped).
90 if (C_lo
== 0xffffffffffffffffull
)
92 // check if we crossed into the lower decade
93 if (C_hi
== 0x0000314dc6448d93ull
&& C_lo
== 0x38c15b09ffffffffull
) {
// NOTE(review): an enclosing guard (original lines 94-95, likely testing
// that the exponent can still be decreased) appears to be missing around
// this lower-decade adjustment.
96 C_hi
= 0x0001ed09bead87c0ull
; // 10^34 - 1
97 C_lo
= 0x378d8e63ffffffffull
;
98 // exp = exp - EXP_P1;
// NOTE(review): the matching "unbexp = unbexp - 1" step (original line 99)
// is presumably missing before this recomputation of exp.
100 exp
= (UINT64
) (unbexp
+ 6176) << 49;
101 } else { // if exp = 0
// NOTE(review): the duplicated operands below (is_midpoint_lt_even twice,
// is_inexact_gt_midpoint twice) look like a typo for the _gt_/_lt_
// counterparts -- verify against the reference implementation.
102 if (is_midpoint_lt_even
|| is_midpoint_lt_even
||
103 is_inexact_gt_midpoint
|| is_inexact_gt_midpoint
) // tiny & inexact
104 *ptrfpsf
|= UNDERFLOW_EXCEPTION
;
108 ; // the result is already correct
// Overflow check: an unbiased exponent above emax forces the result to
// +/-inf or +/-MAXFP depending on sign and rounding mode.
110 if (unbexp
> expmax
) { // 6111
111 *ptrfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
113 if (!sign
) { // result is positive
114 if (rnd_mode
== ROUNDING_UP
|| rnd_mode
== ROUNDING_TIES_AWAY
) { // +inf
115 C_hi
= 0x7800000000000000ull
;
116 C_lo
= 0x0000000000000000ull
;
117 } else { // res = +MAXFP = (10^34-1) * 10^emax
118 C_hi
= 0x5fffed09bead87c0ull
;
119 C_lo
= 0x378d8e63ffffffffull
;
121 } else { // result is negative
122 if (rnd_mode
== ROUNDING_DOWN
|| rnd_mode
== ROUNDING_TIES_AWAY
) { // -inf
123 C_hi
= 0xf800000000000000ull
;
124 C_lo
= 0x0000000000000000ull
;
125 } else { // res = -MAXFP = -(10^34-1) * 10^emax
126 C_hi
= 0xdfffed09bead87c0ull
;
127 C_lo
= 0x378d8e63ffffffffull
;
131 // assemble the result
132 res
.w
[1] = sign
| exp
| C_hi
;
// NOTE(review): the final "res.w[0] = C_lo; *ptrres = res;" store and the
// function's closing brace are not visible (original lines dropped).
138 add256 (UINT256 x
, UINT256 y
, UINT256
* pz
) {
139 // *z = x + yl assume the sum fits in 256 bits
141 z
.w
[0] = x
.w
[0] + y
.w
[0];
142 if (z
.w
[0] < x
.w
[0]) {
144 if (x
.w
[1] == 0x0000000000000000ull
) {
146 if (x
.w
[2] == 0x0000000000000000ull
) {
151 z
.w
[1] = x
.w
[1] + y
.w
[1];
152 if (z
.w
[1] < x
.w
[1]) {
154 if (x
.w
[2] == 0x0000000000000000ull
) {
158 z
.w
[2] = x
.w
[2] + y
.w
[2];
159 if (z
.w
[2] < x
.w
[2]) {
162 z
.w
[3] = x
.w
[3] + y
.w
[3]; // it was assumed that no carry is possible
167 sub256 (UINT256 x
, UINT256 y
, UINT256
* pz
) {
168 // *z = x - y; assume x >= y
170 z
.w
[0] = x
.w
[0] - y
.w
[0];
171 if (z
.w
[0] > x
.w
[0]) {
173 if (x
.w
[1] == 0xffffffffffffffffull
) {
175 if (x
.w
[2] == 0xffffffffffffffffull
) {
180 z
.w
[1] = x
.w
[1] - y
.w
[1];
181 if (z
.w
[1] > x
.w
[1]) {
183 if (x
.w
[2] == 0xffffffffffffffffull
) {
187 z
.w
[2] = x
.w
[2] - y
.w
[2];
188 if (z
.w
[2] > x
.w
[2]) {
191 z
.w
[3] = x
.w
[3] - y
.w
[3]; // no borrow possible, because x >= y
197 nr_digits256 (UINT256 R256
) {
199 // determine the number of decimal digits in R256
200 if (R256
.w
[3] == 0x0 && R256
.w
[2] == 0x0 && R256
.w
[1] == 0x0) {
201 // between 1 and 19 digits
202 for (ind
= 1; ind
<= 19; ind
++) {
203 if (R256
.w
[0] < ten2k64
[ind
]) {
208 } else if (R256
.w
[3] == 0x0 && R256
.w
[2] == 0x0 &&
209 (R256
.w
[1] < ten2k128
[0].w
[1] ||
210 (R256
.w
[1] == ten2k128
[0].w
[1]
211 && R256
.w
[0] < ten2k128
[0].w
[0]))) {
214 } else if (R256
.w
[3] == 0x0 && R256
.w
[2] == 0x0) {
215 // between 21 and 38 digits
216 for (ind
= 1; ind
<= 18; ind
++) {
217 if (R256
.w
[1] < ten2k128
[ind
].w
[1] ||
218 (R256
.w
[1] == ten2k128
[ind
].w
[1] &&
219 R256
.w
[0] < ten2k128
[ind
].w
[0])) {
225 } else if (R256
.w
[3] == 0x0 &&
226 (R256
.w
[2] < ten2k256
[0].w
[2] ||
227 (R256
.w
[2] == ten2k256
[0].w
[2] &&
228 R256
.w
[1] < ten2k256
[0].w
[1]) ||
229 (R256
.w
[2] == ten2k256
[0].w
[2] &&
230 R256
.w
[1] == ten2k256
[0].w
[1] &&
231 R256
.w
[0] < ten2k256
[0].w
[0]))) {
235 // between 40 and 68 digits
236 for (ind
= 1; ind
<= 29; ind
++) {
237 if (R256
.w
[3] < ten2k256
[ind
].w
[3] ||
238 (R256
.w
[3] == ten2k256
[ind
].w
[3] &&
239 R256
.w
[2] < ten2k256
[ind
].w
[2]) ||
240 (R256
.w
[3] == ten2k256
[ind
].w
[3] &&
241 R256
.w
[2] == ten2k256
[ind
].w
[2] &&
242 R256
.w
[1] < ten2k256
[ind
].w
[1]) ||
243 (R256
.w
[3] == ten2k256
[ind
].w
[3] &&
244 R256
.w
[2] == ten2k256
[ind
].w
[2] &&
245 R256
.w
[1] == ten2k256
[ind
].w
[1] &&
246 R256
.w
[0] < ten2k256
[ind
].w
[0])) {
256 // add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
257 // use the rounding information from ptr_is_* to avoid a double rounding error
259 add_and_round (int q3
,
269 int *ptr_is_midpoint_lt_even
,
270 int *ptr_is_midpoint_gt_even
,
271 int *ptr_is_inexact_lt_midpoint
,
272 int *ptr_is_inexact_gt_midpoint
,
273 _IDEC_flags
* ptrfpsf
, UINT128
* ptrres
) {
282 int is_midpoint_lt_even
= 0;
283 int is_midpoint_gt_even
= 0;
284 int is_inexact_lt_midpoint
= 0;
285 int is_inexact_gt_midpoint
= 0;
286 int is_midpoint_lt_even0
= 0;
287 int is_midpoint_gt_even0
= 0;
288 int is_inexact_lt_midpoint0
= 0;
289 int is_inexact_gt_midpoint0
= 0;
294 // int gt_half_ulp = 0;
295 UINT128 res
= *ptrres
;
297 // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
298 scale
= q4
- delta
- q3
; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
299 // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1
301 // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
302 // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
308 } else if (scale
<= 19) { // 10^scale fits in 64 bits
310 P128
.w
[0] = ten2k64
[scale
];
311 __mul_128x128_to_256 (R256
, P128
, C3
);
312 } else if (scale
<= 38) { // 10^scale fits in 128 bits
313 __mul_128x128_to_256 (R256
, ten2k128
[scale
- 20], C3
);
314 } else if (scale
<= 57) { // 39 <= scale <= 57
315 // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
316 // (10^67 has 223 bits; 10^69 has 230 bits);
317 // must split the computation:
318 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
319 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
320 // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
321 __mul_64x128_to_128 (R128
, ten2k64
[scale
- 38], C3
);
322 // now multiply R128 by 10^38
323 __mul_128x128_to_256 (R256
, R128
, ten2k128
[18]);
324 } else { // 58 <= scale <= 66
325 // 10^scale takes between 193 and 220 bits,
326 // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
327 // must split the computation:
328 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
329 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
330 // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
331 // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
332 // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
333 __mul_64x128_to_128 (R128
, C3
.w
[0], ten2k128
[scale
- 58]);
334 // now calculate 10*38 * 10^(scale-38) * C3
335 __mul_128x128_to_256 (R256
, R128
, ten2k128
[18]);
337 // C3 * 10^scale is now in R256
339 // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
340 // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
342 // add/subtract C4 and C3 * 10^scale; the exponent is e4
343 if (p_sign
== z_sign
) { // R256 = C4 + R256
344 // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
345 // but may require rounding
346 add256 (C4
, R256
, &R256
);
347 } else { // if (p_sign != z_sign) { // R256 = C4 - R256
348 // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
349 // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
350 // but may require rounding
352 // compare first R256 = C3 * 10^scale and C4
353 if (R256
.w
[3] > C4
.w
[3] || (R256
.w
[3] == C4
.w
[3] && R256
.w
[2] > C4
.w
[2]) ||
354 (R256
.w
[3] == C4
.w
[3] && R256
.w
[2] == C4
.w
[2] && R256
.w
[1] > C4
.w
[1]) ||
355 (R256
.w
[3] == C4
.w
[3] && R256
.w
[2] == C4
.w
[2] && R256
.w
[1] == C4
.w
[1] &&
356 R256
.w
[0] >= C4
.w
[0])) { // C3 * 10^scale >= C4
357 // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
358 // but may require rounding
359 sub256 (R256
, C4
, &R256
);
360 // flip p_sign too, because the result has the sign of z
362 } else { // if C4 > C3 * 10^scale
363 // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
364 // but may require rounding
365 sub256 (C4
, R256
, &R256
);
367 // if the result is pure zero, the sign depends on the rounding mode
368 // (x*y and z had opposite signs)
369 if (R256
.w
[3] == 0x0ull
&& R256
.w
[2] == 0x0ull
&&
370 R256
.w
[1] == 0x0ull
&& R256
.w
[0] == 0x0ull
) {
371 if (rnd_mode
!= ROUNDING_DOWN
)
372 p_sign
= 0x0000000000000000ull
;
374 p_sign
= 0x8000000000000000ull
;
375 // the exponent is max (e4, expmin)
379 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49);
386 // determine the number of decimal digits in R256
387 ind
= nr_digits256 (R256
);
389 // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
390 // round to the destination precision, with unbounded exponent
393 // result rounded to the destination precision with unbounded exponent
395 if (ind
+ e4
< p34
+ expmin
) {
396 is_tiny
= 1; // applies to all rounding modes
398 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49) | R256
.w
[1];
399 res
.w
[0] = R256
.w
[0];
400 // Note: res is correct only if expmin <= e4 <= expmax
401 } else { // if (ind > p34)
402 // if more than P digits, round to nearest to P digits
403 // round R256 to p34 digits
404 x0
= ind
- p34
; // 1 <= x0 <= 34 as 35 <= ind <= 68
406 P128
.w
[1] = R256
.w
[1];
407 P128
.w
[0] = R256
.w
[0];
408 round128_19_38 (ind
, x0
, P128
, &R128
, &incr_exp
,
409 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
410 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
411 } else if (ind
<= 57) {
412 P192
.w
[2] = R256
.w
[2];
413 P192
.w
[1] = R256
.w
[1];
414 P192
.w
[0] = R256
.w
[0];
415 round192_39_57 (ind
, x0
, P192
, &R192
, &incr_exp
,
416 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
417 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
418 R128
.w
[1] = R192
.w
[1];
419 R128
.w
[0] = R192
.w
[0];
420 } else { // if (ind <= 68)
421 round256_58_76 (ind
, x0
, R256
, &R256
, &incr_exp
,
422 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
423 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
424 R128
.w
[1] = R256
.w
[1];
425 R128
.w
[0] = R256
.w
[0];
427 // the rounded result has p34 = 34 digits
428 e4
= e4
+ x0
+ incr_exp
;
429 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
431 is_tiny
= 1; // for other rounding modes apply correction
434 // for RM, RP, RZ, RA apply correction in order to determine tininess
435 // but do not save the result; apply the correction to
436 // (-1)^p_sign * significand * 10^0
437 P128
.w
[1] = p_sign
| 0x3040000000000000ull
| R128
.w
[1];
438 P128
.w
[0] = R128
.w
[0];
439 rounding_correction (rnd_mode
,
440 is_inexact_lt_midpoint
,
441 is_inexact_gt_midpoint
, is_midpoint_lt_even
,
442 is_midpoint_gt_even
, 0, &P128
, ptrfpsf
);
443 scale
= ((P128
.w
[1] & MASK_EXP
) >> 49) - 6176; // -1, 0, or +1
444 // the number of digits in the significand is p34 = 34
445 if (e4
+ scale
< expmin
) {
449 ind
= p34
; // the number of decimal digits in the signifcand of res
450 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49) | R128
.w
[1]; // RN
451 res
.w
[0] = R128
.w
[0];
452 // Note: res is correct only if expmin <= e4 <= expmax
453 // set the inexact flag after rounding with bounded exponent, if any
455 // at this point we have the result rounded with unbounded exponent in
456 // res and we know its tininess:
457 // res = (-1)^p_sign * significand * 10^e4,
458 // where q (significand) = ind <= p34
459 // Note: res is correct only if expmin <= e4 <= expmax
461 // check for overflow if RN
462 if (rnd_mode
== ROUNDING_TO_NEAREST
&& (ind
+ e4
) > (p34
+ expmax
)) {
463 res
.w
[1] = p_sign
| 0x7800000000000000ull
;
464 res
.w
[0] = 0x0000000000000000ull
;
466 *ptrfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
467 return; // BID_RETURN (res)
468 } // else not overflow or not RN, so continue
470 // if (e4 >= expmin) we have the result rounded with bounded exponent
472 x0
= expmin
- e4
; // x0 >= 1; the number of digits to chop off of res
473 // where the result rounded [at most] once is
474 // (-1)^p_sign * significand_res * 10^e4
476 // avoid double rounding error
477 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
478 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
479 is_midpoint_lt_even0
= is_midpoint_lt_even
;
480 is_midpoint_gt_even0
= is_midpoint_gt_even
;
481 is_inexact_lt_midpoint
= 0;
482 is_inexact_gt_midpoint
= 0;
483 is_midpoint_lt_even
= 0;
484 is_midpoint_gt_even
= 0;
487 // nothing is left of res when moving the decimal point left x0 digits
488 is_inexact_lt_midpoint
= 1;
489 res
.w
[1] = p_sign
| 0x0000000000000000ull
;
490 res
.w
[0] = 0x0000000000000000ull
;
492 } else if (x0
== ind
) { // 1 <= x0 = ind <= p34 = 34
493 // this is <, =, or > 1/2 ulp
494 // compare the ind-digit value in the significand of res with
495 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
496 // less than, equal to, or greater than 1/2 ulp (significand of res)
497 R128
.w
[1] = res
.w
[1] & MASK_COEFF
;
498 R128
.w
[0] = res
.w
[0];
500 if (R128
.w
[0] < midpoint64
[ind
- 1]) { // < 1/2 ulp
502 is_inexact_lt_midpoint
= 1;
503 } else if (R128
.w
[0] == midpoint64
[ind
- 1]) { // = 1/2 ulp
505 is_midpoint_gt_even
= 1;
506 } else { // > 1/2 ulp
508 is_inexact_gt_midpoint
= 1;
510 } else { // if (ind <= 38) {
511 if (R128
.w
[1] < midpoint128
[ind
- 20].w
[1] ||
512 (R128
.w
[1] == midpoint128
[ind
- 20].w
[1] &&
513 R128
.w
[0] < midpoint128
[ind
- 20].w
[0])) { // < 1/2 ulp
515 is_inexact_lt_midpoint
= 1;
516 } else if (R128
.w
[1] == midpoint128
[ind
- 20].w
[1] &&
517 R128
.w
[0] == midpoint128
[ind
- 20].w
[0]) { // = 1/2 ulp
519 is_midpoint_gt_even
= 1;
520 } else { // > 1/2 ulp
522 is_inexact_gt_midpoint
= 1;
525 if (lt_half_ulp
|| eq_half_ulp
) {
526 // res = +0.0 * 10^expmin
527 res
.w
[1] = 0x0000000000000000ull
;
528 res
.w
[0] = 0x0000000000000000ull
;
529 } else { // if (gt_half_ulp)
530 // res = +1 * 10^expmin
531 res
.w
[1] = 0x0000000000000000ull
;
532 res
.w
[0] = 0x0000000000000001ull
;
534 res
.w
[1] = p_sign
| res
.w
[1];
536 } else { // if (1 <= x0 <= ind - 1 <= 33)
537 // round the ind-digit result to ind - x0 digits
539 if (ind
<= 18) { // 2 <= ind <= 18
540 round64_2_18 (ind
, x0
, res
.w
[0], &R64
, &incr_exp
,
541 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
542 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
545 } else if (ind
<= 38) {
546 P128
.w
[1] = res
.w
[1] & MASK_COEFF
;
547 P128
.w
[0] = res
.w
[0];
548 round128_19_38 (ind
, x0
, P128
, &res
, &incr_exp
,
549 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
550 &is_inexact_lt_midpoint
,
551 &is_inexact_gt_midpoint
);
553 e4
= e4
+ x0
; // expmin
554 // we want the exponent to be expmin, so if incr_exp = 1 then
555 // multiply the rounded result by 10 - it will still fit in 113 bits
558 P128
.w
[1] = res
.w
[1] & MASK_COEFF
;
559 P128
.w
[0] = res
.w
[0];
560 __mul_64x128_to_128 (res
, ten2k64
[1], P128
);
563 p_sign
| ((UINT64
) (e4
+ 6176) << 49) | (res
.w
[1] & MASK_COEFF
);
564 // avoid a double rounding error
565 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
566 is_midpoint_lt_even
) { // double rounding error upward
569 if (res
.w
[0] == 0xffffffffffffffffull
)
571 // Note: a double rounding error upward is not possible; for this
572 // the result after the first rounding would have to be 99...95
573 // (35 digits in all), possibly followed by a number of zeros; this
574 // is not possible in Cases (2)-(6) or (15)-(17) which may get here
575 is_midpoint_lt_even
= 0;
576 is_inexact_lt_midpoint
= 1;
577 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
578 is_midpoint_gt_even
) { // double rounding error downward
583 is_midpoint_gt_even
= 0;
584 is_inexact_gt_midpoint
= 1;
585 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
586 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
587 // if this second rounding was exact the result may still be
588 // inexact because of the first rounding
589 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
590 is_inexact_gt_midpoint
= 1;
592 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
593 is_inexact_lt_midpoint
= 1;
595 } else if (is_midpoint_gt_even
&&
596 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
597 // pulled up to a midpoint
598 is_inexact_lt_midpoint
= 1;
599 is_inexact_gt_midpoint
= 0;
600 is_midpoint_lt_even
= 0;
601 is_midpoint_gt_even
= 0;
602 } else if (is_midpoint_lt_even
&&
603 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
604 // pulled down to a midpoint
605 is_inexact_lt_midpoint
= 0;
606 is_inexact_gt_midpoint
= 1;
607 is_midpoint_lt_even
= 0;
608 is_midpoint_gt_even
= 0;
614 // res contains the correct result
615 // apply correction if not rounding to nearest
616 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
617 rounding_correction (rnd_mode
,
618 is_inexact_lt_midpoint
, is_inexact_gt_midpoint
,
619 is_midpoint_lt_even
, is_midpoint_gt_even
,
622 if (is_midpoint_lt_even
|| is_midpoint_gt_even
||
623 is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
) {
624 // set the inexact flag
625 *ptrfpsf
|= INEXACT_EXCEPTION
;
627 *ptrfpsf
|= UNDERFLOW_EXCEPTION
;
630 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
631 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
632 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
633 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
639 #if DECIMAL_CALL_BY_REFERENCE
641 bid128_ext_fma (int *ptr_is_midpoint_lt_even
,
642 int *ptr_is_midpoint_gt_even
,
643 int *ptr_is_inexact_lt_midpoint
,
644 int *ptr_is_inexact_gt_midpoint
, UINT128
* pres
,
645 UINT128
* px
, UINT128
* py
,
647 pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
649 UINT128 x
= *px
, y
= *py
, z
= *pz
;
650 #if !DECIMAL_GLOBAL_ROUNDING
651 unsigned int rnd_mode
= *prnd_mode
;
655 bid128_ext_fma (int *ptr_is_midpoint_lt_even
,
656 int *ptr_is_midpoint_gt_even
,
657 int *ptr_is_inexact_lt_midpoint
,
658 int *ptr_is_inexact_gt_midpoint
, UINT128 x
, UINT128 y
,
659 UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
660 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
663 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
664 UINT64 x_sign
, y_sign
, z_sign
, p_sign
, tmp_sign
;
665 UINT64 x_exp
= 0, y_exp
= 0, z_exp
= 0, p_exp
;
669 int q1
= 0, q2
= 0, q3
= 0, q4
;
671 int scale
, ind
, delta
, x0
;
672 int p34
= P34
; // used to modify the limit on the number of digits
674 int x_nr_bits
, y_nr_bits
, z_nr_bits
;
675 unsigned int save_fpsf
;
676 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0;
677 int is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
678 int is_midpoint_lt_even0
= 0, is_midpoint_gt_even0
= 0;
679 int is_inexact_lt_midpoint0
= 0, is_inexact_gt_midpoint0
= 0;
691 // the following are based on the table of special cases for fma; the NaN
692 // behavior is similar to that of the IA-64 Architecture fma
694 // identify cases where at least one operand is NaN
699 if ((y
.w
[1] & MASK_NAN
) == MASK_NAN
) { // y is NAN
700 // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
701 // check first for non-canonical NaN payload
702 if (((y
.w
[1] & 0x00003fffffffffffull
) > 0x0000314dc6448d93ull
) ||
703 (((y
.w
[1] & 0x00003fffffffffffull
) == 0x0000314dc6448d93ull
) &&
704 (y
.w
[0] > 0x38c15b09ffffffffull
))) {
705 y
.w
[1] = y
.w
[1] & 0xffffc00000000000ull
;
708 if ((y
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // y is SNAN
710 *pfpsf
|= INVALID_EXCEPTION
;
712 res
.w
[1] = y
.w
[1] & 0xfc003fffffffffffull
; // clear out also G[6]-G[16]
714 } else { // y is QNaN
716 res
.w
[1] = y
.w
[1] & 0xfc003fffffffffffull
; // clear out G[6]-G[16]
718 // if z = SNaN or x = SNaN signal invalid exception
719 if ((z
.w
[1] & MASK_SNAN
) == MASK_SNAN
||
720 (x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) {
722 *pfpsf
|= INVALID_EXCEPTION
;
725 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
726 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
727 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
728 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
731 } else if ((z
.w
[1] & MASK_NAN
) == MASK_NAN
) { // z is NAN
732 // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
733 // check first for non-canonical NaN payload
734 if (((z
.w
[1] & 0x00003fffffffffffull
) > 0x0000314dc6448d93ull
) ||
735 (((z
.w
[1] & 0x00003fffffffffffull
) == 0x0000314dc6448d93ull
) &&
736 (z
.w
[0] > 0x38c15b09ffffffffull
))) {
737 z
.w
[1] = z
.w
[1] & 0xffffc00000000000ull
;
740 if ((z
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // z is SNAN
742 *pfpsf
|= INVALID_EXCEPTION
;
744 res
.w
[1] = z
.w
[1] & 0xfc003fffffffffffull
; // clear out also G[6]-G[16]
746 } else { // z is QNaN
748 res
.w
[1] = z
.w
[1] & 0xfc003fffffffffffull
; // clear out G[6]-G[16]
750 // if x = SNaN signal invalid exception
751 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) {
753 *pfpsf
|= INVALID_EXCEPTION
;
756 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
757 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
758 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
759 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
762 } else if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
763 // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
764 // check first for non-canonical NaN payload
765 if (((x
.w
[1] & 0x00003fffffffffffull
) > 0x0000314dc6448d93ull
) ||
766 (((x
.w
[1] & 0x00003fffffffffffull
) == 0x0000314dc6448d93ull
) &&
767 (x
.w
[0] > 0x38c15b09ffffffffull
))) {
768 x
.w
[1] = x
.w
[1] & 0xffffc00000000000ull
;
771 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
773 *pfpsf
|= INVALID_EXCEPTION
;
775 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out also G[6]-G[16]
777 } else { // x is QNaN
779 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out G[6]-G[16]
782 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
783 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
784 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
785 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
789 // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
790 // for non-canonical values
792 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
793 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
795 if ((x
.w
[1] & MASK_ANY_INF
) != MASK_INF
) { // x != inf
796 // if x is not infinity check for non-canonical values - treated as zero
797 if ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
) { // G0_G1=11
799 x_exp
= (x
.w
[1] << 2) & MASK_EXP
; // biased and shifted left 49 bits
800 C1
.w
[1] = 0; // significand high
801 C1
.w
[0] = 0; // significand low
802 } else { // G0_G1 != 11
803 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bits
804 if (C1
.w
[1] > 0x0001ed09bead87c0ull
||
805 (C1
.w
[1] == 0x0001ed09bead87c0ull
&&
806 C1
.w
[0] > 0x378d8e63ffffffffull
)) {
807 // x is non-canonical if coefficient is larger than 10^34 -1
810 } else { // canonical
815 y_sign
= y
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
816 C2
.w
[1] = y
.w
[1] & MASK_COEFF
;
818 if ((y
.w
[1] & MASK_ANY_INF
) != MASK_INF
) { // y != inf
819 // if y is not infinity check for non-canonical values - treated as zero
820 if ((y
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
) { // G0_G1=11
822 y_exp
= (y
.w
[1] << 2) & MASK_EXP
; // biased and shifted left 49 bits
823 C2
.w
[1] = 0; // significand high
824 C2
.w
[0] = 0; // significand low
825 } else { // G0_G1 != 11
826 y_exp
= y
.w
[1] & MASK_EXP
; // biased and shifted left 49 bits
827 if (C2
.w
[1] > 0x0001ed09bead87c0ull
||
828 (C2
.w
[1] == 0x0001ed09bead87c0ull
&&
829 C2
.w
[0] > 0x378d8e63ffffffffull
)) {
830 // y is non-canonical if coefficient is larger than 10^34 -1
833 } else { // canonical
838 z_sign
= z
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
839 C3
.w
[1] = z
.w
[1] & MASK_COEFF
;
841 if ((z
.w
[1] & MASK_ANY_INF
) != MASK_INF
) { // z != inf
842 // if z is not infinity check for non-canonical values - treated as zero
843 if ((z
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
) { // G0_G1=11
845 z_exp
= (z
.w
[1] << 2) & MASK_EXP
; // biased and shifted left 49 bits
846 C3
.w
[1] = 0; // significand high
847 C3
.w
[0] = 0; // significand low
848 } else { // G0_G1 != 11
849 z_exp
= z
.w
[1] & MASK_EXP
; // biased and shifted left 49 bits
850 if (C3
.w
[1] > 0x0001ed09bead87c0ull
||
851 (C3
.w
[1] == 0x0001ed09bead87c0ull
&&
852 C3
.w
[0] > 0x378d8e63ffffffffull
)) {
853 // z is non-canonical if coefficient is larger than 10^34 -1
856 } else { // canonical
862 p_sign
= x_sign
^ y_sign
; // sign of the product
864 // identify cases where at least one operand is infinity
866 if ((x
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // x = inf
867 if ((y
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // y = inf
868 if ((z
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // z = inf
869 if (p_sign
== z_sign
) {
870 res
.w
[1] = z_sign
| MASK_INF
;
873 // return QNaN Indefinite
874 res
.w
[1] = 0x7c00000000000000ull
;
875 res
.w
[0] = 0x0000000000000000ull
;
877 *pfpsf
|= INVALID_EXCEPTION
;
879 } else { // z = 0 or z = f
880 res
.w
[1] = p_sign
| MASK_INF
;
883 } else if (C2
.w
[1] != 0 || C2
.w
[0] != 0) { // y = f
884 if ((z
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // z = inf
885 if (p_sign
== z_sign
) {
886 res
.w
[1] = z_sign
| MASK_INF
;
889 // return QNaN Indefinite
890 res
.w
[1] = 0x7c00000000000000ull
;
891 res
.w
[0] = 0x0000000000000000ull
;
893 *pfpsf
|= INVALID_EXCEPTION
;
895 } else { // z = 0 or z = f
896 res
.w
[1] = p_sign
| MASK_INF
;
900 // return QNaN Indefinite
901 res
.w
[1] = 0x7c00000000000000ull
;
902 res
.w
[0] = 0x0000000000000000ull
;
904 *pfpsf
|= INVALID_EXCEPTION
;
906 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
907 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
908 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
909 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
912 } else if ((y
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // y = inf
913 if ((z
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // z = inf
914 // x = f, necessarily
915 if ((p_sign
!= z_sign
)
916 || (C1
.w
[1] == 0x0ull
&& C1
.w
[0] == 0x0ull
)) {
917 // return QNaN Indefinite
918 res
.w
[1] = 0x7c00000000000000ull
;
919 res
.w
[0] = 0x0000000000000000ull
;
921 *pfpsf
|= INVALID_EXCEPTION
;
923 res
.w
[1] = z_sign
| MASK_INF
;
926 } else if (C1
.w
[1] == 0x0 && C1
.w
[0] == 0x0) { // x = 0
928 // return QNaN Indefinite
929 res
.w
[1] = 0x7c00000000000000ull
;
930 res
.w
[0] = 0x0000000000000000ull
;
932 *pfpsf
|= INVALID_EXCEPTION
;
934 // x = f and z = 0, f, necessarily
935 res
.w
[1] = p_sign
| MASK_INF
;
938 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
939 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
940 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
941 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
944 } else if ((z
.w
[1] & MASK_ANY_INF
) == MASK_INF
) { // z = inf
945 // x = 0, f and y = 0, f, necessarily
946 res
.w
[1] = z_sign
| MASK_INF
;
948 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
949 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
950 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
951 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
956 true_p_exp
= (x_exp
>> 49) - 6176 + (y_exp
>> 49) - 6176;
957 if (true_p_exp
< -6176)
958 p_exp
= 0; // cannot be less than EXP_MIN
960 p_exp
= (UINT64
) (true_p_exp
+ 6176) << 49;
962 if (((C1
.w
[1] == 0x0 && C1
.w
[0] == 0x0) || (C2
.w
[1] == 0x0 && C2
.w
[0] == 0x0)) && C3
.w
[1] == 0x0 && C3
.w
[0] == 0x0) { // (x = 0 or y = 0) and z = 0
965 res
.w
[1] = p_exp
; // preferred exponent
967 res
.w
[1] = z_exp
; // preferred exponent
968 if (p_sign
== z_sign
) {
971 } else { // x * y and z have opposite signs
972 if (rnd_mode
== ROUNDING_DOWN
) {
974 res
.w
[1] |= MASK_SIGN
;
982 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
983 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
984 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
985 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
989 // from this point on, we may need to know the number of decimal digits
990 // in the significands of x, y, z when x, y, z != 0
992 if (C1
.w
[1] != 0 || C1
.w
[0] != 0) { // x = f (non-zero finite)
993 // q1 = nr. of decimal digits in x
994 // determine first the nr. of bits in x
996 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
997 // split the 64-bit value in two 32-bit halves to avoid rounding errors
998 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
999 tmp
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
1001 33 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1002 } else { // x < 2^32
1003 tmp
.d
= (double) (C1
.w
[0]); // exact conversion
1005 1 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1007 } else { // if x < 2^53
1008 tmp
.d
= (double) C1
.w
[0]; // exact conversion
1010 1 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1012 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1013 tmp
.d
= (double) C1
.w
[1]; // exact conversion
1015 65 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1017 q1
= nr_digits
[x_nr_bits
- 1].digits
;
1019 q1
= nr_digits
[x_nr_bits
- 1].digits1
;
1020 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
||
1021 (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
&&
1022 C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
1027 if (C2
.w
[1] != 0 || C2
.w
[0] != 0) { // y = f (non-zero finite)
1029 if (C2
.w
[0] >= 0x0020000000000000ull
) { // y >= 2^53
1030 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1031 if (C2
.w
[0] >= 0x0000000100000000ull
) { // y >= 2^32
1032 tmp
.d
= (double) (C2
.w
[0] >> 32); // exact conversion
1034 32 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1035 } else { // y < 2^32
1036 tmp
.d
= (double) C2
.w
[0]; // exact conversion
1038 ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1040 } else { // if y < 2^53
1041 tmp
.d
= (double) C2
.w
[0]; // exact conversion
1043 ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1045 } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1])
1046 tmp
.d
= (double) C2
.w
[1]; // exact conversion
1048 64 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1051 q2
= nr_digits
[y_nr_bits
].digits
;
1053 q2
= nr_digits
[y_nr_bits
].digits1
;
1054 if (C2
.w
[1] > nr_digits
[y_nr_bits
].threshold_hi
||
1055 (C2
.w
[1] == nr_digits
[y_nr_bits
].threshold_hi
&&
1056 C2
.w
[0] >= nr_digits
[y_nr_bits
].threshold_lo
))
1061 if (C3
.w
[1] != 0 || C3
.w
[0] != 0) { // z = f (non-zero finite)
1063 if (C3
.w
[0] >= 0x0020000000000000ull
) { // z >= 2^53
1064 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1065 if (C3
.w
[0] >= 0x0000000100000000ull
) { // z >= 2^32
1066 tmp
.d
= (double) (C3
.w
[0] >> 32); // exact conversion
1068 32 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1069 } else { // z < 2^32
1070 tmp
.d
= (double) C3
.w
[0]; // exact conversion
1072 ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1074 } else { // if z < 2^53
1075 tmp
.d
= (double) C3
.w
[0]; // exact conversion
1077 ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1079 } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1])
1080 tmp
.d
= (double) C3
.w
[1]; // exact conversion
1082 64 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1085 q3
= nr_digits
[z_nr_bits
].digits
;
1087 q3
= nr_digits
[z_nr_bits
].digits1
;
1088 if (C3
.w
[1] > nr_digits
[z_nr_bits
].threshold_hi
||
1089 (C3
.w
[1] == nr_digits
[z_nr_bits
].threshold_hi
&&
1090 C3
.w
[0] >= nr_digits
[z_nr_bits
].threshold_lo
))
1095 if ((C1
.w
[1] == 0x0 && C1
.w
[0] == 0x0) ||
1096 (C2
.w
[1] == 0x0 && C2
.w
[0] == 0x0)) {
1098 // z = f, necessarily; for 0 + z return z, with the preferred exponent
1099 // the result is z, but need to get the preferred exponent
1100 if (z_exp
<= p_exp
) { // the preferred exponent is z_exp
1101 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | C3
.w
[1];
1103 } else { // if (p_exp < z_exp) the preferred exponent is p_exp
1104 // return (C3 * 10^scale) * 10^(z_exp - scale)
1105 // where scale = min (p34-q3, (z_exp-p_exp) >> 49)
1107 ind
= (z_exp
- p_exp
) >> 49;
1111 res
.w
[1] = z
.w
[1]; // & MASK_COEFF, which is redundant
1113 } else if (q3
<= 19) { // z fits in 64 bits
1114 if (scale
<= 19) { // 10^scale fits in 64 bits
1115 // 64 x 64 C3.w[0] * ten2k64[scale]
1116 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
1117 } else { // 10^scale fits in 128 bits
1118 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1119 __mul_128x64_to_128 (res
, C3
.w
[0], ten2k128
[scale
- 20]);
1121 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1122 // 64 x 128 ten2k64[scale] * C3
1123 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
1125 // subtract scale from the exponent
1126 z_exp
= z_exp
- ((UINT64
) scale
<< 49);
1127 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
1129 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1130 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1131 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1132 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1136 ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
1139 e1
= (x_exp
>> 49) - 6176; // unbiased exponent of x
1140 e2
= (y_exp
>> 49) - 6176; // unbiased exponent of y
1141 e3
= (z_exp
>> 49) - 6176; // unbiased exponent of z
1142 e4
= e1
+ e2
; // unbiased exponent of the exact x * y
1144 // calculate C1 * C2 and its number of decimal digits, q4
1146 // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits
1147 // where 2 <= q1 + q2 <= 68
1148 // calculate C4 = C1 * C2 and determine q
1149 C4
.w
[3] = C4
.w
[2] = C4
.w
[1] = C4
.w
[0] = 0;
1150 if (q1
+ q2
<= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
1151 C4
.w
[0] = C1
.w
[0] * C2
.w
[0];
1152 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1153 if (C4
.w
[0] < ten2k64
[q1
+ q2
- 1])
1154 q4
= q1
+ q2
- 1; // q4 in [1, 18]
1156 q4
= q1
+ q2
; // q4 in [2, 19]
1157 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1158 } else if (q1
+ q2
== 20) { // C4 = C1 * C2 fits in 64 or 128 bits
1159 // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
1160 __mul_64x64_to_128MACH (C4
, C1
.w
[0], C2
.w
[0]);
1161 // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
1162 if (C4
.w
[1] == 0 && C4
.w
[0] < ten2k64
[19]) { // 19 = q1+q2-1
1163 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1164 q4
= 19; // 19 = q1 + q2 - 1
1166 // if (C4.w[1] == 0)
1167 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1169 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1170 q4
= 20; // 20 = q1 + q2
1172 } else if (q1
+ q2
<= 38) { // 21 <= q1 + q2 <= 38
1173 // C4 = C1 * C2 fits in 64 or 128 bits
1174 // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits)
1175 // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits
1177 __mul_128x64_to_128 (C4
, C1
.w
[0], C2
);
1178 } else { // q2 <= 19
1179 __mul_128x64_to_128 (C4
, C2
.w
[0], C1
);
1181 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1182 if (C4
.w
[1] < ten2k128
[q1
+ q2
- 21].w
[1] ||
1183 (C4
.w
[1] == ten2k128
[q1
+ q2
- 21].w
[1] &&
1184 C4
.w
[0] < ten2k128
[q1
+ q2
- 21].w
[0])) {
1185 // if (C4.w[1] == 0) // q4 = 20, necessarily
1186 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1188 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1189 q4
= q1
+ q2
- 1; // q4 in [20, 37]
1191 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1192 q4
= q1
+ q2
; // q4 in [21, 38]
1194 } else if (q1
+ q2
== 39) { // C4 = C1 * C2 fits in 128 or 192 bits
1195 // both C1 and C2 fit in 128 bits (actually in 113 bits)
1196 // may replace this by 128x128_to192
1197 __mul_128x128_to_256 (C4
, C1
, C2
); // C4.w[3] is 0
1198 // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
1199 if (C4
.w
[2] == 0 && (C4
.w
[1] < ten2k128
[18].w
[1] ||
1200 (C4
.w
[1] == ten2k128
[18].w
[1]
1201 && C4
.w
[0] < ten2k128
[18].w
[0]))) {
1202 // 18 = 38 - 20 = q1+q2-1 - 20
1203 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1204 q4
= 38; // 38 = q1 + q2 - 1
1206 // if (C4.w[2] == 0)
1207 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1209 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1210 q4
= 39; // 39 = q1 + q2
1212 } else if (q1
+ q2
<= 57) { // 40 <= q1 + q2 <= 57
1213 // C4 = C1 * C2 fits in 128 or 192 bits
1214 // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits)
1215 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1216 // may fit in 64 bits
1217 if (C1
.w
[1] == 0) { // C1 fits in 64 bits
1218 // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1219 __mul_64x128_full (C4
.w
[2], C4
, C1
.w
[0], C2
);
1220 } else if (C2
.w
[1] == 0) { // C2 fits in 64 bits
1221 // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1222 __mul_64x128_full (C4
.w
[2], C4
, C2
.w
[0], C1
);
1223 } else { // both C1 and C2 require 128 bits
1224 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1225 __mul_128x128_to_256 (C4
, C1
, C2
); // C4.w[3] = 0
1227 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1228 if (C4
.w
[2] < ten2k256
[q1
+ q2
- 40].w
[2] ||
1229 (C4
.w
[2] == ten2k256
[q1
+ q2
- 40].w
[2] &&
1230 (C4
.w
[1] < ten2k256
[q1
+ q2
- 40].w
[1] ||
1231 (C4
.w
[1] == ten2k256
[q1
+ q2
- 40].w
[1] &&
1232 C4
.w
[0] < ten2k256
[q1
+ q2
- 40].w
[0])))) {
1233 // if (C4.w[2] == 0) // q4 = 39, necessarily
1234 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1236 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1237 q4
= q1
+ q2
- 1; // q4 in [39, 56]
1239 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1240 q4
= q1
+ q2
; // q4 in [40, 57]
1242 } else if (q1
+ q2
== 58) { // C4 = C1 * C2 fits in 192 or 256 bits
1243 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1244 // may fit in 64 bits
1245 if (C1
.w
[1] == 0) { // C1 * C2 will fit in 192 bits
1246 __mul_64x128_full (C4
.w
[2], C4
, C1
.w
[0], C2
); // may use 64x128_to_192
1247 } else if (C2
.w
[1] == 0) { // C1 * C2 will fit in 192 bits
1248 __mul_64x128_full (C4
.w
[2], C4
, C2
.w
[0], C1
); // may use 64x128_to_192
1249 } else { // C1 * C2 will fit in 192 bits or in 256 bits
1250 __mul_128x128_to_256 (C4
, C1
, C2
);
1252 // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
1253 if (C4
.w
[3] == 0 && (C4
.w
[2] < ten2k256
[18].w
[2] ||
1254 (C4
.w
[2] == ten2k256
[18].w
[2]
1255 && (C4
.w
[1] < ten2k256
[18].w
[1]
1256 || (C4
.w
[1] == ten2k256
[18].w
[1]
1257 && C4
.w
[0] < ten2k256
[18].w
[0]))))) {
1258 // 18 = 57 - 39 = q1+q2-1 - 39
1259 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1260 q4
= 57; // 57 = q1 + q2 - 1
1262 // if (C4.w[3] == 0)
1263 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1265 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1266 q4
= 58; // 58 = q1 + q2
1268 } else { // if 59 <= q1 + q2 <= 68
1269 // C4 = C1 * C2 fits in 192 or 256 bits
1270 // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits)
1271 // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in
1273 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1274 __mul_128x128_to_256 (C4
, C1
, C2
); // C4.w[3] = 0
1275 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1276 if (C4
.w
[3] < ten2k256
[q1
+ q2
- 40].w
[3] ||
1277 (C4
.w
[3] == ten2k256
[q1
+ q2
- 40].w
[3] &&
1278 (C4
.w
[2] < ten2k256
[q1
+ q2
- 40].w
[2] ||
1279 (C4
.w
[2] == ten2k256
[q1
+ q2
- 40].w
[2] &&
1280 (C4
.w
[1] < ten2k256
[q1
+ q2
- 40].w
[1] ||
1281 (C4
.w
[1] == ten2k256
[q1
+ q2
- 40].w
[1] &&
1282 C4
.w
[0] < ten2k256
[q1
+ q2
- 40].w
[0])))))) {
1283 // if (C4.w[3] == 0) // q4 = 58, necessarily
1284 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1286 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1287 q4
= q1
+ q2
- 1; // q4 in [58, 67]
1289 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1290 q4
= q1
+ q2
; // q4 in [59, 68]
1294 if (C3
.w
[1] == 0x0 && C3
.w
[0] == 0x0) { // x = f, y = f, z = 0
1295 save_fpsf
= *pfpsf
; // sticky bits - caller value must be preserved
1300 // truncate C4 to p34 digits into res
1301 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
1304 P128
.w
[1] = C4
.w
[1];
1305 P128
.w
[0] = C4
.w
[0];
1306 round128_19_38 (q4
, x0
, P128
, &res
, &incr_exp
,
1307 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1308 &is_inexact_lt_midpoint
,
1309 &is_inexact_gt_midpoint
);
1310 } else if (q4
<= 57) { // 35 <= q4 <= 57
1311 P192
.w
[2] = C4
.w
[2];
1312 P192
.w
[1] = C4
.w
[1];
1313 P192
.w
[0] = C4
.w
[0];
1314 round192_39_57 (q4
, x0
, P192
, &R192
, &incr_exp
,
1315 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1316 &is_inexact_lt_midpoint
,
1317 &is_inexact_gt_midpoint
);
1318 res
.w
[0] = R192
.w
[0];
1319 res
.w
[1] = R192
.w
[1];
1320 } else { // if (q4 <= 68)
1321 round256_58_76 (q4
, x0
, C4
, &R256
, &incr_exp
,
1322 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1323 &is_inexact_lt_midpoint
,
1324 &is_inexact_gt_midpoint
);
1325 res
.w
[0] = R256
.w
[0];
1326 res
.w
[1] = R256
.w
[1];
1333 // res is now the coefficient of the result rounded to the destination
1334 // precision, with unbounded exponent; the exponent is e4; q4=digits(res)
1335 } else { // if (q4 <= p34)
1336 // C4 * 10^e4 is the result rounded to the destination precision, with
1337 // unbounded exponent (which is exact)
1339 if ((q4
+ e4
<= p34
+ expmax
) && (e4
> expmax
)) {
1340 // e4 is too large, but can be brought within range by scaling up C4
1341 scale
= e4
- expmax
; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2
1342 // res = (C4 * 10^scale) * 10^expmax
1343 if (q4
<= 19) { // C4 fits in 64 bits
1344 if (scale
<= 19) { // 10^scale fits in 64 bits
1345 // 64 x 64 C4.w[0] * ten2k64[scale]
1346 __mul_64x64_to_128MACH (res
, C4
.w
[0], ten2k64
[scale
]);
1347 } else { // 10^scale fits in 128 bits
1348 // 64 x 128 C4.w[0] * ten2k128[scale - 20]
1349 __mul_128x64_to_128 (res
, C4
.w
[0], ten2k128
[scale
- 20]);
1351 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
1352 // 64 x 128 ten2k64[scale] * CC43
1353 __mul_128x64_to_128 (res
, ten2k64
[scale
], C4
);
1355 e4
= e4
- scale
; // expmax
1361 // res is the coefficient of the result rounded to the destination
1362 // precision, with unbounded exponent (it has q4 digits); the exponent
1363 // is e4 (exact result)
1366 // check for overflow
1367 if (q4
+ e4
> p34
+ expmax
) {
1368 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
1369 res
.w
[1] = p_sign
| 0x7800000000000000ull
; // +/-inf
1370 res
.w
[0] = 0x0000000000000000ull
;
1371 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
1373 res
.w
[1] = p_sign
| res
.w
[1];
1374 rounding_correction (rnd_mode
,
1375 is_inexact_lt_midpoint
,
1376 is_inexact_gt_midpoint
,
1377 is_midpoint_lt_even
, is_midpoint_gt_even
,
1380 *pfpsf
|= save_fpsf
;
1381 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1382 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1383 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1384 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1388 // check for underflow
1389 if (q4
+ e4
< expmin
+ P34
) {
1390 is_tiny
= 1; // the result is tiny
1392 // if e4 < expmin, we must truncate more of res
1393 x0
= expmin
- e4
; // x0 >= 1
1394 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
1395 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
1396 is_midpoint_lt_even0
= is_midpoint_lt_even
;
1397 is_midpoint_gt_even0
= is_midpoint_gt_even
;
1398 is_inexact_lt_midpoint
= 0;
1399 is_inexact_gt_midpoint
= 0;
1400 is_midpoint_lt_even
= 0;
1401 is_midpoint_gt_even
= 0;
1402 // the number of decimal digits in res is q4
1403 if (x0
< q4
) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
1404 if (q4
<= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
1405 round64_2_18 (q4
, x0
, res
.w
[0], &R64
, &incr_exp
,
1406 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1407 &is_inexact_lt_midpoint
,
1408 &is_inexact_gt_midpoint
);
1410 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
1411 R64
= ten2k64
[q4
- x0
];
1413 // res.w[1] = 0; (from above)
1415 } else { // if (q4 <= 34)
1417 P128
.w
[1] = res
.w
[1];
1418 P128
.w
[0] = res
.w
[0];
1419 round128_19_38 (q4
, x0
, P128
, &res
, &incr_exp
,
1420 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1421 &is_inexact_lt_midpoint
,
1422 &is_inexact_gt_midpoint
);
1424 // increase coefficient by a factor of 10; this will be <= 10^33
1425 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
1426 if (q4
- x0
<= 19) { // 1 <= q4 - x0 <= 19
1428 res
.w
[0] = ten2k64
[q4
- x0
];
1429 } else { // 20 <= q4 - x0 <= 37
1430 res
.w
[0] = ten2k128
[q4
- x0
- 20].w
[0];
1431 res
.w
[1] = ten2k128
[q4
- x0
- 20].w
[1];
1435 e4
= e4
+ x0
; // expmin
1436 } else if (x0
== q4
) {
1437 // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
1438 // determine relationship with 1/2 ulp
1440 if (res
.w
[0] < midpoint64
[q4
- 1]) { // < 1/2 ulp
1442 is_inexact_lt_midpoint
= 1;
1443 } else if (res
.w
[0] == midpoint64
[q4
- 1]) { // = 1/2 ulp
1445 is_midpoint_gt_even
= 1;
1446 } else { // > 1/2 ulp
1448 is_inexact_gt_midpoint
= 1;
1450 } else { // if (q4 <= 34)
1451 if (res
.w
[1] < midpoint128
[q4
- 20].w
[1] ||
1452 (res
.w
[1] == midpoint128
[q4
- 20].w
[1] &&
1453 res
.w
[0] < midpoint128
[q4
- 20].w
[0])) { // < 1/2 ulp
1455 is_inexact_lt_midpoint
= 1;
1456 } else if (res
.w
[1] == midpoint128
[q4
- 20].w
[1] &&
1457 res
.w
[0] == midpoint128
[q4
- 20].w
[0]) { // = 1/2 ulp
1459 is_midpoint_gt_even
= 1;
1460 } else { // > 1/2 ulp
1462 is_inexact_gt_midpoint
= 1;
1465 if (lt_half_ulp
|| eq_half_ulp
) {
1466 // res = +0.0 * 10^expmin
1467 res
.w
[1] = 0x0000000000000000ull
;
1468 res
.w
[0] = 0x0000000000000000ull
;
1469 } else { // if (gt_half_ulp)
1470 // res = +1 * 10^expmin
1471 res
.w
[1] = 0x0000000000000000ull
;
1472 res
.w
[0] = 0x0000000000000001ull
;
1475 } else { // if (x0 > q4)
1476 // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
1480 is_inexact_lt_midpoint
= 1;
1482 // avoid a double rounding error
1483 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
1484 is_midpoint_lt_even
) { // double rounding error upward
1487 if (res
.w
[0] == 0xffffffffffffffffull
)
1489 // Note: a double rounding error upward is not possible; for this
1490 // the result after the first rounding would have to be 99...95
1491 // (35 digits in all), possibly followed by a number of zeros; this
1492 // not possible for f * f + 0
1493 is_midpoint_lt_even
= 0;
1494 is_inexact_lt_midpoint
= 1;
1495 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
1496 is_midpoint_gt_even
) { // double rounding error downward
1501 is_midpoint_gt_even
= 0;
1502 is_inexact_gt_midpoint
= 1;
1503 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
1504 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
1505 // if this second rounding was exact the result may still be
1506 // inexact because of the first rounding
1507 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
1508 is_inexact_gt_midpoint
= 1;
1510 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
1511 is_inexact_lt_midpoint
= 1;
1513 } else if (is_midpoint_gt_even
&&
1514 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
1515 // pulled up to a midpoint
1516 is_inexact_lt_midpoint
= 1;
1517 is_inexact_gt_midpoint
= 0;
1518 is_midpoint_lt_even
= 0;
1519 is_midpoint_gt_even
= 0;
1520 } else if (is_midpoint_lt_even
&&
1521 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
1522 // pulled down to a midpoint
1523 is_inexact_lt_midpoint
= 0;
1524 is_inexact_gt_midpoint
= 1;
1525 is_midpoint_lt_even
= 0;
1526 is_midpoint_gt_even
= 0;
1530 } else { // if e4 >= emin then q4 < P and the result is tiny and exact
1532 // if (e3 < e4) the preferred exponent is e3
1533 // return (C4 * 10^scale) * 10^(e4 - scale)
1534 // where scale = min (p34-q4, (e4 - e3))
1540 ; // res and e4 are unchanged
1541 } else if (q4
<= 19) { // C4 fits in 64 bits
1542 if (scale
<= 19) { // 10^scale fits in 64 bits
1543 // 64 x 64 res.w[0] * ten2k64[scale]
1544 __mul_64x64_to_128MACH (res
, res
.w
[0], ten2k64
[scale
]);
1545 } else { // 10^scale fits in 128 bits
1546 // 64 x 128 res.w[0] * ten2k128[scale - 20]
1547 __mul_128x64_to_128 (res
, res
.w
[0], ten2k128
[scale
- 20]);
1549 } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
1550 // 64 x 128 ten2k64[scale] * C3
1551 __mul_128x64_to_128 (res
, ten2k64
[scale
], res
);
1553 // subtract scale from the exponent
1558 // check for inexact result
1559 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
1560 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
1561 // set the inexact flag and the underflow flag
1562 *pfpsf
|= INEXACT_EXCEPTION
;
1563 *pfpsf
|= UNDERFLOW_EXCEPTION
;
1565 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49) | res
.w
[1];
1566 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
1567 rounding_correction (rnd_mode
,
1568 is_inexact_lt_midpoint
,
1569 is_inexact_gt_midpoint
,
1570 is_midpoint_lt_even
, is_midpoint_gt_even
,
1573 *pfpsf
|= save_fpsf
;
1574 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1575 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1576 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1577 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1581 // no overflow, and no underflow for rounding to nearest
1582 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49) | res
.w
[1];
1584 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
1585 rounding_correction (rnd_mode
,
1586 is_inexact_lt_midpoint
,
1587 is_inexact_gt_midpoint
,
1588 is_midpoint_lt_even
, is_midpoint_gt_even
,
1590 // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
1592 if ((res
.w
[1] & MASK_COEFF
) < 0x0000314dc6448d93ull
||
1593 ((res
.w
[1] & MASK_COEFF
) == 0x0000314dc6448d93ull
&&
1594 res
.w
[0] < 0x38c15b0a00000000ull
)) {
1600 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
1601 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
1602 // set the inexact flag
1603 *pfpsf
|= INEXACT_EXCEPTION
;
1605 *pfpsf
|= UNDERFLOW_EXCEPTION
;
1608 if ((*pfpsf
& INEXACT_EXCEPTION
) == 0) { // x * y is exact
1609 // need to ensure that the result has the preferred exponent
1610 p_exp
= res
.w
[1] & MASK_EXP
;
1611 if (z_exp
< p_exp
) { // the preferred exponent is z_exp
1612 // signficand of res in C3
1613 C3
.w
[1] = res
.w
[1] & MASK_COEFF
;
1615 // the number of decimal digits of x * y is q4 <= 34
1616 // Note: the coefficient fits in 128 bits
1618 // return (C3 * 10^scale) * 10^(p_exp - scale)
1619 // where scale = min (p34-q4, (p_exp-z_exp) >> 49)
1621 ind
= (p_exp
- z_exp
) >> 49;
1624 // subtract scale from the exponent
1625 p_exp
= p_exp
- ((UINT64
) scale
<< 49);
1627 ; // leave res unchanged
1628 } else if (q4
<= 19) { // x * y fits in 64 bits
1629 if (scale
<= 19) { // 10^scale fits in 64 bits
1630 // 64 x 64 C3.w[0] * ten2k64[scale]
1631 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
1632 } else { // 10^scale fits in 128 bits
1633 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1634 __mul_128x64_to_128 (res
, C3
.w
[0], ten2k128
[scale
- 20]);
1636 res
.w
[1] = p_sign
| (p_exp
& MASK_EXP
) | res
.w
[1];
1637 } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
1638 // 64 x 128 ten2k64[scale] * C3
1639 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
1640 res
.w
[1] = p_sign
| (p_exp
& MASK_EXP
) | res
.w
[1];
1642 } // else leave the result as it is, because p_exp <= z_exp
1644 *pfpsf
|= save_fpsf
;
1645 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1646 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1647 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1648 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1651 } // else we have f * f + f
1653 // continue with x = f, y = f, z = f
1655 delta
= q3
+ e3
- q4
- e4
;
1659 if (p34
<= delta
- 1 || // Case (1')
1660 (p34
== delta
&& e3
+ 6176 < p34
- q3
)) { // Case (1''A)
1661 // check for overflow, which can occur only in Case (1')
1662 if ((q3
+ e3
) > (p34
+ expmax
) && p34
<= delta
- 1) {
1663 // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary
1664 // condition for (q3 + e3) > (p34 + expmax)
1665 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
1666 res
.w
[1] = z_sign
| 0x7800000000000000ull
; // +/-inf
1667 res
.w
[0] = 0x0000000000000000ull
;
1668 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
1670 if (p_sign
== z_sign
) {
1671 is_inexact_lt_midpoint
= 1;
1673 is_inexact_gt_midpoint
= 1;
1675 // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3)
1678 res
.w
[1] = z_sign
| C3
.w
[1];
1681 if (q3
<= 19) { // C3 fits in 64 bits
1682 if (scale
<= 19) { // 10^scale fits in 64 bits
1683 // 64 x 64 C3.w[0] * ten2k64[scale]
1684 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
1685 } else { // 10^scale fits in 128 bits
1686 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1687 __mul_128x64_to_128 (res
, C3
.w
[0],
1688 ten2k128
[scale
- 20]);
1690 } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
1691 // 64 x 128 ten2k64[scale] * C3
1692 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
1694 // the coefficient in res has q3 + scale = p34 digits
1697 res
.w
[1] = z_sign
| res
.w
[1];
1698 rounding_correction (rnd_mode
,
1699 is_inexact_lt_midpoint
,
1700 is_inexact_gt_midpoint
,
1701 is_midpoint_lt_even
, is_midpoint_gt_even
,
1704 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1705 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1706 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1707 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1712 if (q3
< p34
) { // the preferred exponent is z_exp - (p34 - q3)
1713 // return (C3 * 10^scale) * 10^(z_exp - scale)
1714 // where scale = min (p34-q3, z_exp-EMIN)
1722 } else if (q3
<= 19) { // z fits in 64 bits
1723 if (scale
<= 19) { // 10^scale fits in 64 bits
1724 // 64 x 64 C3.w[0] * ten2k64[scale]
1725 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
1726 } else { // 10^scale fits in 128 bits
1727 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1728 __mul_128x64_to_128 (res
, C3
.w
[0], ten2k128
[scale
- 20]);
1730 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1731 // 64 x 128 ten2k64[scale] * C3
1732 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
1734 // the coefficient in res has q3 + scale digits
1735 // subtract scale from the exponent
1736 z_exp
= z_exp
- ((UINT64
) scale
<< 49);
1738 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
1739 if (scale
+ q3
< p34
)
1740 *pfpsf
|= UNDERFLOW_EXCEPTION
;
1743 res
.w
[1] = z_sign
| ((UINT64
) (e3
+ 6176) << 49) | C3
.w
[1];
1747 // use the following to avoid double rounding errors when operating on
1748 // mixed formats in rounding to nearest, and for correcting the result
1749 // if not rounding to nearest
1750 if ((p_sign
!= z_sign
) && (delta
== (q3
+ scale
+ 1))) {
1751 // there is a gap of exactly one digit between the scaled C3 and C4
1752 // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case
1753 if ((q3
<= 19 && C3
.w
[0] != ten2k64
[q3
- 1]) ||
1754 (q3
== 20 && (C3
.w
[1] != 0 || C3
.w
[0] != ten2k64
[19])) ||
1755 (q3
>= 21 && (C3
.w
[1] != ten2k128
[q3
- 21].w
[1] ||
1756 C3
.w
[0] != ten2k128
[q3
- 21].w
[0]))) {
1757 // C3 * 10^ scale != 10^(q3-1)
1758 // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
1759 // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
1760 is_inexact_gt_midpoint
= 1; // if (z_sign), set as if for abs. value
1761 } else { // if C3 * 10^scale = 10^(q3+scale-1)
1762 // ok from above e3 = (z_exp >> 49) - 6176;
1763 // the result is always inexact
1767 // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
1768 // x = q4-1, 1 <= x <= 67 and check if this operation is exact
1769 if (q4
<= 18) { // 2 <= q4 <= 18
1770 round64_2_18 (q4
, q4
- 1, C4
.w
[0], &R64
, &incr_exp
,
1771 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
1772 &is_inexact_lt_midpoint
,
1773 &is_inexact_gt_midpoint
);
1774 } else if (q4
<= 38) {
1775 P128
.w
[1] = C4
.w
[1];
1776 P128
.w
[0] = C4
.w
[0];
1777 round128_19_38 (q4
, q4
- 1, P128
, &R128
, &incr_exp
,
1778 &is_midpoint_lt_even
,
1779 &is_midpoint_gt_even
,
1780 &is_inexact_lt_midpoint
,
1781 &is_inexact_gt_midpoint
);
1782 R64
= R128
.w
[0]; // one decimal digit
1783 } else if (q4
<= 57) {
1784 P192
.w
[2] = C4
.w
[2];
1785 P192
.w
[1] = C4
.w
[1];
1786 P192
.w
[0] = C4
.w
[0];
1787 round192_39_57 (q4
, q4
- 1, P192
, &R192
, &incr_exp
,
1788 &is_midpoint_lt_even
,
1789 &is_midpoint_gt_even
,
1790 &is_inexact_lt_midpoint
,
1791 &is_inexact_gt_midpoint
);
1792 R64
= R192
.w
[0]; // one decimal digit
1793 } else { // if (q4 <= 68)
1794 round256_58_76 (q4
, q4
- 1, C4
, &R256
, &incr_exp
,
1795 &is_midpoint_lt_even
,
1796 &is_midpoint_gt_even
,
1797 &is_inexact_lt_midpoint
,
1798 &is_inexact_gt_midpoint
);
1799 R64
= R256
.w
[0]; // one decimal digit
1805 if (q4
== 1 && C4
.w
[0] == 5) {
1806 is_inexact_lt_midpoint
= 0;
1807 is_inexact_gt_midpoint
= 0;
1808 is_midpoint_lt_even
= 1;
1809 is_midpoint_gt_even
= 0;
1810 } else if ((e3
== expmin
) ||
1811 R64
< 5 || (R64
== 5 && is_inexact_gt_midpoint
)) {
1812 // result does not change
1813 is_inexact_lt_midpoint
= 0;
1814 is_inexact_gt_midpoint
= 1;
1815 is_midpoint_lt_even
= 0;
1816 is_midpoint_gt_even
= 0;
1818 is_inexact_lt_midpoint
= 1;
1819 is_inexact_gt_midpoint
= 0;
1820 is_midpoint_lt_even
= 0;
1821 is_midpoint_gt_even
= 0;
1822 // result decremented is 10^(q3+scale) - 1
1823 if ((q3
+ scale
) <= 19) {
1825 res
.w
[0] = ten2k64
[q3
+ scale
];
1826 } else { // if ((q3 + scale + 1) <= 35)
1827 res
.w
[1] = ten2k128
[q3
+ scale
- 20].w
[1];
1828 res
.w
[0] = ten2k128
[q3
+ scale
- 20].w
[0];
1830 res
.w
[0] = res
.w
[0] - 1; // borrow never occurs
1831 z_exp
= z_exp
- EXP_P1
;
1833 res
.w
[1] = z_sign
| ((UINT64
) (e3
+ 6176) << 49) | res
.w
[1];
1836 if (R64
< 5 || (R64
== 5 && !is_inexact_lt_midpoint
)) {
1837 ; // result not tiny (in round-to-nearest mode)
1839 *pfpsf
|= UNDERFLOW_EXCEPTION
;
1842 } // end 10^(q3+scale-1)
1843 // set the inexact flag
1844 *pfpsf
|= INEXACT_EXCEPTION
;
1846 if (p_sign
== z_sign
) {
1847 // if (z_sign), set as if for absolute value
1848 is_inexact_lt_midpoint
= 1;
1849 } else { // if (p_sign != z_sign)
1850 // if (z_sign), set as if for absolute value
1851 is_inexact_gt_midpoint
= 1;
1853 *pfpsf
|= INEXACT_EXCEPTION
;
1855 // the result is always inexact => set the inexact flag
1856 // Determine tininess:
1857 // if (exp > expmin)
1858 // the result is not tiny
1859 // else // if exp = emin
1860 // if (q3 + scale < p34)
1861 // the result is tiny
1862 // else // if (q3 + scale = p34)
1863 // if (C3 * 10^scale > 10^33)
1864 // the result is not tiny
1865 // else // if C3 * 10^scale = 10^33
1867 // the result is not tiny
1868 // else // if (xy * z < 0)
1870 // if rnd_mode != RP
1871 // the result is tiny
1873 // the result is not tiny
1874 // else // if (z < 0)
1875 // if rnd_mode != RM
1876 // the result is tiny
1878 // the result is not tiny
1885 if ((e3
== expmin
&& (q3
+ scale
) < p34
) ||
1886 (e3
== expmin
&& (q3
+ scale
) == p34
&&
1887 (res
.w
[1] & MASK_COEFF
) == 0x0000314dc6448d93ull
&& // 10^33_high
1888 res
.w
[0] == 0x38c15b0a00000000ull
&& // 10^33_low
1889 z_sign
!= p_sign
&& ((!z_sign
&& rnd_mode
!= ROUNDING_UP
) ||
1890 (z_sign
&& rnd_mode
!= ROUNDING_DOWN
)))) {
1891 *pfpsf
|= UNDERFLOW_EXCEPTION
;
1893 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
1894 rounding_correction (rnd_mode
,
1895 is_inexact_lt_midpoint
,
1896 is_inexact_gt_midpoint
,
1897 is_midpoint_lt_even
, is_midpoint_gt_even
,
1900 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
1901 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
1902 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
1903 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
1907 } else if (p34
== delta
) { // Case (1''B)
1909 // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3
1910 // and C3 can be scaled up to p34 digits if needed
1912 // scale C3 to p34 digits if needed
1913 scale
= p34
- q3
; // 0 <= scale <= p34 - 1
1917 } else if (q3
<= 19) { // z fits in 64 bits
1918 if (scale
<= 19) { // 10^scale fits in 64 bits
1919 // 64 x 64 C3.w[0] * ten2k64[scale]
1920 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
1921 } else { // 10^scale fits in 128 bits
1922 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1923 __mul_128x64_to_128 (res
, C3
.w
[0], ten2k128
[scale
- 20]);
1925 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1926 // 64 x 128 ten2k64[scale] * C3
1927 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
1929 // subtract scale from the exponent
1930 z_exp
= z_exp
- ((UINT64
) scale
<< 49);
1932 // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits
1934 // determine whether x * y is less than, equal to, or greater than
1937 if (C4
.w
[0] < midpoint64
[q4
- 1]) { // < 1/2 ulp
1939 } else if (C4
.w
[0] == midpoint64
[q4
- 1]) { // = 1/2 ulp
1941 } else { // > 1/2 ulp
1944 } else if (q4
<= 38) {
1945 if (C4
.w
[2] == 0 && (C4
.w
[1] < midpoint128
[q4
- 20].w
[1] ||
1946 (C4
.w
[1] == midpoint128
[q4
- 20].w
[1] &&
1947 C4
.w
[0] < midpoint128
[q4
- 20].w
[0]))) { // < 1/2 ulp
1949 } else if (C4
.w
[2] == 0 && C4
.w
[1] == midpoint128
[q4
- 20].w
[1] &&
1950 C4
.w
[0] == midpoint128
[q4
- 20].w
[0]) { // = 1/2 ulp
1952 } else { // > 1/2 ulp
1955 } else if (q4
<= 58) {
1956 if (C4
.w
[3] == 0 && (C4
.w
[2] < midpoint192
[q4
- 39].w
[2] ||
1957 (C4
.w
[2] == midpoint192
[q4
- 39].w
[2] &&
1958 C4
.w
[1] < midpoint192
[q4
- 39].w
[1]) ||
1959 (C4
.w
[2] == midpoint192
[q4
- 39].w
[2] &&
1960 C4
.w
[1] == midpoint192
[q4
- 39].w
[1] &&
1961 C4
.w
[0] < midpoint192
[q4
- 39].w
[0]))) { // < 1/2 ulp
1963 } else if (C4
.w
[3] == 0 && C4
.w
[2] == midpoint192
[q4
- 39].w
[2] &&
1964 C4
.w
[1] == midpoint192
[q4
- 39].w
[1] &&
1965 C4
.w
[0] == midpoint192
[q4
- 39].w
[0]) { // = 1/2 ulp
1967 } else { // > 1/2 ulp
1971 if (C4
.w
[3] < midpoint256
[q4
- 59].w
[3] ||
1972 (C4
.w
[3] == midpoint256
[q4
- 59].w
[3] &&
1973 C4
.w
[2] < midpoint256
[q4
- 59].w
[2]) ||
1974 (C4
.w
[3] == midpoint256
[q4
- 59].w
[3] &&
1975 C4
.w
[2] == midpoint256
[q4
- 59].w
[2] &&
1976 C4
.w
[1] < midpoint256
[q4
- 59].w
[1]) ||
1977 (C4
.w
[3] == midpoint256
[q4
- 59].w
[3] &&
1978 C4
.w
[2] == midpoint256
[q4
- 59].w
[2] &&
1979 C4
.w
[1] == midpoint256
[q4
- 59].w
[1] &&
1980 C4
.w
[0] < midpoint256
[q4
- 59].w
[0])) { // < 1/2 ulp
1982 } else if (C4
.w
[3] == midpoint256
[q4
- 59].w
[3] &&
1983 C4
.w
[2] == midpoint256
[q4
- 59].w
[2] &&
1984 C4
.w
[1] == midpoint256
[q4
- 59].w
[1] &&
1985 C4
.w
[0] == midpoint256
[q4
- 59].w
[0]) { // = 1/2 ulp
1987 } else { // > 1/2 ulp
1992 if (p_sign
== z_sign
) {
1994 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
1995 // use the following to avoid double rounding errors when operating on
1996 // mixed formats in rounding to nearest
1997 is_inexact_lt_midpoint
= 1; // if (z_sign), as if for absolute value
1998 } else if ((eq_half_ulp
&& (res
.w
[0] & 0x01)) || gt_half_ulp
) {
1999 // add 1 ulp to the significand
2001 if (res
.w
[0] == 0x0ull
)
2003 // check for rounding overflow, when coeff == 10^34
2004 if ((res
.w
[1] & MASK_COEFF
) == 0x0001ed09bead87c0ull
&&
2005 res
.w
[0] == 0x378d8e6400000000ull
) { // coefficient = 10^34
2008 z_exp
= ((UINT64
) (e3
+ 6176) << 49) & MASK_EXP
;
2009 res
.w
[1] = 0x0000314dc6448d93ull
;
2010 res
.w
[0] = 0x38c15b0a00000000ull
;
2013 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2015 is_midpoint_lt_even
= 1; // if (z_sign), as if for absolute value
2017 is_inexact_gt_midpoint
= 1; // if (z_sign), as if for absolute value
2019 } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2021 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2022 is_midpoint_gt_even
= 1; // if (z_sign), as if for absolute value
2024 // the result is always inexact, and never tiny
2025 // set the inexact flag
2026 *pfpsf
|= INEXACT_EXCEPTION
;
2027 // check for overflow
2028 if (e3
> expmax
&& rnd_mode
== ROUNDING_TO_NEAREST
) {
2029 res
.w
[1] = z_sign
| 0x7800000000000000ull
; // +/-inf
2030 res
.w
[0] = 0x0000000000000000ull
;
2031 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
2032 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2033 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2034 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2035 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2039 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
2040 rounding_correction (rnd_mode
,
2041 is_inexact_lt_midpoint
,
2042 is_inexact_gt_midpoint
,
2043 is_midpoint_lt_even
, is_midpoint_gt_even
,
2045 z_exp
= res
.w
[1] & MASK_EXP
;
2047 } else { // if (p_sign != z_sign)
2048 // consider two cases, because C3 * 10^scale = 10^33 is a special case
2049 if (res
.w
[1] != 0x0000314dc6448d93ull
||
2050 res
.w
[0] != 0x38c15b0a00000000ull
) { // C3 * 10^scale != 10^33
2052 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2053 // use the following to avoid double rounding errors when operating
2054 // on mixed formats in rounding to nearest
2055 is_inexact_gt_midpoint
= 1; // if (z_sign), as if for absolute value
2056 } else if ((eq_half_ulp
&& (res
.w
[0] & 0x01)) || gt_half_ulp
) {
2057 // subtract 1 ulp from the significand
2059 if (res
.w
[0] == 0xffffffffffffffffull
)
2061 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2063 is_midpoint_gt_even
= 1; // if (z_sign), as if for absolute value
2065 is_inexact_lt_midpoint
= 1; //if(z_sign), as if for absolute value
2067 } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2069 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2070 is_midpoint_lt_even
= 1; // if (z_sign), as if for absolute value
2072 // the result is always inexact, and never tiny
2073 // check for overflow for RN
2075 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
2076 res
.w
[1] = z_sign
| 0x7800000000000000ull
; // +/-inf
2077 res
.w
[0] = 0x0000000000000000ull
;
2078 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
2080 rounding_correction (rnd_mode
,
2081 is_inexact_lt_midpoint
,
2082 is_inexact_gt_midpoint
,
2083 is_midpoint_lt_even
,
2084 is_midpoint_gt_even
, e3
, &res
,
2087 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2088 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2089 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2090 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2094 // set the inexact flag
2095 *pfpsf
|= INEXACT_EXCEPTION
;
2096 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
2097 rounding_correction (rnd_mode
,
2098 is_inexact_lt_midpoint
,
2099 is_inexact_gt_midpoint
,
2100 is_midpoint_lt_even
,
2101 is_midpoint_gt_even
, e3
, &res
, pfpsf
);
2103 z_exp
= res
.w
[1] & MASK_EXP
;
2104 } else { // if C3 * 10^scale = 10^33
2105 e3
= (z_exp
>> 49) - 6176;
2107 // the result is exact if exp > expmin and C4 = d*10^(q4-1),
2108 // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact
2110 // if q4 = 1 the result is exact
2111 // result coefficient = 10^34 - C4
2112 res
.w
[1] = 0x0001ed09bead87c0ull
;
2113 res
.w
[0] = 0x378d8e6400000000ull
- C4
.w
[0];
2114 z_exp
= z_exp
- EXP_P1
;
2116 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2118 // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
2119 // x = q4-1, 1 <= x <= 67 and check if this operation is exact
2120 if (q4
<= 18) { // 2 <= q4 <= 18
2121 round64_2_18 (q4
, q4
- 1, C4
.w
[0], &R64
, &incr_exp
,
2122 &is_midpoint_lt_even
,
2123 &is_midpoint_gt_even
,
2124 &is_inexact_lt_midpoint
,
2125 &is_inexact_gt_midpoint
);
2126 } else if (q4
<= 38) {
2127 P128
.w
[1] = C4
.w
[1];
2128 P128
.w
[0] = C4
.w
[0];
2129 round128_19_38 (q4
, q4
- 1, P128
, &R128
, &incr_exp
,
2130 &is_midpoint_lt_even
,
2131 &is_midpoint_gt_even
,
2132 &is_inexact_lt_midpoint
,
2133 &is_inexact_gt_midpoint
);
2134 R64
= R128
.w
[0]; // one decimal digit
2135 } else if (q4
<= 57) {
2136 P192
.w
[2] = C4
.w
[2];
2137 P192
.w
[1] = C4
.w
[1];
2138 P192
.w
[0] = C4
.w
[0];
2139 round192_39_57 (q4
, q4
- 1, P192
, &R192
, &incr_exp
,
2140 &is_midpoint_lt_even
,
2141 &is_midpoint_gt_even
,
2142 &is_inexact_lt_midpoint
,
2143 &is_inexact_gt_midpoint
);
2144 R64
= R192
.w
[0]; // one decimal digit
2145 } else { // if (q4 <= 68)
2146 round256_58_76 (q4
, q4
- 1, C4
, &R256
, &incr_exp
,
2147 &is_midpoint_lt_even
,
2148 &is_midpoint_gt_even
,
2149 &is_inexact_lt_midpoint
,
2150 &is_inexact_gt_midpoint
);
2151 R64
= R256
.w
[0]; // one decimal digit
2153 if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2154 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
2155 // the result is exact: 10^34 - R64
2156 // incr_exp = 0 with certainty
2157 z_exp
= z_exp
- EXP_P1
;
2160 z_sign
| (z_exp
& MASK_EXP
) | 0x0001ed09bead87c0ull
;
2161 res
.w
[0] = 0x378d8e6400000000ull
- R64
;
2163 // We want R64 to be the top digit of C4, but we actually
2164 // obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
2165 // because the top digit is (C4 * 10^(-q4+1))RZ
2166 // however, if incr_exp = 1 then R64 = 10 with certainty
2170 // the result is inexact as C4 has more than 1 significant digit
2171 // and C3 * 10^scale = 10^33
2172 // example of case that is treated here:
2173 // 100...0 * 10^e3 - 0.41 * 10^e3 =
2174 // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
2175 // note that (e3 > expmin)
2176 // in order to round, subtract R64 from 10^34 and then compare
2177 // C4 - R64 * 10^(q4-1) with 1/2 ulp
2178 // calculate 10^34 - R64
2179 res
.w
[1] = 0x0001ed09bead87c0ull
;
2180 res
.w
[0] = 0x378d8e6400000000ull
- R64
;
2181 z_exp
= z_exp
- EXP_P1
; // will be OR-ed with sign & significand
2182 // calculate C4 - R64 * 10^(q4-1); this is a rare case and
2183 // R64 is small, 1 <= R64 <= 9
2185 if (is_inexact_lt_midpoint
) {
2186 is_inexact_lt_midpoint
= 0;
2187 is_inexact_gt_midpoint
= 1;
2188 } else if (is_inexact_gt_midpoint
) {
2189 is_inexact_gt_midpoint
= 0;
2190 is_inexact_lt_midpoint
= 1;
2191 } else if (is_midpoint_lt_even
) {
2192 is_midpoint_lt_even
= 0;
2193 is_midpoint_gt_even
= 1;
2194 } else if (is_midpoint_gt_even
) {
2195 is_midpoint_gt_even
= 0;
2196 is_midpoint_lt_even
= 1;
2200 // the result is always inexact, and never tiny
2201 // check for overflow for RN
2203 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
2204 res
.w
[1] = z_sign
| 0x7800000000000000ull
; // +/-inf
2205 res
.w
[0] = 0x0000000000000000ull
;
2206 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
2208 rounding_correction (rnd_mode
,
2209 is_inexact_lt_midpoint
,
2210 is_inexact_gt_midpoint
,
2211 is_midpoint_lt_even
,
2212 is_midpoint_gt_even
, e3
, &res
,
2215 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2216 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2217 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2218 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2222 // set the inexact flag
2223 *pfpsf
|= INEXACT_EXCEPTION
;
2225 z_sign
| ((UINT64
) (e3
+ 6176) << 49) | res
.w
[1];
2226 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
2227 rounding_correction (rnd_mode
,
2228 is_inexact_lt_midpoint
,
2229 is_inexact_gt_midpoint
,
2230 is_midpoint_lt_even
,
2231 is_midpoint_gt_even
, e3
, &res
,
2234 z_exp
= res
.w
[1] & MASK_EXP
;
2235 } // end result is inexact
2237 } else { // if (e3 = emin)
2238 // if e3 = expmin the result is also tiny (the condition for
2239 // tininess is C4 > 050...0 [q4 digits] which is met because
2240 // the msd of C4 is not zero)
2241 // the result is tiny and inexact in all rounding modes;
2242 // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp,
2243 // gt_half_ulp to calculate)
2244 // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged
2246 // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp
2247 if (gt_half_ulp
) { // res = 10^33 - 1
2248 res
.w
[1] = 0x0000314dc6448d93ull
;
2249 res
.w
[0] = 0x38c15b09ffffffffull
;
2251 res
.w
[1] = 0x0000314dc6448d93ull
;
2252 res
.w
[0] = 0x38c15b0a00000000ull
;
2254 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2255 *pfpsf
|= UNDERFLOW_EXCEPTION
; // inexact is set later
2258 is_midpoint_lt_even
= 1; // if (z_sign), as if for absolute value
2259 } else if (lt_half_ulp
) {
2260 is_inexact_gt_midpoint
= 1; //if(z_sign), as if for absolute value
2261 } else { // if (gt_half_ulp)
2262 is_inexact_lt_midpoint
= 1; //if(z_sign), as if for absolute value
2265 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
2266 rounding_correction (rnd_mode
,
2267 is_inexact_lt_midpoint
,
2268 is_inexact_gt_midpoint
,
2269 is_midpoint_lt_even
,
2270 is_midpoint_gt_even
, e3
, &res
,
2272 z_exp
= res
.w
[1] & MASK_EXP
;
2275 // set the inexact flag (if the result was not exact)
2276 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
2277 is_midpoint_lt_even
|| is_midpoint_gt_even
)
2278 *pfpsf
|= INEXACT_EXCEPTION
;
2280 } // end if (p_sign != z_sign)
2281 res
.w
[1] = z_sign
| (z_exp
& MASK_EXP
) | res
.w
[1];
2282 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2283 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2284 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2285 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2289 } else if (((q3
<= delta
&& delta
< p34
&& p34
< delta
+ q4
) || // Case (2)
2290 (q3
<= delta
&& delta
+ q4
<= p34
) || // Case (3)
2291 (delta
< q3
&& p34
< delta
+ q4
) || // Case (4)
2292 (delta
< q3
&& q3
<= delta
+ q4
&& delta
+ q4
<= p34
) || // Case (5)
2293 (delta
+ q4
< q3
)) && // Case (6)
2294 !(delta
<= 1 && p_sign
!= z_sign
)) { // Case (2), (3), (4), (5) or (6)
2296 // the result has the sign of z
2298 if ((q3
<= delta
&& delta
< p34
&& p34
< delta
+ q4
) || // Case (2)
2299 (delta
< q3
&& p34
< delta
+ q4
)) { // Case (4)
2300 // round first the sum x * y + z with unbounded exponent
2301 // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1,
2303 // calculate res = C3 * 10^scale
2305 x0
= delta
+ q4
- p34
;
2306 } else if (delta
+ q4
< q3
) { // Case (6)
2307 // make Case (6) look like Case (3) or Case (5) with scale = 0
2308 // by scaling up C4 by 10^(q3 - delta - q4)
2309 scale
= q3
- delta
- q4
; // 1 <= scale <= 33
2310 if (q4
<= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
2311 if (scale
<= 19) { // 10^scale fits in 64 bits
2312 // 64 x 64 C4.w[0] * ten2k64[scale]
2313 __mul_64x64_to_128MACH (P128
, C4
.w
[0], ten2k64
[scale
]);
2314 } else { // 10^scale fits in 128 bits
2315 // 64 x 128 C4.w[0] * ten2k128[scale - 20]
2316 __mul_128x64_to_128 (P128
, C4
.w
[0], ten2k128
[scale
- 20]);
2318 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
2319 // 64 x 128 ten2k64[scale] * C4
2320 __mul_128x64_to_128 (P128
, ten2k64
[scale
], C4
);
2322 C4
.w
[0] = P128
.w
[0];
2323 C4
.w
[1] = P128
.w
[1];
2324 // e4 does not need adjustment, as it is not used from this point on
2327 // now Case (6) looks like Case (3) or Case (5) with scale = 0
2328 } else { // if Case (3) or Case (5)
2329 // Note: Case (3) is similar to Case (2), but scale differs and the
2330 // result is exact, unless it is tiny (so x0 = 0 when calculating the
2331 // result with unbounded exponent)
2333 // calculate first the sum x * y + z with unbounded exponent (exact)
2334 // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1,
2336 // calculate res = C3 * 10^scale
2337 scale
= delta
+ q4
- q3
;
2339 // Note: the comments which follow refer [mainly] to Case (2)]
2343 if (scale
== 0) { // this could happen e.g. if we return to case2_repeat
2347 } else if (q3
<= 19) { // 1 <= scale <= 19; z fits in 64 bits
2348 if (scale
<= 19) { // 10^scale fits in 64 bits
2349 // 64 x 64 C3.w[0] * ten2k64[scale]
2350 __mul_64x64_to_128MACH (res
, C3
.w
[0], ten2k64
[scale
]);
2351 } else { // 10^scale fits in 128 bits
2352 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
2353 __mul_128x64_to_128 (res
, C3
.w
[0], ten2k128
[scale
- 20]);
2355 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
2356 // 64 x 128 ten2k64[scale] * C3
2357 __mul_128x64_to_128 (res
, ten2k64
[scale
], C3
);
2359 // e3 is already calculated
2361 // now res = C3 * 10^scale and e3 = e3 - scale
2362 // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat
2363 // because the result was too small
2365 // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34,
2366 // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67)
2367 // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of
2368 // the rounding fits in 128 bits!)
2369 // x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
2370 // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
2371 if (x0
== 0) { // this could happen only if we return to case2_repeat, or
2372 // for Case (3) or Case (6)
2373 R128
.w
[1] = C4
.w
[1];
2374 R128
.w
[0] = C4
.w
[0];
2375 } else if (q4
<= 18) {
2376 // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
2377 round64_2_18 (q4
, x0
, C4
.w
[0], &R64
, &incr_exp
,
2378 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2379 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
2381 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
2382 R64
= ten2k64
[q4
- x0
];
2386 } else if (q4
<= 38) {
2387 // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
2388 P128
.w
[1] = C4
.w
[1];
2389 P128
.w
[0] = C4
.w
[0];
2390 round128_19_38 (q4
, x0
, P128
, &R128
, &incr_exp
,
2391 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2392 &is_inexact_lt_midpoint
,
2393 &is_inexact_gt_midpoint
);
2395 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
2396 if (q4
- x0
<= 19) { // 1 <= q4 - x0 <= 19
2397 R128
.w
[0] = ten2k64
[q4
- x0
];
2398 // R128.w[1] stays 0
2399 } else { // 20 <= q4 - x0 <= 37
2400 R128
.w
[0] = ten2k128
[q4
- x0
- 20].w
[0];
2401 R128
.w
[1] = ten2k128
[q4
- x0
- 20].w
[1];
2404 } else if (q4
<= 57) {
2405 // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56
2406 P192
.w
[2] = C4
.w
[2];
2407 P192
.w
[1] = C4
.w
[1];
2408 P192
.w
[0] = C4
.w
[0];
2409 round192_39_57 (q4
, x0
, P192
, &R192
, &incr_exp
,
2410 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2411 &is_inexact_lt_midpoint
,
2412 &is_inexact_gt_midpoint
);
2413 // R192.w[2] is always 0
2415 // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
2416 if (q4
- x0
<= 19) { // 1 <= q4 - x0 <= 19
2417 R192
.w
[0] = ten2k64
[q4
- x0
];
2418 // R192.w[1] stays 0
2419 // R192.w[2] stays 0
2420 } else { // 20 <= q4 - x0 <= 33
2421 R192
.w
[0] = ten2k128
[q4
- x0
- 20].w
[0];
2422 R192
.w
[1] = ten2k128
[q4
- x0
- 20].w
[1];
2423 // R192.w[2] stays 0
2426 R128
.w
[1] = R192
.w
[1];
2427 R128
.w
[0] = R192
.w
[0];
2429 // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
2430 round256_58_76 (q4
, x0
, C4
, &R256
, &incr_exp
,
2431 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2432 &is_inexact_lt_midpoint
,
2433 &is_inexact_gt_midpoint
);
2434 // R256.w[3] and R256.w[2] are always 0
2436 // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
2437 if (q4
- x0
<= 19) { // 1 <= q4 - x0 <= 19
2438 R256
.w
[0] = ten2k64
[q4
- x0
];
2439 // R256.w[1] stays 0
2440 // R256.w[2] stays 0
2441 // R256.w[3] stays 0
2442 } else { // 20 <= q4 - x0 <= 33
2443 R256
.w
[0] = ten2k128
[q4
- x0
- 20].w
[0];
2444 R256
.w
[1] = ten2k128
[q4
- x0
- 20].w
[1];
2445 // R256.w[2] stays 0
2446 // R256.w[3] stays 0
2449 R128
.w
[1] = R256
.w
[1];
2450 R128
.w
[0] = R256
.w
[0];
2452 // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4,
2453 // rounded to nearest, which were copied into R128
2454 if (z_sign
== p_sign
) {
2455 lsb
= res
.w
[0] & 0x01; // lsb of C3 * 10^scale
2456 // the sum can result in [up to] p34 or p34 + 1 digits
2457 res
.w
[0] = res
.w
[0] + R128
.w
[0];
2458 res
.w
[1] = res
.w
[1] + R128
.w
[1];
2459 if (res
.w
[0] < R128
.w
[0])
2460 res
.w
[1]++; // carry
2461 // if res > 10^34 - 1 need to increase x0 and decrease scale by 1
2462 if (res
.w
[1] > 0x0001ed09bead87c0ull
||
2463 (res
.w
[1] == 0x0001ed09bead87c0ull
&&
2464 res
.w
[0] > 0x378d8e63ffffffffull
)) {
2465 // avoid double rounding error
2466 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
2467 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
2468 is_midpoint_lt_even0
= is_midpoint_lt_even
;
2469 is_midpoint_gt_even0
= is_midpoint_gt_even
;
2470 is_inexact_lt_midpoint
= 0;
2471 is_inexact_gt_midpoint
= 0;
2472 is_midpoint_lt_even
= 0;
2473 is_midpoint_gt_even
= 0;
2474 P128
.w
[1] = res
.w
[1];
2475 P128
.w
[0] = res
.w
[0];
2476 round128_19_38 (35, 1, P128
, &res
, &incr_exp
,
2477 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2478 &is_inexact_lt_midpoint
,
2479 &is_inexact_gt_midpoint
);
2480 // incr_exp is 0 with certainty in this case
2481 // avoid a double rounding error
2482 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
2483 is_midpoint_lt_even
) { // double rounding error upward
2486 if (res
.w
[0] == 0xffffffffffffffffull
)
2488 // Note: a double rounding error upward is not possible; for this
2489 // the result after the first rounding would have to be 99...95
2490 // (35 digits in all), possibly followed by a number of zeros; this
2491 // is not possible in Cases (2)-(6) or (15)-(17) which may get here
2492 is_midpoint_lt_even
= 0;
2493 is_inexact_lt_midpoint
= 1;
2494 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
2495 is_midpoint_gt_even
) { // double rounding error downward
2500 is_midpoint_gt_even
= 0;
2501 is_inexact_gt_midpoint
= 1;
2502 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2503 !is_inexact_lt_midpoint
2504 && !is_inexact_gt_midpoint
) {
2505 // if this second rounding was exact the result may still be
2506 // inexact because of the first rounding
2507 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
2508 is_inexact_gt_midpoint
= 1;
2510 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
2511 is_inexact_lt_midpoint
= 1;
2513 } else if (is_midpoint_gt_even
&&
2514 (is_inexact_gt_midpoint0
2515 || is_midpoint_lt_even0
)) {
2516 // pulled up to a midpoint
2517 is_inexact_lt_midpoint
= 1;
2518 is_inexact_gt_midpoint
= 0;
2519 is_midpoint_lt_even
= 0;
2520 is_midpoint_gt_even
= 0;
2521 } else if (is_midpoint_lt_even
&&
2522 (is_inexact_lt_midpoint0
2523 || is_midpoint_gt_even0
)) {
2524 // pulled down to a midpoint
2525 is_inexact_lt_midpoint
= 0;
2526 is_inexact_gt_midpoint
= 1;
2527 is_midpoint_lt_even
= 0;
2528 is_midpoint_gt_even
= 0;
2534 if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2535 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
2536 if (is_midpoint_lt_even0
|| is_midpoint_gt_even0
||
2537 is_inexact_lt_midpoint0
|| is_inexact_gt_midpoint0
) {
2538 is_inexact_lt_midpoint
= 1;
2542 // this is the result rounded with unbounded exponent, unless a
2543 // correction is needed
2544 res
.w
[1] = res
.w
[1] & MASK_COEFF
;
2546 if (is_midpoint_gt_even
) {
2548 is_midpoint_gt_even
= 0;
2549 is_midpoint_lt_even
= 1;
2551 if (res
.w
[0] == 0x0)
2553 // check for rounding overflow
2554 if (res
.w
[1] == 0x0001ed09bead87c0ull
&&
2555 res
.w
[0] == 0x378d8e6400000000ull
) {
2556 // res = 10^34 => rounding overflow
2557 res
.w
[1] = 0x0000314dc6448d93ull
;
2558 res
.w
[0] = 0x38c15b0a00000000ull
; // 10^33
2561 } else if (is_midpoint_lt_even
) {
2563 is_midpoint_lt_even
= 0;
2564 is_midpoint_gt_even
= 1;
2566 if (res
.w
[0] == 0xffffffffffffffffull
)
2568 // if the result is pure zero, the sign depends on the rounding
2569 // mode (x*y and z had opposite signs)
2570 if (res
.w
[1] == 0x0ull
&& res
.w
[0] == 0x0ull
) {
2571 if (rnd_mode
!= ROUNDING_DOWN
)
2572 z_sign
= 0x0000000000000000ull
;
2574 z_sign
= 0x8000000000000000ull
;
2575 // the exponent is max (e3, expmin)
2578 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2579 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2580 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2581 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2590 } else { // if (z_sign != p_sign)
2591 lsb
= res
.w
[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4
2592 // used to swap rounding indicators if p_sign != z_sign
2593 // the sum can result in [up to] p34 or p34 - 1 digits
2595 res
.w
[0] = res
.w
[0] - R128
.w
[0];
2596 res
.w
[1] = res
.w
[1] - R128
.w
[1];
2597 if (res
.w
[0] > tmp64
)
2598 res
.w
[1]--; // borrow
2599 // if res < 10^33 and exp > expmin need to decrease x0 and
2600 // increase scale by 1
2601 if (e3
> expmin
&& ((res
.w
[1] < 0x0000314dc6448d93ull
||
2602 (res
.w
[1] == 0x0000314dc6448d93ull
&&
2603 res
.w
[0] < 0x38c15b0a00000000ull
)) ||
2604 (is_inexact_lt_midpoint
2605 && res
.w
[1] == 0x0000314dc6448d93ull
2606 && res
.w
[0] == 0x38c15b0a00000000ull
))
2609 // first restore e3, otherwise it will be too small
2612 is_inexact_lt_midpoint
= 0;
2613 is_inexact_gt_midpoint
= 0;
2614 is_midpoint_lt_even
= 0;
2615 is_midpoint_gt_even
= 0;
2619 // else this is the result rounded with unbounded exponent;
2620 // because the result has opposite sign to that of C4 which was
2621 // rounded, need to change the rounding indicators
2622 if (is_inexact_lt_midpoint
) {
2623 is_inexact_lt_midpoint
= 0;
2624 is_inexact_gt_midpoint
= 1;
2625 } else if (is_inexact_gt_midpoint
) {
2626 is_inexact_gt_midpoint
= 0;
2627 is_inexact_lt_midpoint
= 1;
2628 } else if (lsb
== 0) {
2629 if (is_midpoint_lt_even
) {
2630 is_midpoint_lt_even
= 0;
2631 is_midpoint_gt_even
= 1;
2632 } else if (is_midpoint_gt_even
) {
2633 is_midpoint_gt_even
= 0;
2634 is_midpoint_lt_even
= 1;
2638 } else if (lsb
== 1) {
2639 if (is_midpoint_lt_even
) {
2642 if (res
.w
[0] == 0x0)
2644 // check for rounding overflow
2645 if (res
.w
[1] == 0x0001ed09bead87c0ull
&&
2646 res
.w
[0] == 0x378d8e6400000000ull
) {
2647 // res = 10^34 => rounding overflow
2648 res
.w
[1] = 0x0000314dc6448d93ull
;
2649 res
.w
[0] = 0x38c15b0a00000000ull
; // 10^33
2652 } else if (is_midpoint_gt_even
) {
2655 if (res
.w
[0] == 0xffffffffffffffffull
)
2657 // if the result is pure zero, the sign depends on the rounding
2658 // mode (x*y and z had opposite signs)
2659 if (res
.w
[1] == 0x0ull
&& res
.w
[0] == 0x0ull
) {
2660 if (rnd_mode
!= ROUNDING_DOWN
)
2661 z_sign
= 0x0000000000000000ull
;
2663 z_sign
= 0x8000000000000000ull
;
2664 // the exponent is max (e3, expmin)
2667 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2668 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2669 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2670 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2681 // check for underflow
2682 if (e3
== expmin
) { // and if significand < 10^33 => result is tiny
2683 if ((res
.w
[1] & MASK_COEFF
) < 0x0000314dc6448d93ull
||
2684 ((res
.w
[1] & MASK_COEFF
) == 0x0000314dc6448d93ull
&&
2685 res
.w
[0] < 0x38c15b0a00000000ull
)) {
2688 } else if (e3
< expmin
) {
2689 // the result is tiny, so we must truncate more of res
2692 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
2693 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
2694 is_midpoint_lt_even0
= is_midpoint_lt_even
;
2695 is_midpoint_gt_even0
= is_midpoint_gt_even
;
2696 is_inexact_lt_midpoint
= 0;
2697 is_inexact_gt_midpoint
= 0;
2698 is_midpoint_lt_even
= 0;
2699 is_midpoint_gt_even
= 0;
2700 // determine the number of decimal digits in res
2701 if (res
.w
[1] == 0x0) {
2702 // between 1 and 19 digits
2703 for (ind
= 1; ind
<= 19; ind
++) {
2704 if (res
.w
[0] < ten2k64
[ind
]) {
2709 } else if (res
.w
[1] < ten2k128
[0].w
[1] ||
2710 (res
.w
[1] == ten2k128
[0].w
[1]
2711 && res
.w
[0] < ten2k128
[0].w
[0])) {
2714 } else { // between 21 and 38 digits
2715 for (ind
= 1; ind
<= 18; ind
++) {
2716 if (res
.w
[1] < ten2k128
[ind
].w
[1] ||
2717 (res
.w
[1] == ten2k128
[ind
].w
[1] &&
2718 res
.w
[0] < ten2k128
[ind
].w
[0])) {
2726 // at this point ind >= x0; because delta >= 2 on this path, the case
2727 // ind = x0 can occur only in Case (2) or case (3), when C3 has one
2728 // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin),
2729 // the signs of x * y and z are opposite, and through cancellation
2730 // the most significant decimal digit in res has the weight
2731 // 10^(emin-1); however, it is clear that in this case the most
2732 // significant digit is 9, so the result before rounding is
2734 // Otherwise, ind > x0 because there are non-zero decimal digits in the
2735 // result with weight of at least 10^emin, and correction for underflow
2736 // can be carried out using the round*_*_2_* () routines
2737 if (x0
== ind
) { // the result before rounding is 0.9... * 10^emin
2740 is_inexact_gt_midpoint
= 1;
2741 } else if (ind
<= 18) { // check that 2 <= ind
2742 // 2 <= ind <= 18, 1 <= x0 <= 17
2743 round64_2_18 (ind
, x0
, res
.w
[0], &R64
, &incr_exp
,
2744 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2745 &is_inexact_lt_midpoint
,
2746 &is_inexact_gt_midpoint
);
2748 // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17
2749 R64
= ten2k64
[ind
- x0
];
2753 } else if (ind
<= 38) {
2755 P128
.w
[1] = res
.w
[1];
2756 P128
.w
[0] = res
.w
[0];
2757 round128_19_38 (ind
, x0
, P128
, &res
, &incr_exp
,
2758 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2759 &is_inexact_lt_midpoint
,
2760 &is_inexact_gt_midpoint
);
2762 // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37
2763 if (ind
- x0
<= 19) { // 1 <= ind - x0 <= 19
2764 res
.w
[0] = ten2k64
[ind
- x0
];
2766 } else { // 20 <= ind - x0 <= 37
2767 res
.w
[0] = ten2k128
[ind
- x0
- 20].w
[0];
2768 res
.w
[1] = ten2k128
[ind
- x0
- 20].w
[1];
2772 // avoid a double rounding error
2773 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
2774 is_midpoint_lt_even
) { // double rounding error upward
2777 if (res
.w
[0] == 0xffffffffffffffffull
)
2779 // Note: a double rounding error upward is not possible; for this
2780 // the result after the first rounding would have to be 99...95
2781 // (35 digits in all), possibly followed by a number of zeros; this
2782 // is not possible in Cases (2)-(6) which may get here
2783 is_midpoint_lt_even
= 0;
2784 is_inexact_lt_midpoint
= 1;
2785 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
2786 is_midpoint_gt_even
) { // double rounding error downward
2791 is_midpoint_gt_even
= 0;
2792 is_inexact_gt_midpoint
= 1;
2793 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2794 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
2795 // if this second rounding was exact the result may still be
2796 // inexact because of the first rounding
2797 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
2798 is_inexact_gt_midpoint
= 1;
2800 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
2801 is_inexact_lt_midpoint
= 1;
2803 } else if (is_midpoint_gt_even
&&
2804 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
2805 // pulled up to a midpoint
2806 is_inexact_lt_midpoint
= 1;
2807 is_inexact_gt_midpoint
= 0;
2808 is_midpoint_lt_even
= 0;
2809 is_midpoint_gt_even
= 0;
2810 } else if (is_midpoint_lt_even
&&
2811 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
2812 // pulled down to a midpoint
2813 is_inexact_lt_midpoint
= 0;
2814 is_inexact_gt_midpoint
= 1;
2815 is_midpoint_lt_even
= 0;
2816 is_midpoint_gt_even
= 0;
2822 if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2823 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
2824 if (is_midpoint_lt_even0
|| is_midpoint_gt_even0
||
2825 is_inexact_lt_midpoint0
|| is_inexact_gt_midpoint0
) {
2826 is_inexact_lt_midpoint
= 1;
2832 // check for inexact result
2833 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
2834 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
2835 // set the inexact flag
2836 *pfpsf
|= INEXACT_EXCEPTION
;
2838 *pfpsf
|= UNDERFLOW_EXCEPTION
;
2840 // now check for significand = 10^34 (may have resulted from going
2841 // back to case2_repeat)
2842 if (res
.w
[1] == 0x0001ed09bead87c0ull
&&
2843 res
.w
[0] == 0x378d8e6400000000ull
) { // if res = 10^34
2844 res
.w
[1] = 0x0000314dc6448d93ull
; // res = 10^33
2845 res
.w
[0] = 0x38c15b0a00000000ull
;
2848 res
.w
[1] = z_sign
| ((UINT64
) (e3
+ 6176) << 49) | res
.w
[1];
2849 // check for overflow
2850 if (rnd_mode
== ROUNDING_TO_NEAREST
&& e3
> expmax
) {
2851 res
.w
[1] = z_sign
| 0x7800000000000000ull
; // +/-inf
2852 res
.w
[0] = 0x0000000000000000ull
;
2853 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
2855 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
2856 rounding_correction (rnd_mode
,
2857 is_inexact_lt_midpoint
,
2858 is_inexact_gt_midpoint
,
2859 is_midpoint_lt_even
, is_midpoint_gt_even
,
2862 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2863 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2864 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2865 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2871 // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and
2872 // the signs of x*y and z are opposite; in these cases massive
2873 // cancellation can occur, so it is better to scale either C3 or C4 and
2874 // to perform the subtraction before rounding; rounding is performed
2875 // next, depending on the number of decimal digits in the result and on
2876 // the exponent value
2877 // Note: overlow is not possible in this case
2878 // this is similar to Cases (15), (16), and (17)
2880 if (delta
+ q4
< q3
) { // from Case (6)
2881 // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and
2882 // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign)
2883 // and call add_and_round; delta stays positive
2884 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
2885 P128
.w
[1] = C3
.w
[1];
2886 P128
.w
[0] = C3
.w
[0];
2889 C4
.w
[1] = P128
.w
[1];
2890 C4
.w
[0] = P128
.w
[0];
2900 } else { // from Cases (2), (3), (4), (5)
2901 // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be
2902 // scaled up by q4 + delta - q3; this is the same as in Cases (15),
2903 // (16), and (17) if we just change the sign of delta
2906 add_and_round (q3
, q4
, e4
, delta
, p34
, z_sign
, p_sign
, C3
, C4
,
2907 rnd_mode
, &is_midpoint_lt_even
,
2908 &is_midpoint_gt_even
, &is_inexact_lt_midpoint
,
2909 &is_inexact_gt_midpoint
, pfpsf
, &res
);
2910 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
2911 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
2912 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
2913 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
2919 } else { // if delta < 0
2923 if (p34
< q4
&& q4
<= delta
) { // Case (7)
2925 // truncate C4 to p34 digits into res
2926 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
2929 P128
.w
[1] = C4
.w
[1];
2930 P128
.w
[0] = C4
.w
[0];
2931 round128_19_38 (q4
, x0
, P128
, &res
, &incr_exp
,
2932 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2933 &is_inexact_lt_midpoint
,
2934 &is_inexact_gt_midpoint
);
2935 } else if (q4
<= 57) { // 35 <= q4 <= 57
2936 P192
.w
[2] = C4
.w
[2];
2937 P192
.w
[1] = C4
.w
[1];
2938 P192
.w
[0] = C4
.w
[0];
2939 round192_39_57 (q4
, x0
, P192
, &R192
, &incr_exp
,
2940 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2941 &is_inexact_lt_midpoint
,
2942 &is_inexact_gt_midpoint
);
2943 res
.w
[0] = R192
.w
[0];
2944 res
.w
[1] = R192
.w
[1];
2945 } else { // if (q4 <= 68)
2946 round256_58_76 (q4
, x0
, C4
, &R256
, &incr_exp
,
2947 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
2948 &is_inexact_lt_midpoint
,
2949 &is_inexact_gt_midpoint
);
2950 res
.w
[0] = R256
.w
[0];
2951 res
.w
[1] = R256
.w
[1];
2957 if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
2958 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
2959 // if C4 rounded to p34 digits is exact then the result is inexact,
2960 // in a way that depends on the signs of x * y and z
2961 if (p_sign
== z_sign
) {
2962 is_inexact_lt_midpoint
= 1;
2963 } else { // if (p_sign != z_sign)
2964 if (res
.w
[1] != 0x0000314dc6448d93ull
||
2965 res
.w
[0] != 0x38c15b0a00000000ull
) { // res != 10^33
2966 is_inexact_gt_midpoint
= 1;
2967 } else { // res = 10^33 and exact is a special case
2968 // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1
2969 // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1
2970 // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1
2971 // Note: ulp is really ulp/10 (after borrow which propagates to msd)
2972 if (delta
> p34
+ 1) { // C3 < 1/2
2973 // res = 10^33, unchanged
2974 is_inexact_gt_midpoint
= 1;
2975 } else { // if (delta == p34 + 1)
2977 if (C3
.w
[0] < midpoint64
[q3
- 1]) { // C3 < 1/2 ulp
2978 // res = 10^33, unchanged
2979 is_inexact_gt_midpoint
= 1;
2980 } else if (C3
.w
[0] == midpoint64
[q3
- 1]) { // C3 = 1/2 ulp
2981 // res = 10^33, unchanged
2982 is_midpoint_lt_even
= 1;
2983 } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp
2984 res
.w
[1] = 0x0001ed09bead87c0ull
; // 10^34 - 1
2985 res
.w
[0] = 0x378d8e63ffffffffull
;
2987 is_inexact_lt_midpoint
= 1;
2989 } else { // if (20 <= q3 <=34)
2990 if (C3
.w
[1] < midpoint128
[q3
- 20].w
[1] ||
2991 (C3
.w
[1] == midpoint128
[q3
- 20].w
[1] &&
2992 C3
.w
[0] < midpoint128
[q3
- 20].w
[0])) { // C3 < 1/2 ulp
2993 // res = 10^33, unchanged
2994 is_inexact_gt_midpoint
= 1;
2995 } else if (C3
.w
[1] == midpoint128
[q3
- 20].w
[1] &&
2996 C3
.w
[0] == midpoint128
[q3
- 20].w
[0]) { // C3 = 1/2 ulp
2997 // res = 10^33, unchanged
2998 is_midpoint_lt_even
= 1;
2999 } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp
3000 res
.w
[1] = 0x0001ed09bead87c0ull
; // 10^34 - 1
3001 res
.w
[0] = 0x378d8e63ffffffffull
;
3003 is_inexact_lt_midpoint
= 1;
3009 } else if (is_midpoint_lt_even
) {
3010 if (z_sign
!= p_sign
) {
3011 // needs correction: res = res - 1
3012 res
.w
[0] = res
.w
[0] - 1;
3013 if (res
.w
[0] == 0xffffffffffffffffull
)
3015 // if it is (10^33-1)*10^e4 then the corect result is
3016 // (10^34-1)*10(e4-1)
3017 if (res
.w
[1] == 0x0000314dc6448d93ull
&&
3018 res
.w
[0] == 0x38c15b09ffffffffull
) {
3019 res
.w
[1] = 0x0001ed09bead87c0ull
; // 10^34 - 1
3020 res
.w
[0] = 0x378d8e63ffffffffull
;
3023 is_midpoint_lt_even
= 0;
3024 is_inexact_lt_midpoint
= 1;
3025 } else { // if (z_sign == p_sign)
3026 is_midpoint_lt_even
= 0;
3027 is_inexact_gt_midpoint
= 1;
3029 } else if (is_midpoint_gt_even
) {
3030 if (z_sign
== p_sign
) {
3031 // needs correction: res = res + 1 (cannot cross in the next binade)
3032 res
.w
[0] = res
.w
[0] + 1;
3033 if (res
.w
[0] == 0x0000000000000000ull
)
3035 is_midpoint_gt_even
= 0;
3036 is_inexact_gt_midpoint
= 1;
3037 } else { // if (z_sign != p_sign)
3038 is_midpoint_gt_even
= 0;
3039 is_inexact_lt_midpoint
= 1;
3042 ; // the rounded result is already correct
3044 // check for overflow
3045 if (rnd_mode
== ROUNDING_TO_NEAREST
&& e4
> expmax
) {
3046 res
.w
[1] = p_sign
| 0x7800000000000000ull
;
3047 res
.w
[0] = 0x0000000000000000ull
;
3048 *pfpsf
|= (OVERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
3049 } else { // no overflow or not RN
3050 p_exp
= ((UINT64
) (e4
+ 6176) << 49);
3051 res
.w
[1] = p_sign
| (p_exp
& MASK_EXP
) | res
.w
[1];
3053 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
3054 rounding_correction (rnd_mode
,
3055 is_inexact_lt_midpoint
,
3056 is_inexact_gt_midpoint
,
3057 is_midpoint_lt_even
, is_midpoint_gt_even
,
3060 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
3061 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
3062 // set the inexact flag
3063 *pfpsf
|= INEXACT_EXCEPTION
;
3065 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
3066 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
3067 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
3068 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
3072 } else if ((q4
<= p34
&& p34
<= delta
) || // Case (8)
3073 (q4
<= delta
&& delta
< p34
&& p34
< delta
+ q3
) || // Case (9)
3074 (q4
<= delta
&& delta
+ q3
<= p34
) || // Case (10)
3075 (delta
< q4
&& q4
<= p34
&& p34
< delta
+ q3
) || // Case (13)
3076 (delta
< q4
&& q4
<= delta
+ q3
&& delta
+ q3
<= p34
) || // Case (14)
3077 (delta
+ q3
< q4
&& q4
<= p34
)) { // Case (18)
3079 // Case (8) is similar to Case (1), with C3 and C4 swapped
3080 // Case (9) is similar to Case (2), with C3 and C4 swapped
3081 // Case (10) is similar to Case (3), with C3 and C4 swapped
3082 // Case (13) is similar to Case (4), with C3 and C4 swapped
3083 // Case (14) is similar to Case (5), with C3 and C4 swapped
3084 // Case (18) is similar to Case (6), with C3 and C4 swapped
3086 // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp)
3087 // and go back to delta_ge_zero
3088 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
3089 P128
.w
[1] = C3
.w
[1];
3090 P128
.w
[0] = C3
.w
[0];
3093 C4
.w
[1] = P128
.w
[1];
3094 C4
.w
[0] = P128
.w
[0];
3109 } else if ((p34
<= delta
&& delta
< q4
&& q4
< delta
+ q3
) || // Case (11)
3110 (delta
< p34
&& p34
< q4
&& q4
< delta
+ q3
)) { // Case (12)
3112 // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3,
3113 // 1 <= x0 <= q3 - 1 <= p34 - 1
3114 x0
= e4
- e3
; // or x0 = delta + q3 - q4
3115 if (q3
<= 18) { // 2 <= q3 <= 18
3116 round64_2_18 (q3
, x0
, C3
.w
[0], &R64
, &incr_exp
,
3117 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3118 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
3121 } else if (q3
<= 38) {
3122 round128_19_38 (q3
, x0
, C3
, &R128
, &incr_exp
,
3123 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3124 &is_inexact_lt_midpoint
,
3125 &is_inexact_gt_midpoint
);
3126 C3
.w
[1] = R128
.w
[1];
3127 C3
.w
[0] = R128
.w
[0];
3129 // the rounded result has q3 - x0 digits
3130 // we want the exponent to be e4, so if incr_exp = 1 then
3131 // multiply the rounded result by 10 - it will still fit in 113 bits
3134 P128
.w
[1] = C3
.w
[1];
3135 P128
.w
[0] = C3
.w
[0];
3136 __mul_64x128_to_128 (C3
, ten2k64
[1], P128
);
3138 e3
= e3
+ x0
; // this is e4
3139 // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3;
3140 // the result will have the sign of x * y; the exponent is e4
3143 R256
.w
[1] = C3
.w
[1];
3144 R256
.w
[0] = C3
.w
[0];
3145 if (p_sign
== z_sign
) { // R256 = C4 + R256
3146 add256 (C4
, R256
, &R256
);
3147 } else { // if (p_sign != z_sign) { // R256 = C4 - R256
3148 sub256 (C4
, R256
, &R256
); // the result cannot be pure zero
3149 // because the result has opposite sign to that of R256 which was
3150 // rounded, need to change the rounding indicators
3151 lsb
= C4
.w
[0] & 0x01;
3152 if (is_inexact_lt_midpoint
) {
3153 is_inexact_lt_midpoint
= 0;
3154 is_inexact_gt_midpoint
= 1;
3155 } else if (is_inexact_gt_midpoint
) {
3156 is_inexact_gt_midpoint
= 0;
3157 is_inexact_lt_midpoint
= 1;
3158 } else if (lsb
== 0) {
3159 if (is_midpoint_lt_even
) {
3160 is_midpoint_lt_even
= 0;
3161 is_midpoint_gt_even
= 1;
3162 } else if (is_midpoint_gt_even
) {
3163 is_midpoint_gt_even
= 0;
3164 is_midpoint_lt_even
= 1;
3168 } else if (lsb
== 1) {
3169 if (is_midpoint_lt_even
) {
3172 if (R256
.w
[0] == 0x0) {
3174 if (R256
.w
[1] == 0x0) {
3176 if (R256
.w
[2] == 0x0) {
3181 // no check for rounding overflow - R256 was a difference
3182 } else if (is_midpoint_gt_even
) {
3185 if (R256
.w
[0] == 0xffffffffffffffffull
) {
3187 if (R256
.w
[1] == 0xffffffffffffffffull
) {
3189 if (R256
.w
[2] == 0xffffffffffffffffull
) {
3201 // determine the number of decimal digits in R256
3202 ind
= nr_digits256 (R256
); // ind >= p34
3203 // if R256 is sum, then ind > p34; if R256 is a difference, then
3204 // ind >= p34; this means that we can calculate the result rounded to
3205 // the destination precision, with unbounded exponent, starting from R256
3206 // and using the indicators from the rounding of C3 to avoid a double
3211 } else if (ind
== p34
) {
3212 // the result rounded to the destination precision with
3213 // unbounded exponent
3214 // is (-1)^p_sign * R256 * 10^e4
3215 res
.w
[1] = R256
.w
[1];
3216 res
.w
[0] = R256
.w
[0];
3217 } else { // if (ind > p34)
3218 // if more than P digits, round to nearest to P digits
3219 // round R256 to p34 digits
3220 x0
= ind
- p34
; // 1 <= x0 <= 34 as 35 <= ind <= 68
3221 // save C3 rounding indicators to help avoid double rounding error
3222 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
3223 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
3224 is_midpoint_lt_even0
= is_midpoint_lt_even
;
3225 is_midpoint_gt_even0
= is_midpoint_gt_even
;
3226 // initialize rounding indicators
3227 is_inexact_lt_midpoint
= 0;
3228 is_inexact_gt_midpoint
= 0;
3229 is_midpoint_lt_even
= 0;
3230 is_midpoint_gt_even
= 0;
3231 // round to p34 digits; the result fits in 113 bits
3233 P128
.w
[1] = R256
.w
[1];
3234 P128
.w
[0] = R256
.w
[0];
3235 round128_19_38 (ind
, x0
, P128
, &R128
, &incr_exp
,
3236 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3237 &is_inexact_lt_midpoint
,
3238 &is_inexact_gt_midpoint
);
3239 } else if (ind
<= 57) {
3240 P192
.w
[2] = R256
.w
[2];
3241 P192
.w
[1] = R256
.w
[1];
3242 P192
.w
[0] = R256
.w
[0];
3243 round192_39_57 (ind
, x0
, P192
, &R192
, &incr_exp
,
3244 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3245 &is_inexact_lt_midpoint
,
3246 &is_inexact_gt_midpoint
);
3247 R128
.w
[1] = R192
.w
[1];
3248 R128
.w
[0] = R192
.w
[0];
3249 } else { // if (ind <= 68)
3250 round256_58_76 (ind
, x0
, R256
, &R256
, &incr_exp
,
3251 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3252 &is_inexact_lt_midpoint
,
3253 &is_inexact_gt_midpoint
);
3254 R128
.w
[1] = R256
.w
[1];
3255 R128
.w
[0] = R256
.w
[0];
3257 // the rounded result has p34 = 34 digits
3258 e4
= e4
+ x0
+ incr_exp
;
3260 res
.w
[1] = R128
.w
[1];
3261 res
.w
[0] = R128
.w
[0];
3263 // avoid a double rounding error
3264 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
3265 is_midpoint_lt_even
) { // double rounding error upward
3268 if (res
.w
[0] == 0xffffffffffffffffull
)
3270 is_midpoint_lt_even
= 0;
3271 is_inexact_lt_midpoint
= 1;
3272 // Note: a double rounding error upward is not possible; for this
3273 // the result after the first rounding would have to be 99...95
3274 // (35 digits in all), possibly followed by a number of zeros; this
3275 // not possible in Cases (2)-(6) or (15)-(17) which may get here
3276 // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
3277 if (res
.w
[1] == 0x0000314dc6448d93ull
&&
3278 res
.w
[0] == 0x38c15b09ffffffffull
) { // 10^33 - 1
3279 res
.w
[1] = 0x0001ed09bead87c0ull
; // 10^34 - 1
3280 res
.w
[0] = 0x378d8e63ffffffffull
;
3283 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
3284 is_midpoint_gt_even
) { // double rounding error downward
3289 is_midpoint_gt_even
= 0;
3290 is_inexact_gt_midpoint
= 1;
3291 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
3292 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
3293 // if this second rounding was exact the result may still be
3294 // inexact because of the first rounding
3295 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
3296 is_inexact_gt_midpoint
= 1;
3298 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
3299 is_inexact_lt_midpoint
= 1;
3301 } else if (is_midpoint_gt_even
&&
3302 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
3303 // pulled up to a midpoint
3304 is_inexact_lt_midpoint
= 1;
3305 is_inexact_gt_midpoint
= 0;
3306 is_midpoint_lt_even
= 0;
3307 is_midpoint_gt_even
= 0;
3308 } else if (is_midpoint_lt_even
&&
3309 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
3310 // pulled down to a midpoint
3311 is_inexact_lt_midpoint
= 0;
3312 is_inexact_gt_midpoint
= 1;
3313 is_midpoint_lt_even
= 0;
3314 is_midpoint_gt_even
= 0;
3320 // determine tininess
3321 if (rnd_mode
== ROUNDING_TO_NEAREST
) {
3323 is_tiny
= 1; // for other rounding modes apply correction
3326 // for RM, RP, RZ, RA apply correction in order to determine tininess
3327 // but do not save the result; apply the correction to
3328 // (-1)^p_sign * res * 10^0
3329 P128
.w
[1] = p_sign
| 0x3040000000000000ull
| res
.w
[1];
3330 P128
.w
[0] = res
.w
[0];
3331 rounding_correction (rnd_mode
,
3332 is_inexact_lt_midpoint
,
3333 is_inexact_gt_midpoint
,
3334 is_midpoint_lt_even
, is_midpoint_gt_even
,
3336 scale
= ((P128
.w
[1] & MASK_EXP
) >> 49) - 6176; // -1, 0, or +1
3337 // the number of digits in the significand is p34 = 34
3338 if (e4
+ scale
< expmin
) {
3343 // the result rounded to the destination precision with unbounded exponent
3344 // is (-1)^p_sign * res * 10^e4
3345 res
.w
[1] = p_sign
| ((UINT64
) (e4
+ 6176) << 49) | res
.w
[1]; // RN
3346 // res.w[0] unchanged;
3347 // Note: res is correct only if expmin <= e4 <= expmax
3348 ind
= p34
; // the number of decimal digits in the signifcand of res
3350 // at this point we have the result rounded with unbounded exponent in
3351 // res and we know its tininess:
3352 // res = (-1)^p_sign * significand * 10^e4,
3353 // where q (significand) = ind = p34
3354 // Note: res is correct only if expmin <= e4 <= expmax
3356 // check for overflow if RN
3357 if (rnd_mode
== ROUNDING_TO_NEAREST
3358 && (ind
+ e4
) > (p34
+ expmax
)) {
3359 res
.w
[1] = p_sign
| 0x7800000000000000ull
;
3360 res
.w
[0] = 0x0000000000000000ull
;
3361 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
3362 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
3363 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
3364 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
3365 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
3368 } // else not overflow or not RN, so continue
3370 // from this point on this is similar to the last part of the computation
3371 // for Cases (15), (16), (17)
3373 // if (e4 >= expmin) we have the result rounded with bounded exponent
3375 x0
= expmin
- e4
; // x0 >= 1; the number of digits to chop off of res
3376 // where the result rounded [at most] once is
3377 // (-1)^p_sign * significand_res * 10^e4
3379 // avoid double rounding error
3380 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
3381 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
3382 is_midpoint_lt_even0
= is_midpoint_lt_even
;
3383 is_midpoint_gt_even0
= is_midpoint_gt_even
;
3384 is_inexact_lt_midpoint
= 0;
3385 is_inexact_gt_midpoint
= 0;
3386 is_midpoint_lt_even
= 0;
3387 is_midpoint_gt_even
= 0;
3390 // nothing is left of res when moving the decimal point left x0 digits
3391 is_inexact_lt_midpoint
= 1;
3392 res
.w
[1] = p_sign
| 0x0000000000000000ull
;
3393 res
.w
[0] = 0x0000000000000000ull
;
3395 } else if (x0
== ind
) { // 1 <= x0 = ind <= p34 = 34
3396 // this is <, =, or > 1/2 ulp
3397 // compare the ind-digit value in the significand of res with
3398 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
3399 // less than, equal to, or greater than 1/2 ulp (significand of res)
3400 R128
.w
[1] = res
.w
[1] & MASK_COEFF
;
3401 R128
.w
[0] = res
.w
[0];
3403 if (R128
.w
[0] < midpoint64
[ind
- 1]) { // < 1/2 ulp
3405 is_inexact_lt_midpoint
= 1;
3406 } else if (R128
.w
[0] == midpoint64
[ind
- 1]) { // = 1/2 ulp
3408 is_midpoint_gt_even
= 1;
3409 } else { // > 1/2 ulp
3411 is_inexact_gt_midpoint
= 1;
3413 } else { // if (ind <= 38)
3414 if (R128
.w
[1] < midpoint128
[ind
- 20].w
[1] ||
3415 (R128
.w
[1] == midpoint128
[ind
- 20].w
[1] &&
3416 R128
.w
[0] < midpoint128
[ind
- 20].w
[0])) { // < 1/2 ulp
3418 is_inexact_lt_midpoint
= 1;
3419 } else if (R128
.w
[1] == midpoint128
[ind
- 20].w
[1] &&
3420 R128
.w
[0] == midpoint128
[ind
- 20].w
[0]) { // = 1/2 ulp
3422 is_midpoint_gt_even
= 1;
3423 } else { // > 1/2 ulp
3425 is_inexact_gt_midpoint
= 1;
3428 if (lt_half_ulp
|| eq_half_ulp
) {
3429 // res = +0.0 * 10^expmin
3430 res
.w
[1] = 0x0000000000000000ull
;
3431 res
.w
[0] = 0x0000000000000000ull
;
3432 } else { // if (gt_half_ulp)
3433 // res = +1 * 10^expmin
3434 res
.w
[1] = 0x0000000000000000ull
;
3435 res
.w
[0] = 0x0000000000000001ull
;
3437 res
.w
[1] = p_sign
| res
.w
[1];
3439 } else { // if (1 <= x0 <= ind - 1 <= 33)
3440 // round the ind-digit result to ind - x0 digits
3442 if (ind
<= 18) { // 2 <= ind <= 18
3443 round64_2_18 (ind
, x0
, res
.w
[0], &R64
, &incr_exp
,
3444 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3445 &is_inexact_lt_midpoint
,
3446 &is_inexact_gt_midpoint
);
3449 } else if (ind
<= 38) {
3450 P128
.w
[1] = res
.w
[1] & MASK_COEFF
;
3451 P128
.w
[0] = res
.w
[0];
3452 round128_19_38 (ind
, x0
, P128
, &res
, &incr_exp
,
3453 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
3454 &is_inexact_lt_midpoint
,
3455 &is_inexact_gt_midpoint
);
3457 e4
= e4
+ x0
; // expmin
3458 // we want the exponent to be expmin, so if incr_exp = 1 then
3459 // multiply the rounded result by 10 - it will still fit in 113 bits
3462 P128
.w
[1] = res
.w
[1] & MASK_COEFF
;
3463 P128
.w
[0] = res
.w
[0];
3464 __mul_64x128_to_128 (res
, ten2k64
[1], P128
);
3467 p_sign
| ((UINT64
) (e4
+ 6176) << 49) | (res
.
3469 // avoid a double rounding error
3470 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
3471 is_midpoint_lt_even
) { // double rounding error upward
3474 if (res
.w
[0] == 0xffffffffffffffffull
)
3476 // Note: a double rounding error upward is not possible; for this
3477 // the result after the first rounding would have to be 99...95
3478 // (35 digits in all), possibly followed by a number of zeros; this
3479 // not possible in this underflow case
3480 is_midpoint_lt_even
= 0;
3481 is_inexact_lt_midpoint
= 1;
3482 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
3483 is_midpoint_gt_even
) { // double rounding error downward
3488 is_midpoint_gt_even
= 0;
3489 is_inexact_gt_midpoint
= 1;
3490 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
3491 !is_inexact_lt_midpoint
3492 && !is_inexact_gt_midpoint
) {
3493 // if this second rounding was exact the result may still be
3494 // inexact because of the first rounding
3495 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
3496 is_inexact_gt_midpoint
= 1;
3498 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
3499 is_inexact_lt_midpoint
= 1;
3501 } else if (is_midpoint_gt_even
&&
3502 (is_inexact_gt_midpoint0
3503 || is_midpoint_lt_even0
)) {
3504 // pulled up to a midpoint
3505 is_inexact_lt_midpoint
= 1;
3506 is_inexact_gt_midpoint
= 0;
3507 is_midpoint_lt_even
= 0;
3508 is_midpoint_gt_even
= 0;
3509 } else if (is_midpoint_lt_even
&&
3510 (is_inexact_lt_midpoint0
3511 || is_midpoint_gt_even0
)) {
3512 // pulled down to a midpoint
3513 is_inexact_lt_midpoint
= 0;
3514 is_inexact_gt_midpoint
= 1;
3515 is_midpoint_lt_even
= 0;
3516 is_midpoint_gt_even
= 0;
3522 // res contains the correct result
3523 // apply correction if not rounding to nearest
3524 if (rnd_mode
!= ROUNDING_TO_NEAREST
) {
3525 rounding_correction (rnd_mode
,
3526 is_inexact_lt_midpoint
,
3527 is_inexact_gt_midpoint
,
3528 is_midpoint_lt_even
, is_midpoint_gt_even
,
3531 if (is_midpoint_lt_even
|| is_midpoint_gt_even
||
3532 is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
) {
3533 // set the inexact flag
3534 *pfpsf
|= INEXACT_EXCEPTION
;
3536 *pfpsf
|= UNDERFLOW_EXCEPTION
;
3538 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
3539 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
3540 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
3541 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
3545 } else if ((p34
<= delta
&& delta
+ q3
<= q4
) || // Case (15)
3546 (delta
< p34
&& p34
< delta
+ q3
&& delta
+ q3
<= q4
) || //Case (16)
3547 (delta
+ q3
<= p34
&& p34
< q4
)) { // Case (17)
3549 // calculate first the result rounded to the destination precision, with
3550 // unbounded exponent
3552 add_and_round (q3
, q4
, e4
, delta
, p34
, z_sign
, p_sign
, C3
, C4
,
3553 rnd_mode
, &is_midpoint_lt_even
,
3554 &is_midpoint_gt_even
, &is_inexact_lt_midpoint
,
3555 &is_inexact_gt_midpoint
, pfpsf
, &res
);
3556 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
3557 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
3558 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
3559 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
3567 } // end if delta < 0
3569 *ptr_is_midpoint_lt_even
= is_midpoint_lt_even
;
3570 *ptr_is_midpoint_gt_even
= is_midpoint_gt_even
;
3571 *ptr_is_inexact_lt_midpoint
= is_inexact_lt_midpoint
;
3572 *ptr_is_inexact_gt_midpoint
= is_inexact_gt_midpoint
;
3579 #if DECIMAL_CALL_BY_REFERENCE
3581 bid128_fma (UINT128
* pres
, UINT128
* px
, UINT128
* py
, UINT128
* pz
3582 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3584 UINT128 x
= *px
, y
= *py
, z
= *pz
;
3585 #if !DECIMAL_GLOBAL_ROUNDING
3586 unsigned int rnd_mode
= *prnd_mode
;
3590 bid128_fma (UINT128 x
, UINT128 y
, UINT128 z
3591 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3594 int is_midpoint_lt_even
, is_midpoint_gt_even
,
3595 is_inexact_lt_midpoint
, is_inexact_gt_midpoint
;
3596 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3598 #if DECIMAL_CALL_BY_REFERENCE
3599 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3600 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3602 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3605 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3606 &is_inexact_lt_midpoint
,
3607 &is_inexact_gt_midpoint
, x
, y
,
3608 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3615 #if DECIMAL_CALL_BY_REFERENCE
3617 bid128ddd_fma (UINT128
* pres
, UINT64
* px
, UINT64
* py
, UINT64
* pz
3618 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3620 UINT64 x
= *px
, y
= *py
, z
= *pz
;
3621 #if !DECIMAL_GLOBAL_ROUNDING
3622 unsigned int rnd_mode
= *prnd_mode
;
3626 bid128ddd_fma (UINT64 x
, UINT64 y
, UINT64 z
3627 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3630 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3631 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3632 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3635 #if DECIMAL_CALL_BY_REFERENCE
3636 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3637 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3638 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3639 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3640 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3642 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3645 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3646 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3647 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3648 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3649 &is_inexact_lt_midpoint
,
3650 &is_inexact_gt_midpoint
, x1
, y1
,
3651 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3658 #if DECIMAL_CALL_BY_REFERENCE
3660 bid128ddq_fma (UINT128
* pres
, UINT64
* px
, UINT64
* py
, UINT128
* pz
3661 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3663 UINT64 x
= *px
, y
= *py
;
3665 #if !DECIMAL_GLOBAL_ROUNDING
3666 unsigned int rnd_mode
= *prnd_mode
;
3670 bid128ddq_fma (UINT64 x
, UINT64 y
, UINT128 z
3671 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3674 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3675 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3676 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3679 #if DECIMAL_CALL_BY_REFERENCE
3680 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3681 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3682 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3683 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3685 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3688 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3689 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3690 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3691 &is_inexact_lt_midpoint
,
3692 &is_inexact_gt_midpoint
, x1
, y1
,
3693 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3700 #if DECIMAL_CALL_BY_REFERENCE
3702 bid128dqd_fma (UINT128
* pres
, UINT64
* px
, UINT128
* py
, UINT64
* pz
3703 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3705 UINT64 x
= *px
, z
= *pz
;
3706 #if !DECIMAL_GLOBAL_ROUNDING
3707 unsigned int rnd_mode
= *prnd_mode
;
3711 bid128dqd_fma (UINT64 x
, UINT128 y
, UINT64 z
3712 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3715 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3716 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3717 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3720 #if DECIMAL_CALL_BY_REFERENCE
3721 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3722 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3723 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3724 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3726 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3729 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3730 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3731 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3732 &is_inexact_lt_midpoint
,
3733 &is_inexact_gt_midpoint
, x1
, y
,
3734 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3741 #if DECIMAL_CALL_BY_REFERENCE
3743 bid128dqq_fma (UINT128
* pres
, UINT64
* px
, UINT128
* py
, UINT128
* pz
3744 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3747 #if !DECIMAL_GLOBAL_ROUNDING
3748 unsigned int rnd_mode
= *prnd_mode
;
3752 bid128dqq_fma (UINT64 x
, UINT128 y
, UINT128 z
3753 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3756 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3757 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3758 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3761 #if DECIMAL_CALL_BY_REFERENCE
3762 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3763 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3764 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3766 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3769 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3770 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3771 &is_inexact_lt_midpoint
,
3772 &is_inexact_gt_midpoint
, x1
, y
,
3773 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3780 #if DECIMAL_CALL_BY_REFERENCE
3782 bid128qdd_fma (UINT128
* pres
, UINT128
* px
, UINT64
* py
, UINT64
* pz
3783 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3785 UINT64 y
= *py
, z
= *pz
;
3786 #if !DECIMAL_GLOBAL_ROUNDING
3787 unsigned int rnd_mode
= *prnd_mode
;
3791 bid128qdd_fma (UINT128 x
, UINT64 y
, UINT64 z
3792 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3795 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3796 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3797 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3800 #if DECIMAL_CALL_BY_REFERENCE
3801 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3802 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3803 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3804 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3806 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3809 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3810 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3811 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3812 &is_inexact_lt_midpoint
,
3813 &is_inexact_gt_midpoint
, x
, y1
,
3814 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3821 #if DECIMAL_CALL_BY_REFERENCE
3823 bid128qdq_fma (UINT128
* pres
, UINT128
* px
, UINT64
* py
, UINT128
* pz
3824 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3827 #if !DECIMAL_GLOBAL_ROUNDING
3828 unsigned int rnd_mode
= *prnd_mode
;
3832 bid128qdq_fma (UINT128 x
, UINT64 y
, UINT128 z
3833 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3836 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3837 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3838 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3841 #if DECIMAL_CALL_BY_REFERENCE
3842 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3843 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3844 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3846 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3849 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3850 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3851 &is_inexact_lt_midpoint
,
3852 &is_inexact_gt_midpoint
, x
, y1
,
3853 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3860 #if DECIMAL_CALL_BY_REFERENCE
3862 bid128qqd_fma (UINT128
* pres
, UINT128
* px
, UINT128
* py
, UINT64
* pz
3863 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3866 #if !DECIMAL_GLOBAL_ROUNDING
3867 unsigned int rnd_mode
= *prnd_mode
;
3871 bid128qqd_fma (UINT128 x
, UINT128 y
, UINT64 z
3872 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3875 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
3876 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
3877 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
3880 #if DECIMAL_CALL_BY_REFERENCE
3881 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3882 bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3883 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
,
3885 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3888 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3889 res
= bid128_ext_fma (&is_midpoint_lt_even
, &is_midpoint_gt_even
,
3890 &is_inexact_lt_midpoint
,
3891 &is_inexact_gt_midpoint
, x
, y
,
3892 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3898 // Note: bid128qqq_fma is represented by bid128_fma
3900 // Note: bid64ddd_fma is represented by bid64_fma
3902 #if DECIMAL_CALL_BY_REFERENCE
3904 bid64ddq_fma (UINT64
* pres
, UINT64
* px
, UINT64
* py
, UINT128
* pz
3905 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3907 UINT64 x
= *px
, y
= *py
;
3908 #if !DECIMAL_GLOBAL_ROUNDING
3909 unsigned int rnd_mode
= *prnd_mode
;
3913 bid64ddq_fma (UINT64 x
, UINT64 y
, UINT128 z
3914 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3917 UINT64 res1
= 0xbaddbaddbaddbaddull
;
3920 #if DECIMAL_CALL_BY_REFERENCE
3921 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3922 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3923 bid64qqq_fma (&res1
, &x1
, &y1
, pz
3924 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3927 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3928 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3929 res1
= bid64qqq_fma (x1
, y1
, z
3930 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3937 #if DECIMAL_CALL_BY_REFERENCE
3939 bid64dqd_fma (UINT64
* pres
, UINT64
* px
, UINT128
* py
, UINT64
* pz
3940 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3942 UINT64 x
= *px
, z
= *pz
;
3943 #if !DECIMAL_GLOBAL_ROUNDING
3944 unsigned int rnd_mode
= *prnd_mode
;
3948 bid64dqd_fma (UINT64 x
, UINT128 y
, UINT64 z
3949 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3952 UINT64 res1
= 0xbaddbaddbaddbaddull
;
3955 #if DECIMAL_CALL_BY_REFERENCE
3956 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3957 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3958 bid64qqq_fma (&res1
, &x1
, py
, &z1
3959 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3962 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3963 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3964 res1
= bid64qqq_fma (x1
, y
, z1
3965 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3972 #if DECIMAL_CALL_BY_REFERENCE
3974 bid64dqq_fma (UINT64
* pres
, UINT64
* px
, UINT128
* py
, UINT128
* pz
3975 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3978 #if !DECIMAL_GLOBAL_ROUNDING
3979 unsigned int rnd_mode
= *prnd_mode
;
3983 bid64dqq_fma (UINT64 x
, UINT128 y
, UINT128 z
3984 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3987 UINT64 res1
= 0xbaddbaddbaddbaddull
;
3990 #if DECIMAL_CALL_BY_REFERENCE
3991 bid64_to_bid128 (&x1
, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3992 bid64qqq_fma (&res1
, &x1
, py
, pz
3993 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3996 x1
= bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
3997 res1
= bid64qqq_fma (x1
, y
, z
3998 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4005 #if DECIMAL_CALL_BY_REFERENCE
4007 bid64qdd_fma (UINT64
* pres
, UINT128
* px
, UINT64
* py
, UINT64
* pz
4008 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4010 UINT64 y
= *py
, z
= *pz
;
4011 #if !DECIMAL_GLOBAL_ROUNDING
4012 unsigned int rnd_mode
= *prnd_mode
;
4016 bid64qdd_fma (UINT128 x
, UINT64 y
, UINT64 z
4017 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4020 UINT64 res1
= 0xbaddbaddbaddbaddull
;
4023 #if DECIMAL_CALL_BY_REFERENCE
4024 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4025 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4026 bid64qqq_fma (&res1
, px
, &y1
, &z1
4027 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4030 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4031 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4032 res1
= bid64qqq_fma (x
, y1
, z1
4033 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4040 #if DECIMAL_CALL_BY_REFERENCE
4042 bid64qdq_fma (UINT64
* pres
, UINT128
* px
, UINT64
* py
, UINT128
* pz
4043 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4046 #if !DECIMAL_GLOBAL_ROUNDING
4047 unsigned int rnd_mode
= *prnd_mode
;
4051 bid64qdq_fma (UINT128 x
, UINT64 y
, UINT128 z
4052 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4055 UINT64 res1
= 0xbaddbaddbaddbaddull
;
4058 #if DECIMAL_CALL_BY_REFERENCE
4059 bid64_to_bid128 (&y1
, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4060 bid64qqq_fma (&res1
, px
, &y1
, pz
4061 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4064 y1
= bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4065 res1
= bid64qqq_fma (x
, y1
, z
4066 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4073 #if DECIMAL_CALL_BY_REFERENCE
4075 bid64qqd_fma (UINT64
* pres
, UINT128
* px
, UINT128
* py
, UINT64
* pz
4076 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4079 #if !DECIMAL_GLOBAL_ROUNDING
4080 unsigned int rnd_mode
= *prnd_mode
;
4084 bid64qqd_fma (UINT128 x
, UINT128 y
, UINT64 z
4085 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4088 UINT64 res1
= 0xbaddbaddbaddbaddull
;
4091 #if DECIMAL_CALL_BY_REFERENCE
4092 bid64_to_bid128 (&z1
, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4093 bid64qqq_fma (&res1
, px
, py
, &z1
4094 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4097 z1
= bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG
);
4098 res1
= bid64qqq_fma (x
, y
, z1
4099 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4106 #if DECIMAL_CALL_BY_REFERENCE
4108 bid64qqq_fma (UINT64
* pres
, UINT128
* px
, UINT128
* py
, UINT128
* pz
4109 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4111 UINT128 x
= *px
, y
= *py
, z
= *pz
;
4112 #if !DECIMAL_GLOBAL_ROUNDING
4113 unsigned int rnd_mode
= *prnd_mode
;
4117 bid64qqq_fma (UINT128 x
, UINT128 y
, UINT128 z
4118 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4121 int is_midpoint_lt_even0
= 0, is_midpoint_gt_even0
= 0,
4122 is_inexact_lt_midpoint0
= 0, is_inexact_gt_midpoint0
= 0;
4123 int is_midpoint_lt_even
= 0, is_midpoint_gt_even
= 0,
4124 is_inexact_lt_midpoint
= 0, is_inexact_gt_midpoint
= 0;
4126 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
4127 UINT128 res128
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
} };
4128 UINT64 res1
= 0xbaddbaddbaddbaddull
;
4129 unsigned int save_fpsf
; // needed because of the call to bid128_ext_fma
4138 int lt_half_ulp
= 0, eq_half_ulp
= 0;
4140 // Note: for rounding modes other than RN or RA, the result can be obtained
4141 // by rounding first to BID128 and then to BID64
4143 save_fpsf
= *pfpsf
; // sticky bits - caller value must be preserved
4146 #if DECIMAL_CALL_BY_REFERENCE
4147 bid128_ext_fma (&is_midpoint_lt_even0
, &is_midpoint_gt_even0
,
4148 &is_inexact_lt_midpoint0
, &is_inexact_gt_midpoint0
,
4150 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4153 res
= bid128_ext_fma (&is_midpoint_lt_even0
, &is_midpoint_gt_even0
,
4154 &is_inexact_lt_midpoint0
,
4155 &is_inexact_gt_midpoint0
, x
, y
,
4156 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4160 if ((rnd_mode
== ROUNDING_DOWN
) || (rnd_mode
== ROUNDING_UP
) ||
4161 (rnd_mode
== ROUNDING_TO_ZERO
) || // no double rounding error is possible
4162 ((res
.w
[HIGH_128W
] & MASK_NAN
) == MASK_NAN
) || //res=QNaN (cannot be SNaN)
4163 ((res
.w
[HIGH_128W
] & MASK_ANY_INF
) == MASK_INF
)) { // result is infinity
4164 #if DECIMAL_CALL_BY_REFERENCE
4165 bid128_to_bid64 (&res1
, &res _RND_MODE_ARG _EXC_FLAGS_ARG
);
4167 res1
= bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG
);
4169 // determine the unbiased exponent of the result
4170 unbexp
= ((res1
>> 53) & 0x3ff) - 398;
4172 // if subnormal, res1 must have exp = -398
4173 // if tiny and inexact set underflow and inexact status flags
4174 if (!((res1
& MASK_NAN
) == MASK_NAN
) && // res1 not NaN
4176 && ((res1
& MASK_BINARY_SIG1
) < 1000000000000000ull)
4177 && (is_inexact_lt_midpoint0
|| is_inexact_gt_midpoint0
4178 || is_midpoint_lt_even0
|| is_midpoint_gt_even0
)) {
4179 // set the inexact flag and the underflow flag
4180 *pfpsf
|= (INEXACT_EXCEPTION
| UNDERFLOW_EXCEPTION
);
4181 } else if (is_inexact_lt_midpoint0
|| is_inexact_gt_midpoint0
||
4182 is_midpoint_lt_even0
|| is_midpoint_gt_even0
) {
4183 // set the inexact flag and the underflow flag
4184 *pfpsf
|= INEXACT_EXCEPTION
;
4187 *pfpsf
|= save_fpsf
;
4189 } // else continue, and use rounding to nearest to round to 16 digits
4191 // at this point the result is rounded to nearest (even or away) to 34 digits
4192 // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
4193 sign
= res
.w
[HIGH_128W
] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
4194 exp
= res
.w
[HIGH_128W
] & MASK_EXP
; // biased and shifted left 49 bits
4195 unbexp
= (exp
>> 49) - 6176;
4196 C
.w
[1] = res
.w
[HIGH_128W
] & MASK_COEFF
;
4197 C
.w
[0] = res
.w
[LOW_128W
];
4199 if ((C
.w
[1] == 0x0 && C
.w
[0] == 0x0) || // result is zero
4200 (unbexp
<= (-398 - 35)) || (unbexp
>= (369 + 16))) {
4201 // clear under/overflow
4202 #if DECIMAL_CALL_BY_REFERENCE
4203 bid128_to_bid64 (&res1
, &res _RND_MODE_ARG _EXC_FLAGS_ARG
);
4205 res1
= bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG
);
4207 *pfpsf
|= save_fpsf
;
4211 // -398 - 34 <= unbexp <= 369 + 15
4212 if (rnd_mode
== ROUNDING_TIES_AWAY
) {
4213 // apply correction, if needed, to make the result rounded to nearest-even
4214 if (is_midpoint_gt_even
) {
4216 res1
--; // res1 is now even
4217 } // else the result is already correctly rounded to nearest-even
4219 // at this point the result is finite, non-zero canonical normal or subnormal,
4220 // and in most cases overflow or underflow will not occur
4222 // determine the number of digits q in the result
4223 // q = nr. of decimal digits in x
4224 // determine first the nr. of bits in x
4226 if (C
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
4227 // split the 64-bit value in two 32-bit halves to avoid rounding errors
4228 if (C
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
4229 tmp
.d
= (double) (C
.w
[0] >> 32); // exact conversion
4231 33 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
4232 } else { // x < 2^32
4233 tmp
.d
= (double) (C
.w
[0]); // exact conversion
4235 1 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
4237 } else { // if x < 2^53
4238 tmp
.d
= (double) C
.w
[0]; // exact conversion
4240 1 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
4242 } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
4243 tmp
.d
= (double) C
.w
[1]; // exact conversion
4245 65 + ((((unsigned int) (tmp
.ui64
>> 52)) & 0x7ff) - 0x3ff);
4247 q
= nr_digits
[nr_bits
- 1].digits
;
4249 q
= nr_digits
[nr_bits
- 1].digits1
;
4250 if (C
.w
[1] > nr_digits
[nr_bits
- 1].threshold_hi
||
4251 (C
.w
[1] == nr_digits
[nr_bits
- 1].threshold_hi
&&
4252 C
.w
[0] >= nr_digits
[nr_bits
- 1].threshold_lo
))
4255 // if q > 16, round to nearest even to 16 digits (but for underflow it may
4256 // have to be truncated even more)
4260 round64_2_18 (q
, x0
, C
.w
[0], &res1
, &incr_exp
,
4261 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
4262 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
4263 } else { // 19 <= q <= 34
4264 round128_19_38 (q
, x0
, C
, &res128
, &incr_exp
,
4265 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
4266 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
4267 res1
= res128
.w
[0]; // the result fits in 64 bits
4269 unbexp
= unbexp
+ x0
;
4272 q
= 16; // need to set in case denormalization is necessary
4274 // the result does not require a second rounding (and it must have
4275 // been exact in the first rounding, since q <= 16)
4279 // avoid a double rounding error
4280 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
4281 is_midpoint_lt_even
) { // double rounding error upward
4283 res1
--; // res1 becomes odd
4284 is_midpoint_lt_even
= 0;
4285 is_inexact_lt_midpoint
= 1;
4286 if (res1
== 0x00038d7ea4c67fffull
) { // 10^15 - 1
4287 res1
= 0x002386f26fc0ffffull
; // 10^16 - 1
4290 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
4291 is_midpoint_gt_even
) { // double rounding error downward
4293 res1
++; // res1 becomes odd (so it cannot be 10^16)
4294 is_midpoint_gt_even
= 0;
4295 is_inexact_gt_midpoint
= 1;
4296 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
4297 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
4298 // if this second rounding was exact the result may still be
4299 // inexact because of the first rounding
4300 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
4301 is_inexact_gt_midpoint
= 1;
4303 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
4304 is_inexact_lt_midpoint
= 1;
4306 } else if (is_midpoint_gt_even
&&
4307 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
4308 // pulled up to a midpoint
4309 is_inexact_lt_midpoint
= 1;
4310 is_inexact_gt_midpoint
= 0;
4311 is_midpoint_lt_even
= 0;
4312 is_midpoint_gt_even
= 0;
4313 } else if (is_midpoint_lt_even
&&
4314 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
4315 // pulled down to a midpoint
4316 is_inexact_lt_midpoint
= 0;
4317 is_inexact_gt_midpoint
= 1;
4318 is_midpoint_lt_even
= 0;
4319 is_midpoint_gt_even
= 0;
4323 // this is the result rounded correctly to nearest even, with unbounded exp.
4325 // check for overflow
4326 if (q
+ unbexp
> P16
+ expmax16
) {
4327 res1
= sign
| 0x7800000000000000ull
;
4328 *pfpsf
|= (INEXACT_EXCEPTION
| OVERFLOW_EXCEPTION
);
4329 *pfpsf
|= save_fpsf
;
4331 } else if (unbexp
> expmax16
) { // q + unbexp <= P16 + expmax16
4332 // not overflow; the result must be exact, and we can multiply res1 by
4333 // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
4334 scale
= unbexp
- expmax16
;
4335 res1
= res1
* ten2k64
[scale
]; // res1 * 10^scale
4336 unbexp
= expmax16
; // unbexp - scale
4341 // check for underflow
4342 if (q
+ unbexp
< P16
+ expmin16
) {
4343 if (unbexp
< expmin16
) {
4344 // we must truncate more of res
4345 x0
= expmin16
- unbexp
; // x0 >= 1
4346 is_inexact_lt_midpoint0
= is_inexact_lt_midpoint
;
4347 is_inexact_gt_midpoint0
= is_inexact_gt_midpoint
;
4348 is_midpoint_lt_even0
= is_midpoint_lt_even
;
4349 is_midpoint_gt_even0
= is_midpoint_gt_even
;
4350 is_inexact_lt_midpoint
= 0;
4351 is_inexact_gt_midpoint
= 0;
4352 is_midpoint_lt_even
= 0;
4353 is_midpoint_gt_even
= 0;
4354 // the number of decimal digits in res1 is q
4355 if (x0
< q
) { // 1 <= x0 <= q-1 => round res to q - x0 digits
4356 // 2 <= q <= 16, 1 <= x0 <= 15
4357 round64_2_18 (q
, x0
, res1
, &res1
, &incr_exp
,
4358 &is_midpoint_lt_even
, &is_midpoint_gt_even
,
4359 &is_inexact_lt_midpoint
, &is_inexact_gt_midpoint
);
4361 // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
4362 res1
= ten2k64
[q
- x0
];
4364 unbexp
= unbexp
+ x0
; // expmin16
4365 } else if (x0
== q
) {
4366 // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
4367 // determine relationship with 1/2 ulp
4369 if (res1
< midpoint64
[q
- 1]) { // < 1/2 ulp
4371 is_inexact_lt_midpoint
= 1;
4372 } else if (res1
== midpoint64
[q
- 1]) { // = 1/2 ulp
4374 is_midpoint_gt_even
= 1;
4375 } else { // > 1/2 ulp
4377 is_inexact_gt_midpoint
= 1;
4379 if (lt_half_ulp
|| eq_half_ulp
) {
4380 // res = +0.0 * 10^expmin16
4381 res1
= 0x0000000000000000ull
;
4382 } else { // if (gt_half_ulp)
4383 // res = +1 * 10^expmin16
4384 res1
= 0x0000000000000001ull
;
4387 } else { // if (x0 > q)
4388 // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
4389 res1
= 0x0000000000000000ull
;
4391 is_inexact_lt_midpoint
= 1;
4393 // avoid a double rounding error
4394 if ((is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) &&
4395 is_midpoint_lt_even
) { // double rounding error upward
4397 res1
--; // res1 becomes odd
4398 is_midpoint_lt_even
= 0;
4399 is_inexact_lt_midpoint
= 1;
4400 } else if ((is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) &&
4401 is_midpoint_gt_even
) { // double rounding error downward
4403 res1
++; // res1 becomes odd
4404 is_midpoint_gt_even
= 0;
4405 is_inexact_gt_midpoint
= 1;
4406 } else if (!is_midpoint_lt_even
&& !is_midpoint_gt_even
&&
4407 !is_inexact_lt_midpoint
&& !is_inexact_gt_midpoint
) {
4408 // if this rounding was exact the result may still be
4409 // inexact because of the previous roundings
4410 if (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
) {
4411 is_inexact_gt_midpoint
= 1;
4413 if (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
) {
4414 is_inexact_lt_midpoint
= 1;
4416 } else if (is_midpoint_gt_even
&&
4417 (is_inexact_gt_midpoint0
|| is_midpoint_lt_even0
)) {
4418 // pulled up to a midpoint
4419 is_inexact_lt_midpoint
= 1;
4420 is_inexact_gt_midpoint
= 0;
4421 is_midpoint_lt_even
= 0;
4422 is_midpoint_gt_even
= 0;
4423 } else if (is_midpoint_lt_even
&&
4424 (is_inexact_lt_midpoint0
|| is_midpoint_gt_even0
)) {
4425 // pulled down to a midpoint
4426 is_inexact_lt_midpoint
= 0;
4427 is_inexact_gt_midpoint
= 1;
4428 is_midpoint_lt_even
= 0;
4429 is_midpoint_gt_even
= 0;
4434 // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
4435 // and the result is tiny and exact
4437 // check for inexact result
4438 if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
4439 is_midpoint_lt_even
|| is_midpoint_gt_even
||
4440 is_inexact_lt_midpoint0
|| is_inexact_gt_midpoint0
||
4441 is_midpoint_lt_even0
|| is_midpoint_gt_even0
) {
4442 // set the inexact flag and the underflow flag
4443 *pfpsf
|= (INEXACT_EXCEPTION
| UNDERFLOW_EXCEPTION
);
4445 } else if (is_inexact_lt_midpoint
|| is_inexact_gt_midpoint
||
4446 is_midpoint_lt_even
|| is_midpoint_gt_even
) {
4447 *pfpsf
|= INEXACT_EXCEPTION
;
4449 // this is the result rounded correctly to nearest, with bounded exponent
4451 if (rnd_mode
== ROUNDING_TIES_AWAY
&& is_midpoint_gt_even
) { // correction
4453 res1
++; // res1 is now odd
4454 } // else the result is already correct
4456 // assemble the result
4457 if (res1
< 0x0020000000000000ull
) { // res < 2^53
4458 res1
= sign
| ((UINT64
) (unbexp
+ 398) << 53) | res1
;
4459 } else { // res1 >= 2^53
4460 res1
= sign
| MASK_STEERING_BITS
|
4461 ((UINT64
) (unbexp
+ 398) << 51) | (res1
& MASK_BINARY_SIG2
);
4463 *pfpsf
|= save_fpsf
;