Merge -r 127928:132243 from trunk
[official-gcc.git] / libgcc / config / libbid / bid128_fma.c
blob1d0ffb128ed42e4793c5eed4cc2ef545e9b66220
1 /* Copyright (C) 2007 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
8 version.
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
17 executable.)
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
22 for more details.
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA. */
29 /*****************************************************************************
31 * BID128 fma x * y + z
33 ****************************************************************************/
35 #include "bid_internal.h"
static void
rounding_correction (unsigned int rnd_mode,
                     unsigned int is_inexact_lt_midpoint,
                     unsigned int is_inexact_gt_midpoint,
                     unsigned int is_midpoint_lt_even,
                     unsigned int is_midpoint_gt_even,
                     int unbexp,
                     UINT128 * ptrres, _IDEC_flags * ptrfpsf) {
  // Convert a result that was rounded to nearest-even with unbounded
  // exponent into the result for the target rounding mode rnd_mode:
  // the coefficient in *ptrres is adjusted by at most one ulp, overflow
  // is detected, and status flags are accumulated into *ptrfpsf.
  // unbiased true exponent unbexp may be larger than emax
  UINT128 res = *ptrres; // expected to have the correct sign and coefficient
  // (the exponent field is ignored, as unbexp is used instead)
  UINT64 sign, exp;
  UINT64 C_hi, C_lo;

  // general correction from RN to RA, RM, RP, RZ
  // Note: if the result is negative, then is_inexact_lt_midpoint,
  // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
  // have to be considered as if determined for the absolute value of the
  // result (so they seem to be reversed)

  if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
      is_midpoint_lt_even || is_midpoint_gt_even) {
    *ptrfpsf |= INEXACT_EXCEPTION;
    // apply correction to result calculated with unbounded exponent
    sign = res.w[1] & MASK_SIGN;
    exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
    C_hi = res.w[1] & MASK_COEFF;
    C_lo = res.w[0];
    // increment the RN coefficient when the target mode rounds the
    // magnitude up past the round-to-nearest result
    if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) ||
        ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) &&
        is_midpoint_gt_even))) ||
        (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) ||
        ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) &&
        is_midpoint_gt_even)))) {
      // C = C + 1
      C_lo = C_lo + 1;
      if (C_lo == 0) // carry out of the low 64 bits of the coefficient
        C_hi = C_hi + 1;
      if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) {
        // C = 10^34 => rounding overflow
        C_hi = 0x0000314dc6448d93ull;
        C_lo = 0x38c15b0a00000000ull; // 10^33
        // exp = exp + EXP_P1;
        unbexp = unbexp + 1;
        exp = (UINT64) (unbexp + 6176) << 49;
      }
    } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
        ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) ||
        (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) {
      // C = C - 1
      C_lo = C_lo - 1;
      if (C_lo == 0xffffffffffffffffull) // borrow from the high 64 bits
        C_hi--;
      // check if we crossed into the lower decade
      if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) {
        // C = 10^33 - 1
        // exp > 0 <=> unbexp > -6176 = expmin, i.e. the exponent can be
        // decremented to keep 34 digits in the coefficient
        if (exp > 0) {
          C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
          C_lo = 0x378d8e63ffffffffull;
          // exp = exp - EXP_P1;
          unbexp = unbexp - 1;
          exp = (UINT64) (unbexp + 6176) << 49;
        } else { // if exp = 0
          // NOTE(review): the two operand pairs below are duplicated;
          // presumably is_midpoint_gt_even and is_inexact_lt_midpoint were
          // intended as well. Harmless here, since this branch is reached
          // only when one of the duplicated flags is set, but worth
          // confirming against the Intel BID reference sources.
          if (is_midpoint_lt_even || is_midpoint_lt_even ||
              is_inexact_gt_midpoint || is_inexact_gt_midpoint) // tiny & inexact
            *ptrfpsf |= UNDERFLOW_EXCEPTION;
        }
      }
    } else {
      ; // the result is already correct
    }
    if (unbexp > expmax) { // 6111
      // overflow: deliver +/-inf or +/-MAXFP depending on mode and sign
      *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
      exp = 0;
      if (!sign) { // result is positive
        if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf
          C_hi = 0x7800000000000000ull;
          C_lo = 0x0000000000000000ull;
        } else { // res = +MAXFP = (10^34-1) * 10^emax
          C_hi = 0x5fffed09bead87c0ull;
          C_lo = 0x378d8e63ffffffffull;
        }
      } else { // result is negative
        if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf
          C_hi = 0xf800000000000000ull;
          C_lo = 0x0000000000000000ull;
        } else { // res = -MAXFP = -(10^34-1) * 10^emax
          C_hi = 0xdfffed09bead87c0ull;
          C_lo = 0x378d8e63ffffffffull;
        }
      }
    }
    // assemble the result
    res.w[1] = sign | exp | C_hi;
    res.w[0] = C_lo;
    *ptrres = res;
  }
}
137 static void
138 add256 (UINT256 x, UINT256 y, UINT256 * pz) {
139 // *z = x + yl assume the sum fits in 256 bits
140 UINT256 z;
141 z.w[0] = x.w[0] + y.w[0];
142 if (z.w[0] < x.w[0]) {
143 x.w[1]++;
144 if (x.w[1] == 0x0000000000000000ull) {
145 x.w[2]++;
146 if (x.w[2] == 0x0000000000000000ull) {
147 x.w[3]++;
151 z.w[1] = x.w[1] + y.w[1];
152 if (z.w[1] < x.w[1]) {
153 x.w[2]++;
154 if (x.w[2] == 0x0000000000000000ull) {
155 x.w[3]++;
158 z.w[2] = x.w[2] + y.w[2];
159 if (z.w[2] < x.w[2]) {
160 x.w[3]++;
162 z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible
163 *pz = z;
166 static void
167 sub256 (UINT256 x, UINT256 y, UINT256 * pz) {
168 // *z = x - y; assume x >= y
169 UINT256 z;
170 z.w[0] = x.w[0] - y.w[0];
171 if (z.w[0] > x.w[0]) {
172 x.w[1]--;
173 if (x.w[1] == 0xffffffffffffffffull) {
174 x.w[2]--;
175 if (x.w[2] == 0xffffffffffffffffull) {
176 x.w[3]--;
180 z.w[1] = x.w[1] - y.w[1];
181 if (z.w[1] > x.w[1]) {
182 x.w[2]--;
183 if (x.w[2] == 0xffffffffffffffffull) {
184 x.w[3]--;
187 z.w[2] = x.w[2] - y.w[2];
188 if (z.w[2] > x.w[2]) {
189 x.w[3]--;
191 z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y
192 *pz = z;
196 static int
197 nr_digits256 (UINT256 R256) {
198 int ind;
199 // determine the number of decimal digits in R256
200 if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
201 // between 1 and 19 digits
202 for (ind = 1; ind <= 19; ind++) {
203 if (R256.w[0] < ten2k64[ind]) {
204 break;
207 // ind digits
208 } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
209 (R256.w[1] < ten2k128[0].w[1] ||
210 (R256.w[1] == ten2k128[0].w[1]
211 && R256.w[0] < ten2k128[0].w[0]))) {
212 // 20 digits
213 ind = 20;
214 } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
215 // between 21 and 38 digits
216 for (ind = 1; ind <= 18; ind++) {
217 if (R256.w[1] < ten2k128[ind].w[1] ||
218 (R256.w[1] == ten2k128[ind].w[1] &&
219 R256.w[0] < ten2k128[ind].w[0])) {
220 break;
223 // ind + 20 digits
224 ind = ind + 20;
225 } else if (R256.w[3] == 0x0 &&
226 (R256.w[2] < ten2k256[0].w[2] ||
227 (R256.w[2] == ten2k256[0].w[2] &&
228 R256.w[1] < ten2k256[0].w[1]) ||
229 (R256.w[2] == ten2k256[0].w[2] &&
230 R256.w[1] == ten2k256[0].w[1] &&
231 R256.w[0] < ten2k256[0].w[0]))) {
232 // 39 digits
233 ind = 39;
234 } else {
235 // between 40 and 68 digits
236 for (ind = 1; ind <= 29; ind++) {
237 if (R256.w[3] < ten2k256[ind].w[3] ||
238 (R256.w[3] == ten2k256[ind].w[3] &&
239 R256.w[2] < ten2k256[ind].w[2]) ||
240 (R256.w[3] == ten2k256[ind].w[3] &&
241 R256.w[2] == ten2k256[ind].w[2] &&
242 R256.w[1] < ten2k256[ind].w[1]) ||
243 (R256.w[3] == ten2k256[ind].w[3] &&
244 R256.w[2] == ten2k256[ind].w[2] &&
245 R256.w[1] == ten2k256[ind].w[1] &&
246 R256.w[0] < ten2k256[ind].w[0])) {
247 break;
250 // ind + 39 digits
251 ind = ind + 39;
253 return (ind);
// add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
// use the rounding information from ptr_is_* to avoid a double rounding error
//
// q3, q4   - number of decimal digits in C3 and C4
// e4       - unbiased exponent of C4 (the exponent of the exact result)
// delta    - exponent difference already accounted for by the caller
// p34      - destination precision (34 digits, possibly reduced by caller)
// z_sign   - sign bit of z; p_sign - sign bit of the product x*y
// C3, C4   - coefficients of z (128-bit) and of x*y (256-bit)
// rnd_mode - rounding mode to apply
// ptr_is_* - in/out rounding-status indicators (midpoint/inexact from the
//            previous rounding on entry; for this rounding on exit)
// ptrfpsf  - accumulated status flags; ptrres - the result
static void
add_and_round (int q3,
               int q4,
               int e4,
               int delta,
               int p34,
               UINT64 z_sign,
               UINT64 p_sign,
               UINT128 C3,
               UINT256 C4,
               int rnd_mode,
               int *ptr_is_midpoint_lt_even,
               int *ptr_is_midpoint_gt_even,
               int *ptr_is_inexact_lt_midpoint,
               int *ptr_is_inexact_gt_midpoint,
               _IDEC_flags * ptrfpsf, UINT128 * ptrres) {

  int scale;
  int x0;
  int ind;
  UINT64 R64;
  UINT128 P128, R128;
  UINT192 P192, R192;
  UINT256 R256;
  int is_midpoint_lt_even = 0;
  int is_midpoint_gt_even = 0;
  int is_inexact_lt_midpoint = 0;
  int is_inexact_gt_midpoint = 0;
  int is_midpoint_lt_even0 = 0;
  int is_midpoint_gt_even0 = 0;
  int is_inexact_lt_midpoint0 = 0;
  int is_inexact_gt_midpoint0 = 0;
  int incr_exp = 0;
  int is_tiny = 0;
  int lt_half_ulp = 0;
  int eq_half_ulp = 0;
  // int gt_half_ulp = 0;
  UINT128 res = *ptrres;

  // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
  scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
  // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1

  // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
  // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
  if (scale == 0) {
    R256.w[3] = 0x0ull;
    R256.w[2] = 0x0ull;
    R256.w[1] = C3.w[1];
    R256.w[0] = C3.w[0];
  } else if (scale <= 19) { // 10^scale fits in 64 bits
    P128.w[1] = 0;
    P128.w[0] = ten2k64[scale];
    __mul_128x128_to_256 (R256, P128, C3);
  } else if (scale <= 38) { // 10^scale fits in 128 bits
    __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3);
  } else if (scale <= 57) { // 39 <= scale <= 57
    // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
    // (10^67 has 223 bits; 10^69 has 230 bits);
    // must split the computation:
    // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
    // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
    // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
    __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3);
    // now multiply R128 by 10^38
    __mul_128x128_to_256 (R256, R128, ten2k128[18]);
  } else { // 58 <= scale <= 66
    // 10^scale takes between 193 and 220 bits,
    // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
    // must split the computation:
    // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
    // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
    // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
    // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
    // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
    __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]);
    // now calculate 10*38 * 10^(scale-38) * C3
    __mul_128x128_to_256 (R256, R128, ten2k128[18]);
  }
  // C3 * 10^scale is now in R256

  // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
  // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
  // possible
  // add/subtract C4 and C3 * 10^scale; the exponent is e4
  if (p_sign == z_sign) { // R256 = C4 + R256
    // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
    // but may require rounding
    add256 (C4, R256, &R256);
  } else { // if (p_sign != z_sign) { // R256 = C4 - R256
    // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
    // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
    // but may require rounding

    // compare first R256 = C3 * 10^scale and C4
    if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) ||
        (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
        (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
        R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
      // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
      // but may require rounding
      sub256 (R256, C4, &R256);
      // flip p_sign too, because the result has the sign of z
      p_sign = z_sign;
    } else { // if C4 > C3 * 10^scale
      // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
      // but may require rounding
      sub256 (C4, R256, &R256);
    }
    // if the result is pure zero, the sign depends on the rounding mode
    // (x*y and z had opposite signs)
    if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull &&
        R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) {
      if (rnd_mode != ROUNDING_DOWN)
        p_sign = 0x0000000000000000ull;
      else
        p_sign = 0x8000000000000000ull;
      // the exponent is max (e4, expmin)
      if (e4 < -6176)
        e4 = expmin;
      // assemble result
      res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49);
      res.w[0] = 0x0;
      *ptrres = res;
      return;
    }
  }

  // determine the number of decimal digits in R256
  ind = nr_digits256 (R256);

  // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
  // round to the destination precision, with unbounded exponent

  if (ind <= p34) {
    // result rounded to the destination precision with unbounded exponent
    // is exact
    if (ind + e4 < p34 + expmin) {
      is_tiny = 1; // applies to all rounding modes
    }
    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1];
    res.w[0] = R256.w[0];
    // Note: res is correct only if expmin <= e4 <= expmax
  } else { // if (ind > p34)
    // if more than P digits, round to nearest to P digits
    // round R256 to p34 digits
    x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
    if (ind <= 38) {
      P128.w[1] = R256.w[1];
      P128.w[0] = R256.w[0];
      round128_19_38 (ind, x0, P128, &R128, &incr_exp,
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
    } else if (ind <= 57) {
      P192.w[2] = R256.w[2];
      P192.w[1] = R256.w[1];
      P192.w[0] = R256.w[0];
      round192_39_57 (ind, x0, P192, &R192, &incr_exp,
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
      R128.w[1] = R192.w[1];
      R128.w[0] = R192.w[0];
    } else { // if (ind <= 68)
      round256_58_76 (ind, x0, R256, &R256, &incr_exp,
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
      R128.w[1] = R256.w[1];
      R128.w[0] = R256.w[0];
    }
    // the rounded result has p34 = 34 digits
    e4 = e4 + x0 + incr_exp;
    if (rnd_mode == ROUNDING_TO_NEAREST) {
      if (e4 < expmin) {
        is_tiny = 1; // for other rounding modes apply correction
      }
    } else {
      // for RM, RP, RZ, RA apply correction in order to determine tininess
      // but do not save the result; apply the correction to
      // (-1)^p_sign * significand * 10^0
      P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
      P128.w[0] = R128.w[0];
      rounding_correction (rnd_mode,
                           is_inexact_lt_midpoint,
                           is_inexact_gt_midpoint, is_midpoint_lt_even,
                           is_midpoint_gt_even, 0, &P128, ptrfpsf);
      scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
      // the number of digits in the significand is p34 = 34
      if (e4 + scale < expmin) {
        is_tiny = 1;
      }
    }
    ind = p34; // the number of decimal digits in the significand of res
    res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN
    res.w[0] = R128.w[0];
    // Note: res is correct only if expmin <= e4 <= expmax
    // set the inexact flag after rounding with bounded exponent, if any
  }
  // at this point we have the result rounded with unbounded exponent in
  // res and we know its tininess:
  // res = (-1)^p_sign * significand * 10^e4,
  // where q (significand) = ind <= p34
  // Note: res is correct only if expmin <= e4 <= expmax

  // check for overflow if RN
  if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) {
    res.w[1] = p_sign | 0x7800000000000000ull;
    res.w[0] = 0x0000000000000000ull;
    *ptrres = res;
    *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
    return; // BID_RETURN (res)
  } // else not overflow or not RN, so continue

  // if (e4 >= expmin) we have the result rounded with bounded exponent
  if (e4 < expmin) {
    x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
    // where the result rounded [at most] once is
    // (-1)^p_sign * significand_res * 10^e4

    // avoid double rounding error: save the first rounding's status in
    // the *0 variables and re-derive the combined status after the
    // second (denormalizing) rounding below
    is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
    is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
    is_midpoint_lt_even0 = is_midpoint_lt_even;
    is_midpoint_gt_even0 = is_midpoint_gt_even;
    is_inexact_lt_midpoint = 0;
    is_inexact_gt_midpoint = 0;
    is_midpoint_lt_even = 0;
    is_midpoint_gt_even = 0;

    if (x0 > ind) {
      // nothing is left of res when moving the decimal point left x0 digits
      is_inexact_lt_midpoint = 1;
      res.w[1] = p_sign | 0x0000000000000000ull;
      res.w[0] = 0x0000000000000000ull;
      e4 = expmin;
    } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
      // this is <, =, or > 1/2 ulp
      // compare the ind-digit value in the significand of res with
      // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
      // less than, equal to, or greater than 1/2 ulp (significand of res)
      R128.w[1] = res.w[1] & MASK_COEFF;
      R128.w[0] = res.w[0];
      if (ind <= 19) {
        if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
          lt_half_ulp = 1;
          is_inexact_lt_midpoint = 1;
        } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
          eq_half_ulp = 1;
          is_midpoint_gt_even = 1;
        } else { // > 1/2 ulp
          // gt_half_ulp = 1;
          is_inexact_gt_midpoint = 1;
        }
      } else { // if (ind <= 38) {
        if (R128.w[1] < midpoint128[ind - 20].w[1] ||
            (R128.w[1] == midpoint128[ind - 20].w[1] &&
            R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
          lt_half_ulp = 1;
          is_inexact_lt_midpoint = 1;
        } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
            R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
          eq_half_ulp = 1;
          is_midpoint_gt_even = 1;
        } else { // > 1/2 ulp
          // gt_half_ulp = 1;
          is_inexact_gt_midpoint = 1;
        }
      }
      if (lt_half_ulp || eq_half_ulp) {
        // res = +0.0 * 10^expmin
        res.w[1] = 0x0000000000000000ull;
        res.w[0] = 0x0000000000000000ull;
      } else { // if (gt_half_ulp)
        // res = +1 * 10^expmin
        res.w[1] = 0x0000000000000000ull;
        res.w[0] = 0x0000000000000001ull;
      }
      res.w[1] = p_sign | res.w[1];
      e4 = expmin;
    } else { // if (1 <= x0 <= ind - 1 <= 33)
      // round the ind-digit result to ind - x0 digits

      if (ind <= 18) { // 2 <= ind <= 18
        round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
                      &is_midpoint_lt_even, &is_midpoint_gt_even,
                      &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
        res.w[1] = 0x0;
        res.w[0] = R64;
      } else if (ind <= 38) {
        P128.w[1] = res.w[1] & MASK_COEFF;
        P128.w[0] = res.w[0];
        round128_19_38 (ind, x0, P128, &res, &incr_exp,
                        &is_midpoint_lt_even, &is_midpoint_gt_even,
                        &is_inexact_lt_midpoint,
                        &is_inexact_gt_midpoint);
      }
      e4 = e4 + x0; // expmin
      // we want the exponent to be expmin, so if incr_exp = 1 then
      // multiply the rounded result by 10 - it will still fit in 113 bits
      if (incr_exp) {
        // 64 x 128 -> 128
        P128.w[1] = res.w[1] & MASK_COEFF;
        P128.w[0] = res.w[0];
        __mul_64x128_to_128 (res, ten2k64[1], P128);
      }
      res.w[1] =
          p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
      // avoid a double rounding error
      if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
          is_midpoint_lt_even) { // double rounding error upward
        // res = res - 1
        res.w[0]--;
        if (res.w[0] == 0xffffffffffffffffull)
          res.w[1]--;
        // Note: a double rounding error upward is not possible; for this
        // the result after the first rounding would have to be 99...95
        // (35 digits in all), possibly followed by a number of zeros; this
        // is not possible in Cases (2)-(6) or (15)-(17) which may get here
        is_midpoint_lt_even = 0;
        is_inexact_lt_midpoint = 1;
      } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
          is_midpoint_gt_even) { // double rounding error downward
        // res = res + 1
        res.w[0]++;
        if (res.w[0] == 0)
          res.w[1]++;
        is_midpoint_gt_even = 0;
        is_inexact_gt_midpoint = 1;
      } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
          !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
        // if this second rounding was exact the result may still be
        // inexact because of the first rounding
        if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
          is_inexact_gt_midpoint = 1;
        }
        if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
          is_inexact_lt_midpoint = 1;
        }
      } else if (is_midpoint_gt_even &&
          (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
        // pulled up to a midpoint
        is_inexact_lt_midpoint = 1;
        is_inexact_gt_midpoint = 0;
        is_midpoint_lt_even = 0;
        is_midpoint_gt_even = 0;
      } else if (is_midpoint_lt_even &&
          (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
        // pulled down to a midpoint
        is_inexact_lt_midpoint = 0;
        is_inexact_gt_midpoint = 1;
        is_midpoint_lt_even = 0;
        is_midpoint_gt_even = 0;
      } else {
        ; // the result is exact
      }
    }
  }
  // res contains the correct result
  // apply correction if not rounding to nearest
  if (rnd_mode != ROUNDING_TO_NEAREST) {
    rounding_correction (rnd_mode,
                         is_inexact_lt_midpoint, is_inexact_gt_midpoint,
                         is_midpoint_lt_even, is_midpoint_gt_even,
                         e4, &res, ptrfpsf);
  }
  if (is_midpoint_lt_even || is_midpoint_gt_even ||
      is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
    // set the inexact flag
    *ptrfpsf |= INEXACT_EXCEPTION;
    if (is_tiny)
      *ptrfpsf |= UNDERFLOW_EXCEPTION;
  }

  *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
  *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
  *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
  *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
  *ptrres = res;
  return;
}
639 #if DECIMAL_CALL_BY_REFERENCE
640 static void
641 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
642 int *ptr_is_midpoint_gt_even,
643 int *ptr_is_inexact_lt_midpoint,
644 int *ptr_is_inexact_gt_midpoint, UINT128 * pres,
645 UINT128 * px, UINT128 * py,
646 UINT128 *
647 pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
648 _EXC_INFO_PARAM) {
649 UINT128 x = *px, y = *py, z = *pz;
650 #if !DECIMAL_GLOBAL_ROUNDING
651 unsigned int rnd_mode = *prnd_mode;
652 #endif
653 #else
654 static UINT128
655 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
656 int *ptr_is_midpoint_gt_even,
657 int *ptr_is_inexact_lt_midpoint,
658 int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y,
659 UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
660 _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
661 #endif
663 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
664 UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign;
665 UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp;
666 int true_p_exp;
667 UINT128 C1, C2, C3;
668 UINT256 C4;
669 int q1 = 0, q2 = 0, q3 = 0, q4;
670 int e1, e2, e3, e4;
671 int scale, ind, delta, x0;
672 int p34 = P34; // used to modify the limit on the number of digits
673 BID_UI64DOUBLE tmp;
674 int x_nr_bits, y_nr_bits, z_nr_bits;
675 unsigned int save_fpsf;
676 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
677 int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
678 int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0;
679 int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
680 int incr_exp = 0;
681 int lsb;
682 int lt_half_ulp = 0;
683 int eq_half_ulp = 0;
684 int gt_half_ulp = 0;
685 int is_tiny = 0;
686 UINT64 R64, tmp64;
687 UINT128 P128, R128;
688 UINT192 P192, R192;
689 UINT256 R256;
691 // the following are based on the table of special cases for fma; the NaN
692 // behavior is similar to that of the IA-64 Architecture fma
694 // identify cases where at least one operand is NaN
696 BID_SWAP128 (x);
697 BID_SWAP128 (y);
698 BID_SWAP128 (z);
699 if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
700 // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
701 // check first for non-canonical NaN payload
702 if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
703 (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
704 (y.w[0] > 0x38c15b09ffffffffull))) {
705 y.w[1] = y.w[1] & 0xffffc00000000000ull;
706 y.w[0] = 0x0ull;
708 if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN
709 // set invalid flag
710 *pfpsf |= INVALID_EXCEPTION;
711 // return quiet (y)
712 res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
713 res.w[0] = y.w[0];
714 } else { // y is QNaN
715 // return y
716 res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
717 res.w[0] = y.w[0];
718 // if z = SNaN or x = SNaN signal invalid exception
719 if ((z.w[1] & MASK_SNAN) == MASK_SNAN ||
720 (x.w[1] & MASK_SNAN) == MASK_SNAN) {
721 // set invalid flag
722 *pfpsf |= INVALID_EXCEPTION;
725 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
726 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
727 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
728 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
729 BID_SWAP128 (res);
730 BID_RETURN (res)
731 } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN
732 // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
733 // check first for non-canonical NaN payload
734 if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
735 (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
736 (z.w[0] > 0x38c15b09ffffffffull))) {
737 z.w[1] = z.w[1] & 0xffffc00000000000ull;
738 z.w[0] = 0x0ull;
740 if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN
741 // set invalid flag
742 *pfpsf |= INVALID_EXCEPTION;
743 // return quiet (z)
744 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
745 res.w[0] = z.w[0];
746 } else { // z is QNaN
747 // return z
748 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
749 res.w[0] = z.w[0];
750 // if x = SNaN signal invalid exception
751 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {
752 // set invalid flag
753 *pfpsf |= INVALID_EXCEPTION;
756 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
757 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
758 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
759 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
760 BID_SWAP128 (res);
761 BID_RETURN (res)
762 } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
763 // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
764 // check first for non-canonical NaN payload
765 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
766 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
767 (x.w[0] > 0x38c15b09ffffffffull))) {
768 x.w[1] = x.w[1] & 0xffffc00000000000ull;
769 x.w[0] = 0x0ull;
771 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
772 // set invalid flag
773 *pfpsf |= INVALID_EXCEPTION;
774 // return quiet (x)
775 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
776 res.w[0] = x.w[0];
777 } else { // x is QNaN
778 // return x
779 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
780 res.w[0] = x.w[0];
782 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
783 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
784 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
785 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
786 BID_SWAP128 (res);
787 BID_RETURN (res)
789 // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
790 // for non-canonical values
792 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
793 C1.w[1] = x.w[1] & MASK_COEFF;
794 C1.w[0] = x.w[0];
795 if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf
796 // if x is not infinity check for non-canonical values - treated as zero
797 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
798 // non-canonical
799 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
800 C1.w[1] = 0; // significand high
801 C1.w[0] = 0; // significand low
802 } else { // G0_G1 != 11
803 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
804 if (C1.w[1] > 0x0001ed09bead87c0ull ||
805 (C1.w[1] == 0x0001ed09bead87c0ull &&
806 C1.w[0] > 0x378d8e63ffffffffull)) {
807 // x is non-canonical if coefficient is larger than 10^34 -1
808 C1.w[1] = 0;
809 C1.w[0] = 0;
810 } else { // canonical
815 y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
816 C2.w[1] = y.w[1] & MASK_COEFF;
817 C2.w[0] = y.w[0];
818 if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf
819 // if y is not infinity check for non-canonical values - treated as zero
820 if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
821 // non-canonical
822 y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
823 C2.w[1] = 0; // significand high
824 C2.w[0] = 0; // significand low
825 } else { // G0_G1 != 11
826 y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
827 if (C2.w[1] > 0x0001ed09bead87c0ull ||
828 (C2.w[1] == 0x0001ed09bead87c0ull &&
829 C2.w[0] > 0x378d8e63ffffffffull)) {
830 // y is non-canonical if coefficient is larger than 10^34 -1
831 C2.w[1] = 0;
832 C2.w[0] = 0;
833 } else { // canonical
838 z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
839 C3.w[1] = z.w[1] & MASK_COEFF;
840 C3.w[0] = z.w[0];
841 if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf
842 // if z is not infinity check for non-canonical values - treated as zero
843 if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
844 // non-canonical
845 z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
846 C3.w[1] = 0; // significand high
847 C3.w[0] = 0; // significand low
848 } else { // G0_G1 != 11
849 z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits
850 if (C3.w[1] > 0x0001ed09bead87c0ull ||
851 (C3.w[1] == 0x0001ed09bead87c0ull &&
852 C3.w[0] > 0x378d8e63ffffffffull)) {
853 // z is non-canonical if coefficient is larger than 10^34 -1
854 C3.w[1] = 0;
855 C3.w[0] = 0;
856 } else { // canonical
862 p_sign = x_sign ^ y_sign; // sign of the product
864 // identify cases where at least one operand is infinity
866 if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf
867 if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
868 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
869 if (p_sign == z_sign) {
870 res.w[1] = z_sign | MASK_INF;
871 res.w[0] = 0x0;
872 } else {
873 // return QNaN Indefinite
874 res.w[1] = 0x7c00000000000000ull;
875 res.w[0] = 0x0000000000000000ull;
876 // set invalid flag
877 *pfpsf |= INVALID_EXCEPTION;
879 } else { // z = 0 or z = f
880 res.w[1] = p_sign | MASK_INF;
881 res.w[0] = 0x0;
883 } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f
884 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
885 if (p_sign == z_sign) {
886 res.w[1] = z_sign | MASK_INF;
887 res.w[0] = 0x0;
888 } else {
889 // return QNaN Indefinite
890 res.w[1] = 0x7c00000000000000ull;
891 res.w[0] = 0x0000000000000000ull;
892 // set invalid flag
893 *pfpsf |= INVALID_EXCEPTION;
895 } else { // z = 0 or z = f
896 res.w[1] = p_sign | MASK_INF;
897 res.w[0] = 0x0;
899 } else { // y = 0
900 // return QNaN Indefinite
901 res.w[1] = 0x7c00000000000000ull;
902 res.w[0] = 0x0000000000000000ull;
903 // set invalid flag
904 *pfpsf |= INVALID_EXCEPTION;
906 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
907 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
908 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
909 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
910 BID_SWAP128 (res);
911 BID_RETURN (res)
912 } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
913 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
914 // x = f, necessarily
915 if ((p_sign != z_sign)
916 || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) {
917 // return QNaN Indefinite
918 res.w[1] = 0x7c00000000000000ull;
919 res.w[0] = 0x0000000000000000ull;
920 // set invalid flag
921 *pfpsf |= INVALID_EXCEPTION;
922 } else {
923 res.w[1] = z_sign | MASK_INF;
924 res.w[0] = 0x0;
926 } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0
927 // z = 0, f, inf
928 // return QNaN Indefinite
929 res.w[1] = 0x7c00000000000000ull;
930 res.w[0] = 0x0000000000000000ull;
931 // set invalid flag
932 *pfpsf |= INVALID_EXCEPTION;
933 } else {
934 // x = f and z = 0, f, necessarily
935 res.w[1] = p_sign | MASK_INF;
936 res.w[0] = 0x0;
938 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
939 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
940 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
941 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
942 BID_SWAP128 (res);
943 BID_RETURN (res)
944 } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
945 // x = 0, f and y = 0, f, necessarily
946 res.w[1] = z_sign | MASK_INF;
947 res.w[0] = 0x0;
948 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
949 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
950 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
951 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
952 BID_SWAP128 (res);
953 BID_RETURN (res)
956 true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176;
957 if (true_p_exp < -6176)
958 p_exp = 0; // cannot be less than EXP_MIN
959 else
960 p_exp = (UINT64) (true_p_exp + 6176) << 49;
962 if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0
963 // the result is 0
964 if (p_exp < z_exp)
965 res.w[1] = p_exp; // preferred exponent
966 else
967 res.w[1] = z_exp; // preferred exponent
968 if (p_sign == z_sign) {
969 res.w[1] |= z_sign;
970 res.w[0] = 0x0;
971 } else { // x * y and z have opposite signs
972 if (rnd_mode == ROUNDING_DOWN) {
973 // res = -0.0
974 res.w[1] |= MASK_SIGN;
975 res.w[0] = 0x0;
976 } else {
977 // res = +0.0
978 // res.w[1] |= 0x0;
979 res.w[0] = 0x0;
982 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
983 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
984 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
985 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
986 BID_SWAP128 (res);
987 BID_RETURN (res)
989 // from this point on, we may need to know the number of decimal digits
990 // in the significands of x, y, z when x, y, z != 0
992 if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite)
993 // q1 = nr. of decimal digits in x
994 // determine first the nr. of bits in x
995 if (C1.w[1] == 0) {
996 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
997 // split the 64-bit value in two 32-bit halves to avoid rounding errors
998 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
999 tmp.d = (double) (C1.w[0] >> 32); // exact conversion
1000 x_nr_bits =
1001 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1002 } else { // x < 2^32
1003 tmp.d = (double) (C1.w[0]); // exact conversion
1004 x_nr_bits =
1005 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1007 } else { // if x < 2^53
1008 tmp.d = (double) C1.w[0]; // exact conversion
1009 x_nr_bits =
1010 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1012 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1013 tmp.d = (double) C1.w[1]; // exact conversion
1014 x_nr_bits =
1015 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1017 q1 = nr_digits[x_nr_bits - 1].digits;
1018 if (q1 == 0) {
1019 q1 = nr_digits[x_nr_bits - 1].digits1;
1020 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
1021 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
1022 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
1023 q1++;
1027 if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite)
1028 if (C2.w[1] == 0) {
1029 if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53
1030 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1031 if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32
1032 tmp.d = (double) (C2.w[0] >> 32); // exact conversion
1033 y_nr_bits =
1034 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1035 } else { // y < 2^32
1036 tmp.d = (double) C2.w[0]; // exact conversion
1037 y_nr_bits =
1038 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1040 } else { // if y < 2^53
1041 tmp.d = (double) C2.w[0]; // exact conversion
1042 y_nr_bits =
1043 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1045 } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1])
1046 tmp.d = (double) C2.w[1]; // exact conversion
1047 y_nr_bits =
1048 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1051 q2 = nr_digits[y_nr_bits].digits;
1052 if (q2 == 0) {
1053 q2 = nr_digits[y_nr_bits].digits1;
1054 if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi ||
1055 (C2.w[1] == nr_digits[y_nr_bits].threshold_hi &&
1056 C2.w[0] >= nr_digits[y_nr_bits].threshold_lo))
1057 q2++;
1061 if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite)
1062 if (C3.w[1] == 0) {
1063 if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53
1064 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1065 if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32
1066 tmp.d = (double) (C3.w[0] >> 32); // exact conversion
1067 z_nr_bits =
1068 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1069 } else { // z < 2^32
1070 tmp.d = (double) C3.w[0]; // exact conversion
1071 z_nr_bits =
1072 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1074 } else { // if z < 2^53
1075 tmp.d = (double) C3.w[0]; // exact conversion
1076 z_nr_bits =
1077 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1079 } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1])
1080 tmp.d = (double) C3.w[1]; // exact conversion
1081 z_nr_bits =
1082 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1085 q3 = nr_digits[z_nr_bits].digits;
1086 if (q3 == 0) {
1087 q3 = nr_digits[z_nr_bits].digits1;
1088 if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi ||
1089 (C3.w[1] == nr_digits[z_nr_bits].threshold_hi &&
1090 C3.w[0] >= nr_digits[z_nr_bits].threshold_lo))
1091 q3++;
1095 if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) ||
1096 (C2.w[1] == 0x0 && C2.w[0] == 0x0)) {
1097 // x = 0 or y = 0
1098 // z = f, necessarily; for 0 + z return z, with the preferred exponent
1099 // the result is z, but need to get the preferred exponent
1100 if (z_exp <= p_exp) { // the preferred exponent is z_exp
1101 res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1];
1102 res.w[0] = C3.w[0];
1103 } else { // if (p_exp < z_exp) the preferred exponent is p_exp
1104 // return (C3 * 10^scale) * 10^(z_exp - scale)
1105 // where scale = min (p34-q3, (z_exp-p_exp) >> 49)
1106 scale = p34 - q3;
1107 ind = (z_exp - p_exp) >> 49;
1108 if (ind < scale)
1109 scale = ind;
1110 if (scale == 0) {
1111 res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant
1112 res.w[0] = z.w[0];
1113 } else if (q3 <= 19) { // z fits in 64 bits
1114 if (scale <= 19) { // 10^scale fits in 64 bits
1115 // 64 x 64 C3.w[0] * ten2k64[scale]
1116 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1117 } else { // 10^scale fits in 128 bits
1118 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1119 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1121 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1122 // 64 x 128 ten2k64[scale] * C3
1123 __mul_128x64_to_128 (res, ten2k64[scale], C3);
1125 // subtract scale from the exponent
1126 z_exp = z_exp - ((UINT64) scale << 49);
1127 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1129 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1130 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1131 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1132 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1133 BID_SWAP128 (res);
1134 BID_RETURN (res)
1135 } else {
1136 ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
1139 e1 = (x_exp >> 49) - 6176; // unbiased exponent of x
1140 e2 = (y_exp >> 49) - 6176; // unbiased exponent of y
1141 e3 = (z_exp >> 49) - 6176; // unbiased exponent of z
1142 e4 = e1 + e2; // unbiased exponent of the exact x * y
1144 // calculate C1 * C2 and its number of decimal digits, q4
1146 // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits
1147 // where 2 <= q1 + q2 <= 68
1148 // calculate C4 = C1 * C2 and determine q
1149 C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0;
1150 if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
1151 C4.w[0] = C1.w[0] * C2.w[0];
1152 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1153 if (C4.w[0] < ten2k64[q1 + q2 - 1])
1154 q4 = q1 + q2 - 1; // q4 in [1, 18]
1155 else
1156 q4 = q1 + q2; // q4 in [2, 19]
1157 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1158 } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits
1159 // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
1160 __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]);
1161 // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
1162 if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1
1163 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1164 q4 = 19; // 19 = q1 + q2 - 1
1165 } else {
1166 // if (C4.w[1] == 0)
1167 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1168 // else
1169 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1170 q4 = 20; // 20 = q1 + q2
1172 } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38
1173 // C4 = C1 * C2 fits in 64 or 128 bits
1174 // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits)
1175 // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits
1176 if (q1 <= 19) {
1177 __mul_128x64_to_128 (C4, C1.w[0], C2);
1178 } else { // q2 <= 19
1179 __mul_128x64_to_128 (C4, C2.w[0], C1);
1181 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1182 if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] ||
1183 (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] &&
1184 C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) {
1185 // if (C4.w[1] == 0) // q4 = 20, necessarily
1186 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1187 // else
1188 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1189 q4 = q1 + q2 - 1; // q4 in [20, 37]
1190 } else {
1191 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1192 q4 = q1 + q2; // q4 in [21, 38]
1194 } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits
1195 // both C1 and C2 fit in 128 bits (actually in 113 bits)
1196 // may replace this by 128x128_to192
1197 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0
1198 // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
1199 if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] ||
1200 (C4.w[1] == ten2k128[18].w[1]
1201 && C4.w[0] < ten2k128[18].w[0]))) {
1202 // 18 = 38 - 20 = q1+q2-1 - 20
1203 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1204 q4 = 38; // 38 = q1 + q2 - 1
1205 } else {
1206 // if (C4.w[2] == 0)
1207 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1208 // else
1209 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1210 q4 = 39; // 39 = q1 + q2
1212 } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57
1213 // C4 = C1 * C2 fits in 128 or 192 bits
1214 // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits)
1215 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1216 // may fit in 64 bits
1217 if (C1.w[1] == 0) { // C1 fits in 64 bits
1218 // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1219 __mul_64x128_full (C4.w[2], C4, C1.w[0], C2);
1220 } else if (C2.w[1] == 0) { // C2 fits in 64 bits
1221 // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1222 __mul_64x128_full (C4.w[2], C4, C2.w[0], C1);
1223 } else { // both C1 and C2 require 128 bits
1224 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1225 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1227 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1228 if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1229 (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1230 (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1231 (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1232 C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) {
1233 // if (C4.w[2] == 0) // q4 = 39, necessarily
1234 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1235 // else
1236 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1237 q4 = q1 + q2 - 1; // q4 in [39, 56]
1238 } else {
1239 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1240 q4 = q1 + q2; // q4 in [40, 57]
1242 } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits
1243 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1244 // may fit in 64 bits
1245 if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits
1246 __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192
1247 } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits
1248 __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192
1249 } else { // C1 * C2 will fit in 192 bits or in 256 bits
1250 __mul_128x128_to_256 (C4, C1, C2);
1252 // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
1253 if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] ||
1254 (C4.w[2] == ten2k256[18].w[2]
1255 && (C4.w[1] < ten2k256[18].w[1]
1256 || (C4.w[1] == ten2k256[18].w[1]
1257 && C4.w[0] < ten2k256[18].w[0]))))) {
1258 // 18 = 57 - 39 = q1+q2-1 - 39
1259 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1260 q4 = 57; // 57 = q1 + q2 - 1
1261 } else {
1262 // if (C4.w[3] == 0)
1263 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1264 // else
1265 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1266 q4 = 58; // 58 = q1 + q2
1268 } else { // if 59 <= q1 + q2 <= 68
1269 // C4 = C1 * C2 fits in 192 or 256 bits
1270 // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits)
1271 // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in
1272 // 64 bits
1273 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1274 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1275 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1276 if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] ||
1277 (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] &&
1278 (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1279 (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1280 (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1281 (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1282 C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) {
1283 // if (C4.w[3] == 0) // q4 = 58, necessarily
1284 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1285 // else
1286 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1287 q4 = q1 + q2 - 1; // q4 in [58, 67]
1288 } else {
1289 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1290 q4 = q1 + q2; // q4 in [59, 68]
1294 if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0
1295 save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
1296 *pfpsf = 0;
1298 if (q4 > p34) {
1300 // truncate C4 to p34 digits into res
1301 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
1302 x0 = q4 - p34;
1303 if (q4 <= 38) {
1304 P128.w[1] = C4.w[1];
1305 P128.w[0] = C4.w[0];
1306 round128_19_38 (q4, x0, P128, &res, &incr_exp,
1307 &is_midpoint_lt_even, &is_midpoint_gt_even,
1308 &is_inexact_lt_midpoint,
1309 &is_inexact_gt_midpoint);
1310 } else if (q4 <= 57) { // 35 <= q4 <= 57
1311 P192.w[2] = C4.w[2];
1312 P192.w[1] = C4.w[1];
1313 P192.w[0] = C4.w[0];
1314 round192_39_57 (q4, x0, P192, &R192, &incr_exp,
1315 &is_midpoint_lt_even, &is_midpoint_gt_even,
1316 &is_inexact_lt_midpoint,
1317 &is_inexact_gt_midpoint);
1318 res.w[0] = R192.w[0];
1319 res.w[1] = R192.w[1];
1320 } else { // if (q4 <= 68)
1321 round256_58_76 (q4, x0, C4, &R256, &incr_exp,
1322 &is_midpoint_lt_even, &is_midpoint_gt_even,
1323 &is_inexact_lt_midpoint,
1324 &is_inexact_gt_midpoint);
1325 res.w[0] = R256.w[0];
1326 res.w[1] = R256.w[1];
1328 e4 = e4 + x0;
1329 if (incr_exp) {
1330 e4 = e4 + 1;
1332 q4 = p34;
1333 // res is now the coefficient of the result rounded to the destination
1334 // precision, with unbounded exponent; the exponent is e4; q4=digits(res)
1335 } else { // if (q4 <= p34)
1336 // C4 * 10^e4 is the result rounded to the destination precision, with
1337 // unbounded exponent (which is exact)
1339 if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) {
1340 // e4 is too large, but can be brought within range by scaling up C4
1341 scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2
1342 // res = (C4 * 10^scale) * 10^expmax
1343 if (q4 <= 19) { // C4 fits in 64 bits
1344 if (scale <= 19) { // 10^scale fits in 64 bits
1345 // 64 x 64 C4.w[0] * ten2k64[scale]
1346 __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]);
1347 } else { // 10^scale fits in 128 bits
1348 // 64 x 128 C4.w[0] * ten2k128[scale - 20]
1349 __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]);
1351 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
1352 // 64 x 128 ten2k64[scale] * CC43
1353 __mul_128x64_to_128 (res, ten2k64[scale], C4);
1355 e4 = e4 - scale; // expmax
1356 q4 = q4 + scale;
1357 } else {
1358 res.w[1] = C4.w[1];
1359 res.w[0] = C4.w[0];
1361 // res is the coefficient of the result rounded to the destination
1362 // precision, with unbounded exponent (it has q4 digits); the exponent
1363 // is e4 (exact result)
1366 // check for overflow
1367 if (q4 + e4 > p34 + expmax) {
1368 if (rnd_mode == ROUNDING_TO_NEAREST) {
1369 res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf
1370 res.w[0] = 0x0000000000000000ull;
1371 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1372 } else {
1373 res.w[1] = p_sign | res.w[1];
1374 rounding_correction (rnd_mode,
1375 is_inexact_lt_midpoint,
1376 is_inexact_gt_midpoint,
1377 is_midpoint_lt_even, is_midpoint_gt_even,
1378 e4, &res, pfpsf);
1380 *pfpsf |= save_fpsf;
1381 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1382 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1383 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1384 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1385 BID_SWAP128 (res);
1386 BID_RETURN (res)
1388 // check for underflow
1389 if (q4 + e4 < expmin + P34) {
1390 is_tiny = 1; // the result is tiny
1391 if (e4 < expmin) {
1392 // if e4 < expmin, we must truncate more of res
1393 x0 = expmin - e4; // x0 >= 1
1394 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
1395 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
1396 is_midpoint_lt_even0 = is_midpoint_lt_even;
1397 is_midpoint_gt_even0 = is_midpoint_gt_even;
1398 is_inexact_lt_midpoint = 0;
1399 is_inexact_gt_midpoint = 0;
1400 is_midpoint_lt_even = 0;
1401 is_midpoint_gt_even = 0;
1402 // the number of decimal digits in res is q4
1403 if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
1404 if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
1405 round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp,
1406 &is_midpoint_lt_even, &is_midpoint_gt_even,
1407 &is_inexact_lt_midpoint,
1408 &is_inexact_gt_midpoint);
1409 if (incr_exp) {
1410 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
1411 R64 = ten2k64[q4 - x0];
1413 // res.w[1] = 0; (from above)
1414 res.w[0] = R64;
1415 } else { // if (q4 <= 34)
1416 // 19 <= q4 <= 38
1417 P128.w[1] = res.w[1];
1418 P128.w[0] = res.w[0];
1419 round128_19_38 (q4, x0, P128, &res, &incr_exp,
1420 &is_midpoint_lt_even, &is_midpoint_gt_even,
1421 &is_inexact_lt_midpoint,
1422 &is_inexact_gt_midpoint);
1423 if (incr_exp) {
1424 // increase coefficient by a factor of 10; this will be <= 10^33
1425 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
1426 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
1427 // res.w[1] = 0;
1428 res.w[0] = ten2k64[q4 - x0];
1429 } else { // 20 <= q4 - x0 <= 37
1430 res.w[0] = ten2k128[q4 - x0 - 20].w[0];
1431 res.w[1] = ten2k128[q4 - x0 - 20].w[1];
1435 e4 = e4 + x0; // expmin
1436 } else if (x0 == q4) {
1437 // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
1438 // determine relationship with 1/2 ulp
1439 if (q4 <= 19) {
1440 if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
1441 lt_half_ulp = 1;
1442 is_inexact_lt_midpoint = 1;
1443 } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
1444 eq_half_ulp = 1;
1445 is_midpoint_gt_even = 1;
1446 } else { // > 1/2 ulp
1447 // gt_half_ulp = 1;
1448 is_inexact_gt_midpoint = 1;
1450 } else { // if (q4 <= 34)
1451 if (res.w[1] < midpoint128[q4 - 20].w[1] ||
1452 (res.w[1] == midpoint128[q4 - 20].w[1] &&
1453 res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp
1454 lt_half_ulp = 1;
1455 is_inexact_lt_midpoint = 1;
1456 } else if (res.w[1] == midpoint128[q4 - 20].w[1] &&
1457 res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
1458 eq_half_ulp = 1;
1459 is_midpoint_gt_even = 1;
1460 } else { // > 1/2 ulp
1461 // gt_half_ulp = 1;
1462 is_inexact_gt_midpoint = 1;
1465 if (lt_half_ulp || eq_half_ulp) {
1466 // res = +0.0 * 10^expmin
1467 res.w[1] = 0x0000000000000000ull;
1468 res.w[0] = 0x0000000000000000ull;
1469 } else { // if (gt_half_ulp)
1470 // res = +1 * 10^expmin
1471 res.w[1] = 0x0000000000000000ull;
1472 res.w[0] = 0x0000000000000001ull;
1474 e4 = expmin;
1475 } else { // if (x0 > q4)
1476 // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
1477 res.w[1] = 0;
1478 res.w[0] = 0;
1479 e4 = expmin;
1480 is_inexact_lt_midpoint = 1;
1482 // avoid a double rounding error
1483 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
1484 is_midpoint_lt_even) { // double rounding error upward
1485 // res = res - 1
1486 res.w[0]--;
1487 if (res.w[0] == 0xffffffffffffffffull)
1488 res.w[1]--;
1489 // Note: a double rounding error upward is not possible; for this
1490 // the result after the first rounding would have to be 99...95
1491 // (35 digits in all), possibly followed by a number of zeros; this
1492 // not possible for f * f + 0
1493 is_midpoint_lt_even = 0;
1494 is_inexact_lt_midpoint = 1;
1495 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
1496 is_midpoint_gt_even) { // double rounding error downward
1497 // res = res + 1
1498 res.w[0]++;
1499 if (res.w[0] == 0)
1500 res.w[1]++;
1501 is_midpoint_gt_even = 0;
1502 is_inexact_gt_midpoint = 1;
1503 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
1504 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
1505 // if this second rounding was exact the result may still be
1506 // inexact because of the first rounding
1507 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
1508 is_inexact_gt_midpoint = 1;
1510 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
1511 is_inexact_lt_midpoint = 1;
1513 } else if (is_midpoint_gt_even &&
1514 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
1515 // pulled up to a midpoint
1516 is_inexact_lt_midpoint = 1;
1517 is_inexact_gt_midpoint = 0;
1518 is_midpoint_lt_even = 0;
1519 is_midpoint_gt_even = 0;
1520 } else if (is_midpoint_lt_even &&
1521 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
1522 // pulled down to a midpoint
1523 is_inexact_lt_midpoint = 0;
1524 is_inexact_gt_midpoint = 1;
1525 is_midpoint_lt_even = 0;
1526 is_midpoint_gt_even = 0;
1527 } else {
1530 } else { // if e4 >= emin then q4 < P and the result is tiny and exact
1531 if (e3 < e4) {
1532 // if (e3 < e4) the preferred exponent is e3
1533 // return (C4 * 10^scale) * 10^(e4 - scale)
1534 // where scale = min (p34-q4, (e4 - e3))
1535 scale = p34 - q4;
1536 ind = e4 - e3;
1537 if (ind < scale)
1538 scale = ind;
1539 if (scale == 0) {
1540 ; // res and e4 are unchanged
1541 } else if (q4 <= 19) { // C4 fits in 64 bits
1542 if (scale <= 19) { // 10^scale fits in 64 bits
1543 // 64 x 64 res.w[0] * ten2k64[scale]
1544 __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]);
1545 } else { // 10^scale fits in 128 bits
1546 // 64 x 128 res.w[0] * ten2k128[scale - 20]
1547 __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]);
1549 } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
1550 // 64 x 128 ten2k64[scale] * C3
1551 __mul_128x64_to_128 (res, ten2k64[scale], res);
1553 // subtract scale from the exponent
1554 e4 = e4 - scale;
1558 // check for inexact result
1559 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1560 is_midpoint_lt_even || is_midpoint_gt_even) {
1561 // set the inexact flag and the underflow flag
1562 *pfpsf |= INEXACT_EXCEPTION;
1563 *pfpsf |= UNDERFLOW_EXCEPTION;
1565 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1566 if (rnd_mode != ROUNDING_TO_NEAREST) {
1567 rounding_correction (rnd_mode,
1568 is_inexact_lt_midpoint,
1569 is_inexact_gt_midpoint,
1570 is_midpoint_lt_even, is_midpoint_gt_even,
1571 e4, &res, pfpsf);
1573 *pfpsf |= save_fpsf;
1574 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1575 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1576 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1577 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1578 BID_SWAP128 (res);
1579 BID_RETURN (res)
1581 // no overflow, and no underflow for rounding to nearest
1582 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1584 if (rnd_mode != ROUNDING_TO_NEAREST) {
1585 rounding_correction (rnd_mode,
1586 is_inexact_lt_midpoint,
1587 is_inexact_gt_midpoint,
1588 is_midpoint_lt_even, is_midpoint_gt_even,
1589 e4, &res, pfpsf);
1590 // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
1591 if (e4 == expmin) {
1592 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
1593 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
1594 res.w[0] < 0x38c15b0a00000000ull)) {
1595 is_tiny = 1;
1600 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1601 is_midpoint_lt_even || is_midpoint_gt_even) {
1602 // set the inexact flag
1603 *pfpsf |= INEXACT_EXCEPTION;
1604 if (is_tiny)
1605 *pfpsf |= UNDERFLOW_EXCEPTION;
1608 if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact
1609 // need to ensure that the result has the preferred exponent
1610 p_exp = res.w[1] & MASK_EXP;
1611 if (z_exp < p_exp) { // the preferred exponent is z_exp
1612 // signficand of res in C3
1613 C3.w[1] = res.w[1] & MASK_COEFF;
1614 C3.w[0] = res.w[0];
1615 // the number of decimal digits of x * y is q4 <= 34
1616 // Note: the coefficient fits in 128 bits
1618 // return (C3 * 10^scale) * 10^(p_exp - scale)
1619 // where scale = min (p34-q4, (p_exp-z_exp) >> 49)
1620 scale = p34 - q4;
1621 ind = (p_exp - z_exp) >> 49;
1622 if (ind < scale)
1623 scale = ind;
1624 // subtract scale from the exponent
1625 p_exp = p_exp - ((UINT64) scale << 49);
1626 if (scale == 0) {
1627 ; // leave res unchanged
1628 } else if (q4 <= 19) { // x * y fits in 64 bits
1629 if (scale <= 19) { // 10^scale fits in 64 bits
1630 // 64 x 64 C3.w[0] * ten2k64[scale]
1631 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1632 } else { // 10^scale fits in 128 bits
1633 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1634 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1636 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1637 } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
1638 // 64 x 128 ten2k64[scale] * C3
1639 __mul_128x64_to_128 (res, ten2k64[scale], C3);
1640 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1642 } // else leave the result as it is, because p_exp <= z_exp
1644 *pfpsf |= save_fpsf;
1645 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1646 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1647 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1648 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1649 BID_SWAP128 (res);
1650 BID_RETURN (res)
1651 } // else we have f * f + f
1653 // continue with x = f, y = f, z = f
1655 delta = q3 + e3 - q4 - e4;
1656 delta_ge_zero:
1657 if (delta >= 0) {
1659 if (p34 <= delta - 1 || // Case (1')
1660 (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A)
1661 // check for overflow, which can occur only in Case (1')
1662 if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) {
1663 // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary
1664 // condition for (q3 + e3) > (p34 + expmax)
1665 if (rnd_mode == ROUNDING_TO_NEAREST) {
1666 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
1667 res.w[0] = 0x0000000000000000ull;
1668 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1669 } else {
1670 if (p_sign == z_sign) {
1671 is_inexact_lt_midpoint = 1;
1672 } else {
1673 is_inexact_gt_midpoint = 1;
1675 // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3)
1676 scale = p34 - q3;
1677 if (scale == 0) {
1678 res.w[1] = z_sign | C3.w[1];
1679 res.w[0] = C3.w[0];
1680 } else {
1681 if (q3 <= 19) { // C3 fits in 64 bits
1682 if (scale <= 19) { // 10^scale fits in 64 bits
1683 // 64 x 64 C3.w[0] * ten2k64[scale]
1684 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1685 } else { // 10^scale fits in 128 bits
1686 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1687 __mul_128x64_to_128 (res, C3.w[0],
1688 ten2k128[scale - 20]);
1690 } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
1691 // 64 x 128 ten2k64[scale] * C3
1692 __mul_128x64_to_128 (res, ten2k64[scale], C3);
1694 // the coefficient in res has q3 + scale = p34 digits
1696 e3 = e3 - scale;
1697 res.w[1] = z_sign | res.w[1];
1698 rounding_correction (rnd_mode,
1699 is_inexact_lt_midpoint,
1700 is_inexact_gt_midpoint,
1701 is_midpoint_lt_even, is_midpoint_gt_even,
1702 e3, &res, pfpsf);
1704 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1705 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1706 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1707 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1708 BID_SWAP128 (res);
1709 BID_RETURN (res)
1711 // res = z
1712 if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3)
1713 // return (C3 * 10^scale) * 10^(z_exp - scale)
1714 // where scale = min (p34-q3, z_exp-EMIN)
1715 scale = p34 - q3;
1716 ind = e3 + 6176;
1717 if (ind < scale)
1718 scale = ind;
1719 if (scale == 0) {
1720 res.w[1] = C3.w[1];
1721 res.w[0] = C3.w[0];
1722 } else if (q3 <= 19) { // z fits in 64 bits
1723 if (scale <= 19) { // 10^scale fits in 64 bits
1724 // 64 x 64 C3.w[0] * ten2k64[scale]
1725 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1726 } else { // 10^scale fits in 128 bits
1727 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1728 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1730 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1731 // 64 x 128 ten2k64[scale] * C3
1732 __mul_128x64_to_128 (res, ten2k64[scale], C3);
1734 // the coefficient in res has q3 + scale digits
1735 // subtract scale from the exponent
1736 z_exp = z_exp - ((UINT64) scale << 49);
1737 e3 = e3 - scale;
1738 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1739 if (scale + q3 < p34)
1740 *pfpsf |= UNDERFLOW_EXCEPTION;
1741 } else {
1742 scale = 0;
1743 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1];
1744 res.w[0] = C3.w[0];
1747 // use the following to avoid double rounding errors when operating on
1748 // mixed formats in rounding to nearest, and for correcting the result
1749 // if not rounding to nearest
1750 if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) {
1751 // there is a gap of exactly one digit between the scaled C3 and C4
1752 // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case
1753 if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) ||
1754 (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) ||
1755 (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] ||
1756 C3.w[0] != ten2k128[q3 - 21].w[0]))) {
1757 // C3 * 10^ scale != 10^(q3-1)
1758 // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
1759 // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
1760 is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value
1761 } else { // if C3 * 10^scale = 10^(q3+scale-1)
1762 // ok from above e3 = (z_exp >> 49) - 6176;
1763 // the result is always inexact
1764 if (q4 == 1) {
1765 R64 = C4.w[0];
1766 } else {
1767 // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
1768 // x = q4-1, 1 <= x <= 67 and check if this operation is exact
1769 if (q4 <= 18) { // 2 <= q4 <= 18
1770 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
1771 &is_midpoint_lt_even, &is_midpoint_gt_even,
1772 &is_inexact_lt_midpoint,
1773 &is_inexact_gt_midpoint);
1774 } else if (q4 <= 38) {
1775 P128.w[1] = C4.w[1];
1776 P128.w[0] = C4.w[0];
1777 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
1778 &is_midpoint_lt_even,
1779 &is_midpoint_gt_even,
1780 &is_inexact_lt_midpoint,
1781 &is_inexact_gt_midpoint);
1782 R64 = R128.w[0]; // one decimal digit
1783 } else if (q4 <= 57) {
1784 P192.w[2] = C4.w[2];
1785 P192.w[1] = C4.w[1];
1786 P192.w[0] = C4.w[0];
1787 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
1788 &is_midpoint_lt_even,
1789 &is_midpoint_gt_even,
1790 &is_inexact_lt_midpoint,
1791 &is_inexact_gt_midpoint);
1792 R64 = R192.w[0]; // one decimal digit
1793 } else { // if (q4 <= 68)
1794 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
1795 &is_midpoint_lt_even,
1796 &is_midpoint_gt_even,
1797 &is_inexact_lt_midpoint,
1798 &is_inexact_gt_midpoint);
1799 R64 = R256.w[0]; // one decimal digit
1801 if (incr_exp) {
1802 R64 = 10;
1805 if (q4 == 1 && C4.w[0] == 5) {
1806 is_inexact_lt_midpoint = 0;
1807 is_inexact_gt_midpoint = 0;
1808 is_midpoint_lt_even = 1;
1809 is_midpoint_gt_even = 0;
1810 } else if ((e3 == expmin) ||
1811 R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) {
1812 // result does not change
1813 is_inexact_lt_midpoint = 0;
1814 is_inexact_gt_midpoint = 1;
1815 is_midpoint_lt_even = 0;
1816 is_midpoint_gt_even = 0;
1817 } else {
1818 is_inexact_lt_midpoint = 1;
1819 is_inexact_gt_midpoint = 0;
1820 is_midpoint_lt_even = 0;
1821 is_midpoint_gt_even = 0;
1822 // result decremented is 10^(q3+scale) - 1
1823 if ((q3 + scale) <= 19) {
1824 res.w[1] = 0;
1825 res.w[0] = ten2k64[q3 + scale];
1826 } else { // if ((q3 + scale + 1) <= 35)
1827 res.w[1] = ten2k128[q3 + scale - 20].w[1];
1828 res.w[0] = ten2k128[q3 + scale - 20].w[0];
1830 res.w[0] = res.w[0] - 1; // borrow never occurs
1831 z_exp = z_exp - EXP_P1;
1832 e3 = e3 - 1;
1833 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
1835 if (e3 == expmin) {
1836 if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) {
1837 ; // result not tiny (in round-to-nearest mode)
1838 } else {
1839 *pfpsf |= UNDERFLOW_EXCEPTION;
1842 } // end 10^(q3+scale-1)
1843 // set the inexact flag
1844 *pfpsf |= INEXACT_EXCEPTION;
1845 } else {
1846 if (p_sign == z_sign) {
1847 // if (z_sign), set as if for absolute value
1848 is_inexact_lt_midpoint = 1;
1849 } else { // if (p_sign != z_sign)
1850 // if (z_sign), set as if for absolute value
1851 is_inexact_gt_midpoint = 1;
1853 *pfpsf |= INEXACT_EXCEPTION;
1855 // the result is always inexact => set the inexact flag
1856 // Determine tininess:
1857 // if (exp > expmin)
1858 // the result is not tiny
1859 // else // if exp = emin
1860 // if (q3 + scale < p34)
1861 // the result is tiny
1862 // else // if (q3 + scale = p34)
1863 // if (C3 * 10^scale > 10^33)
1864 // the result is not tiny
1865 // else // if C3 * 10^scale = 10^33
1866 // if (xy * z > 0)
1867 // the result is not tiny
1868 // else // if (xy * z < 0)
1869 // if (z > 0)
1870 // if rnd_mode != RP
1871 // the result is tiny
1872 // else // if RP
1873 // the result is not tiny
1874 // else // if (z < 0)
1875 // if rnd_mode != RM
1876 // the result is tiny
1877 // else // if RM
1878 // the result is not tiny
1879 // endif
1880 // endif
1881 // endif
1882 // endif
1883 // endif
1884 // endif
1885 if ((e3 == expmin && (q3 + scale) < p34) ||
1886 (e3 == expmin && (q3 + scale) == p34 &&
1887 (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && // 10^33_high
1888 res.w[0] == 0x38c15b0a00000000ull && // 10^33_low
1889 z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) ||
1890 (z_sign && rnd_mode != ROUNDING_DOWN)))) {
1891 *pfpsf |= UNDERFLOW_EXCEPTION;
1893 if (rnd_mode != ROUNDING_TO_NEAREST) {
1894 rounding_correction (rnd_mode,
1895 is_inexact_lt_midpoint,
1896 is_inexact_gt_midpoint,
1897 is_midpoint_lt_even, is_midpoint_gt_even,
1898 e3, &res, pfpsf);
1900 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1901 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1902 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1903 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1904 BID_SWAP128 (res);
1905 BID_RETURN (res)
1907 } else if (p34 == delta) { // Case (1''B)
1909 // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3
1910 // and C3 can be scaled up to p34 digits if needed
1912 // scale C3 to p34 digits if needed
1913 scale = p34 - q3; // 0 <= scale <= p34 - 1
1914 if (scale == 0) {
1915 res.w[1] = C3.w[1];
1916 res.w[0] = C3.w[0];
1917 } else if (q3 <= 19) { // z fits in 64 bits
1918 if (scale <= 19) { // 10^scale fits in 64 bits
1919 // 64 x 64 C3.w[0] * ten2k64[scale]
1920 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1921 } else { // 10^scale fits in 128 bits
1922 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1923 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1925 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1926 // 64 x 128 ten2k64[scale] * C3
1927 __mul_128x64_to_128 (res, ten2k64[scale], C3);
1929 // subtract scale from the exponent
1930 z_exp = z_exp - ((UINT64) scale << 49);
1931 e3 = e3 - scale;
1932 // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits
1934 // determine whether x * y is less than, equal to, or greater than
1935 // 1/2 ulp (z)
1936 if (q4 <= 19) {
1937 if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
1938 lt_half_ulp = 1;
1939 } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
1940 eq_half_ulp = 1;
1941 } else { // > 1/2 ulp
1942 gt_half_ulp = 1;
1944 } else if (q4 <= 38) {
1945 if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] ||
1946 (C4.w[1] == midpoint128[q4 - 20].w[1] &&
1947 C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp
1948 lt_half_ulp = 1;
1949 } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] &&
1950 C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
1951 eq_half_ulp = 1;
1952 } else { // > 1/2 ulp
1953 gt_half_ulp = 1;
1955 } else if (q4 <= 58) {
1956 if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] ||
1957 (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1958 C4.w[1] < midpoint192[q4 - 39].w[1]) ||
1959 (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1960 C4.w[1] == midpoint192[q4 - 39].w[1] &&
1961 C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp
1962 lt_half_ulp = 1;
1963 } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] &&
1964 C4.w[1] == midpoint192[q4 - 39].w[1] &&
1965 C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp
1966 eq_half_ulp = 1;
1967 } else { // > 1/2 ulp
1968 gt_half_ulp = 1;
1970 } else {
1971 if (C4.w[3] < midpoint256[q4 - 59].w[3] ||
1972 (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1973 C4.w[2] < midpoint256[q4 - 59].w[2]) ||
1974 (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1975 C4.w[2] == midpoint256[q4 - 59].w[2] &&
1976 C4.w[1] < midpoint256[q4 - 59].w[1]) ||
1977 (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1978 C4.w[2] == midpoint256[q4 - 59].w[2] &&
1979 C4.w[1] == midpoint256[q4 - 59].w[1] &&
1980 C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp
1981 lt_half_ulp = 1;
1982 } else if (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1983 C4.w[2] == midpoint256[q4 - 59].w[2] &&
1984 C4.w[1] == midpoint256[q4 - 59].w[1] &&
1985 C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp
1986 eq_half_ulp = 1;
1987 } else { // > 1/2 ulp
1988 gt_half_ulp = 1;
1992 if (p_sign == z_sign) {
1993 if (lt_half_ulp) {
1994 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1995 // use the following to avoid double rounding errors when operating on
1996 // mixed formats in rounding to nearest
1997 is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value
1998 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
1999 // add 1 ulp to the significand
2000 res.w[0]++;
2001 if (res.w[0] == 0x0ull)
2002 res.w[1]++;
2003 // check for rounding overflow, when coeff == 10^34
2004 if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull &&
2005 res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34
2006 e3 = e3 + 1;
2007 // coeff = 10^33
2008 z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP;
2009 res.w[1] = 0x0000314dc6448d93ull;
2010 res.w[0] = 0x38c15b0a00000000ull;
2012 // end add 1 ulp
2013 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2014 if (eq_half_ulp) {
2015 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2016 } else {
2017 is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2019 } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2020 // leave unchanged
2021 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2022 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2024 // the result is always inexact, and never tiny
2025 // set the inexact flag
2026 *pfpsf |= INEXACT_EXCEPTION;
2027 // check for overflow
2028 if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) {
2029 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2030 res.w[0] = 0x0000000000000000ull;
2031 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2032 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2033 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2034 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2035 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2036 BID_SWAP128 (res);
2037 BID_RETURN (res)
2039 if (rnd_mode != ROUNDING_TO_NEAREST) {
2040 rounding_correction (rnd_mode,
2041 is_inexact_lt_midpoint,
2042 is_inexact_gt_midpoint,
2043 is_midpoint_lt_even, is_midpoint_gt_even,
2044 e3, &res, pfpsf);
2045 z_exp = res.w[1] & MASK_EXP;
2047 } else { // if (p_sign != z_sign)
2048 // consider two cases, because C3 * 10^scale = 10^33 is a special case
2049 if (res.w[1] != 0x0000314dc6448d93ull ||
2050 res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
2051 if (lt_half_ulp) {
2052 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2053 // use the following to avoid double rounding errors when operating
2054 // on mixed formats in rounding to nearest
2055 is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2056 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
2057 // subtract 1 ulp from the significand
2058 res.w[0]--;
2059 if (res.w[0] == 0xffffffffffffffffull)
2060 res.w[1]--;
2061 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2062 if (eq_half_ulp) {
2063 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2064 } else {
2065 is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2067 } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2068 // leave unchanged
2069 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2070 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2072 // the result is always inexact, and never tiny
2073 // check for overflow for RN
2074 if (e3 > expmax) {
2075 if (rnd_mode == ROUNDING_TO_NEAREST) {
2076 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2077 res.w[0] = 0x0000000000000000ull;
2078 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2079 } else {
2080 rounding_correction (rnd_mode,
2081 is_inexact_lt_midpoint,
2082 is_inexact_gt_midpoint,
2083 is_midpoint_lt_even,
2084 is_midpoint_gt_even, e3, &res,
2085 pfpsf);
2087 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2088 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2089 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2090 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2091 BID_SWAP128 (res);
2092 BID_RETURN (res)
2094 // set the inexact flag
2095 *pfpsf |= INEXACT_EXCEPTION;
2096 if (rnd_mode != ROUNDING_TO_NEAREST) {
2097 rounding_correction (rnd_mode,
2098 is_inexact_lt_midpoint,
2099 is_inexact_gt_midpoint,
2100 is_midpoint_lt_even,
2101 is_midpoint_gt_even, e3, &res, pfpsf);
2103 z_exp = res.w[1] & MASK_EXP;
2104 } else { // if C3 * 10^scale = 10^33
2105 e3 = (z_exp >> 49) - 6176;
2106 if (e3 > expmin) {
2107 // the result is exact if exp > expmin and C4 = d*10^(q4-1),
2108 // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact
2109 if (q4 == 1) {
2110 // if q4 = 1 the result is exact
2111 // result coefficient = 10^34 - C4
2112 res.w[1] = 0x0001ed09bead87c0ull;
2113 res.w[0] = 0x378d8e6400000000ull - C4.w[0];
2114 z_exp = z_exp - EXP_P1;
2115 e3 = e3 - 1;
2116 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2117 } else {
2118 // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
2119 // x = q4-1, 1 <= x <= 67 and check if this operation is exact
2120 if (q4 <= 18) { // 2 <= q4 <= 18
2121 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
2122 &is_midpoint_lt_even,
2123 &is_midpoint_gt_even,
2124 &is_inexact_lt_midpoint,
2125 &is_inexact_gt_midpoint);
2126 } else if (q4 <= 38) {
2127 P128.w[1] = C4.w[1];
2128 P128.w[0] = C4.w[0];
2129 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
2130 &is_midpoint_lt_even,
2131 &is_midpoint_gt_even,
2132 &is_inexact_lt_midpoint,
2133 &is_inexact_gt_midpoint);
2134 R64 = R128.w[0]; // one decimal digit
2135 } else if (q4 <= 57) {
2136 P192.w[2] = C4.w[2];
2137 P192.w[1] = C4.w[1];
2138 P192.w[0] = C4.w[0];
2139 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
2140 &is_midpoint_lt_even,
2141 &is_midpoint_gt_even,
2142 &is_inexact_lt_midpoint,
2143 &is_inexact_gt_midpoint);
2144 R64 = R192.w[0]; // one decimal digit
2145 } else { // if (q4 <= 68)
2146 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
2147 &is_midpoint_lt_even,
2148 &is_midpoint_gt_even,
2149 &is_inexact_lt_midpoint,
2150 &is_inexact_gt_midpoint);
2151 R64 = R256.w[0]; // one decimal digit
2153 if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2154 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2155 // the result is exact: 10^34 - R64
2156 // incr_exp = 0 with certainty
2157 z_exp = z_exp - EXP_P1;
2158 e3 = e3 - 1;
2159 res.w[1] =
2160 z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull;
2161 res.w[0] = 0x378d8e6400000000ull - R64;
2162 } else {
2163 // We want R64 to be the top digit of C4, but we actually
2164 // obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
2165 // because the top digit is (C4 * 10^(-q4+1))RZ
2166 // however, if incr_exp = 1 then R64 = 10 with certainty
2167 if (incr_exp) {
2168 R64 = 10;
2170 // the result is inexact as C4 has more than 1 significant digit
2171 // and C3 * 10^scale = 10^33
2172 // example of case that is treated here:
2173 // 100...0 * 10^e3 - 0.41 * 10^e3 =
2174 // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
2175 // note that (e3 > expmin}
2176 // in order to round, subtract R64 from 10^34 and then compare
2177 // C4 - R64 * 10^(q4-1) with 1/2 ulp
2178 // calculate 10^34 - R64
2179 res.w[1] = 0x0001ed09bead87c0ull;
2180 res.w[0] = 0x378d8e6400000000ull - R64;
2181 z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand
2182 // calculate C4 - R64 * 10^(q4-1); this is a rare case and
2183 // R64 is small, 1 <= R64 <= 9
2184 e3 = e3 - 1;
2185 if (is_inexact_lt_midpoint) {
2186 is_inexact_lt_midpoint = 0;
2187 is_inexact_gt_midpoint = 1;
2188 } else if (is_inexact_gt_midpoint) {
2189 is_inexact_gt_midpoint = 0;
2190 is_inexact_lt_midpoint = 1;
2191 } else if (is_midpoint_lt_even) {
2192 is_midpoint_lt_even = 0;
2193 is_midpoint_gt_even = 1;
2194 } else if (is_midpoint_gt_even) {
2195 is_midpoint_gt_even = 0;
2196 is_midpoint_lt_even = 1;
2197 } else {
2200 // the result is always inexact, and never tiny
2201 // check for overflow for RN
2202 if (e3 > expmax) {
2203 if (rnd_mode == ROUNDING_TO_NEAREST) {
2204 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2205 res.w[0] = 0x0000000000000000ull;
2206 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2207 } else {
2208 rounding_correction (rnd_mode,
2209 is_inexact_lt_midpoint,
2210 is_inexact_gt_midpoint,
2211 is_midpoint_lt_even,
2212 is_midpoint_gt_even, e3, &res,
2213 pfpsf);
2215 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2216 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2217 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2218 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2219 BID_SWAP128 (res);
2220 BID_RETURN (res)
2222 // set the inexact flag
2223 *pfpsf |= INEXACT_EXCEPTION;
2224 res.w[1] =
2225 z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2226 if (rnd_mode != ROUNDING_TO_NEAREST) {
2227 rounding_correction (rnd_mode,
2228 is_inexact_lt_midpoint,
2229 is_inexact_gt_midpoint,
2230 is_midpoint_lt_even,
2231 is_midpoint_gt_even, e3, &res,
2232 pfpsf);
2234 z_exp = res.w[1] & MASK_EXP;
2235 } // end result is inexact
2236 } // end q4 > 1
2237 } else { // if (e3 = emin)
2238 // if e3 = expmin the result is also tiny (the condition for
2239 // tininess is C4 > 050...0 [q4 digits] which is met because
2240 // the msd of C4 is not zero)
2241 // the result is tiny and inexact in all rounding modes;
2242 // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp,
2243 // gt_half_ulp to calculate)
2244 // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged
2246 // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp
2247 if (gt_half_ulp) { // res = 10^33 - 1
2248 res.w[1] = 0x0000314dc6448d93ull;
2249 res.w[0] = 0x38c15b09ffffffffull;
2250 } else {
2251 res.w[1] = 0x0000314dc6448d93ull;
2252 res.w[0] = 0x38c15b0a00000000ull;
2254 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2255 *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later
2257 if (eq_half_ulp) {
2258 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2259 } else if (lt_half_ulp) {
2260 is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value
2261 } else { // if (gt_half_ulp)
2262 is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2265 if (rnd_mode != ROUNDING_TO_NEAREST) {
2266 rounding_correction (rnd_mode,
2267 is_inexact_lt_midpoint,
2268 is_inexact_gt_midpoint,
2269 is_midpoint_lt_even,
2270 is_midpoint_gt_even, e3, &res,
2271 pfpsf);
2272 z_exp = res.w[1] & MASK_EXP;
2274 } // end e3 = emin
2275 // set the inexact flag (if the result was not exact)
2276 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2277 is_midpoint_lt_even || is_midpoint_gt_even)
2278 *pfpsf |= INEXACT_EXCEPTION;
2279 } // end 10^33
2280 } // end if (p_sign != z_sign)
2281 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2282 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2283 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2284 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2285 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2286 BID_SWAP128 (res);
2287 BID_RETURN (res)
2289 } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2290 (q3 <= delta && delta + q4 <= p34) || // Case (3)
2291 (delta < q3 && p34 < delta + q4) || // Case (4)
2292 (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5)
2293 (delta + q4 < q3)) && // Case (6)
2294 !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6)
2296 // the result has the sign of z
2298 if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2299 (delta < q3 && p34 < delta + q4)) { // Case (4)
2300 // round first the sum x * y + z with unbounded exponent
2301 // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1,
2302 // 1 <= scale <= 33
2303 // calculate res = C3 * 10^scale
2304 scale = p34 - q3;
2305 x0 = delta + q4 - p34;
2306 } else if (delta + q4 < q3) { // Case (6)
2307 // make Case (6) look like Case (3) or Case (5) with scale = 0
2308 // by scaling up C4 by 10^(q3 - delta - q4)
2309 scale = q3 - delta - q4; // 1 <= scale <= 33
2310 if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
2311 if (scale <= 19) { // 10^scale fits in 64 bits
2312 // 64 x 64 C4.w[0] * ten2k64[scale]
2313 __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]);
2314 } else { // 10^scale fits in 128 bits
2315 // 64 x 128 C4.w[0] * ten2k128[scale - 20]
2316 __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]);
2318 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
2319 // 64 x 128 ten2k64[scale] * C4
2320 __mul_128x64_to_128 (P128, ten2k64[scale], C4);
2322 C4.w[0] = P128.w[0];
2323 C4.w[1] = P128.w[1];
2324 // e4 does not need adjustment, as it is not used from this point on
2325 scale = 0;
2326 x0 = 0;
2327 // now Case (6) looks like Case (3) or Case (5) with scale = 0
2328 } else { // if Case (3) or Case (5)
2329 // Note: Case (3) is similar to Case (2), but scale differs and the
2330 // result is exact, unless it is tiny (so x0 = 0 when calculating the
2331 // result with unbounded exponent)
2333 // calculate first the sum x * y + z with unbounded exponent (exact)
2334 // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1,
2335 // 1 <= scale <= 33
2336 // calculate res = C3 * 10^scale
2337 scale = delta + q4 - q3;
2338 x0 = 0;
2339 // Note: the comments which follow refer [mainly] to Case (2)]
2342 case2_repeat:
2343 if (scale == 0) { // this could happen e.g. if we return to case2_repeat
2344 // or in Case (4)
2345 res.w[1] = C3.w[1];
2346 res.w[0] = C3.w[0];
2347 } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits
2348 if (scale <= 19) { // 10^scale fits in 64 bits
2349 // 64 x 64 C3.w[0] * ten2k64[scale]
2350 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
2351 } else { // 10^scale fits in 128 bits
2352 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
2353 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
2355 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
2356 // 64 x 128 ten2k64[scale] * C3
2357 __mul_128x64_to_128 (res, ten2k64[scale], C3);
2359 // e3 is already calculated
2360 e3 = e3 - scale;
2361 // now res = C3 * 10^scale and e3 = e3 - scale
2362 // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat
2363 // because the result was too small
2365 // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34,
2366 // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67)
2367 // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of
2368 // the rounding fits in 128 bits!)
2369 // x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
2370 // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
2371 if (x0 == 0) { // this could happen only if we return to case2_repeat, or
2372 // for Case (3) or Case (6)
2373 R128.w[1] = C4.w[1];
2374 R128.w[0] = C4.w[0];
2375 } else if (q4 <= 18) {
2376 // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
2377 round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp,
2378 &is_midpoint_lt_even, &is_midpoint_gt_even,
2379 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
2380 if (incr_exp) {
2381 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
2382 R64 = ten2k64[q4 - x0];
2384 R128.w[1] = 0;
2385 R128.w[0] = R64;
2386 } else if (q4 <= 38) {
2387 // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
2388 P128.w[1] = C4.w[1];
2389 P128.w[0] = C4.w[0];
2390 round128_19_38 (q4, x0, P128, &R128, &incr_exp,
2391 &is_midpoint_lt_even, &is_midpoint_gt_even,
2392 &is_inexact_lt_midpoint,
2393 &is_inexact_gt_midpoint);
2394 if (incr_exp) {
2395 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
2396 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2397 R128.w[0] = ten2k64[q4 - x0];
2398 // R128.w[1] stays 0
2399 } else { // 20 <= q4 - x0 <= 37
2400 R128.w[0] = ten2k128[q4 - x0 - 20].w[0];
2401 R128.w[1] = ten2k128[q4 - x0 - 20].w[1];
2404 } else if (q4 <= 57) {
2405 // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56
2406 P192.w[2] = C4.w[2];
2407 P192.w[1] = C4.w[1];
2408 P192.w[0] = C4.w[0];
2409 round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2410 &is_midpoint_lt_even, &is_midpoint_gt_even,
2411 &is_inexact_lt_midpoint,
2412 &is_inexact_gt_midpoint);
2413 // R192.w[2] is always 0
2414 if (incr_exp) {
2415 // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
2416 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2417 R192.w[0] = ten2k64[q4 - x0];
2418 // R192.w[1] stays 0
2419 // R192.w[2] stays 0
2420 } else { // 20 <= q4 - x0 <= 33
2421 R192.w[0] = ten2k128[q4 - x0 - 20].w[0];
2422 R192.w[1] = ten2k128[q4 - x0 - 20].w[1];
2423 // R192.w[2] stays 0
2426 R128.w[1] = R192.w[1];
2427 R128.w[0] = R192.w[0];
2428 } else {
2429 // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
2430 round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2431 &is_midpoint_lt_even, &is_midpoint_gt_even,
2432 &is_inexact_lt_midpoint,
2433 &is_inexact_gt_midpoint);
2434 // R256.w[3] and R256.w[2] are always 0
2435 if (incr_exp) {
2436 // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
2437 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2438 R256.w[0] = ten2k64[q4 - x0];
2439 // R256.w[1] stays 0
2440 // R256.w[2] stays 0
2441 // R256.w[3] stays 0
2442 } else { // 20 <= q4 - x0 <= 33
2443 R256.w[0] = ten2k128[q4 - x0 - 20].w[0];
2444 R256.w[1] = ten2k128[q4 - x0 - 20].w[1];
2445 // R256.w[2] stays 0
2446 // R256.w[3] stays 0
2449 R128.w[1] = R256.w[1];
2450 R128.w[0] = R256.w[0];
2452 // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4,
2453 // rounded to nearest, which were copied into R128
2454 if (z_sign == p_sign) {
2455 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale
2456 // the sum can result in [up to] p34 or p34 + 1 digits
2457 res.w[0] = res.w[0] + R128.w[0];
2458 res.w[1] = res.w[1] + R128.w[1];
2459 if (res.w[0] < R128.w[0])
2460 res.w[1]++; // carry
2461 // if res > 10^34 - 1 need to increase x0 and decrease scale by 1
2462 if (res.w[1] > 0x0001ed09bead87c0ull ||
2463 (res.w[1] == 0x0001ed09bead87c0ull &&
2464 res.w[0] > 0x378d8e63ffffffffull)) {
2465 // avoid double rounding error
2466 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2467 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2468 is_midpoint_lt_even0 = is_midpoint_lt_even;
2469 is_midpoint_gt_even0 = is_midpoint_gt_even;
2470 is_inexact_lt_midpoint = 0;
2471 is_inexact_gt_midpoint = 0;
2472 is_midpoint_lt_even = 0;
2473 is_midpoint_gt_even = 0;
2474 P128.w[1] = res.w[1];
2475 P128.w[0] = res.w[0];
2476 round128_19_38 (35, 1, P128, &res, &incr_exp,
2477 &is_midpoint_lt_even, &is_midpoint_gt_even,
2478 &is_inexact_lt_midpoint,
2479 &is_inexact_gt_midpoint);
2480 // incr_exp is 0 with certainty in this case
2481 // avoid a double rounding error
2482 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2483 is_midpoint_lt_even) { // double rounding error upward
2484 // res = res - 1
2485 res.w[0]--;
2486 if (res.w[0] == 0xffffffffffffffffull)
2487 res.w[1]--;
2488 // Note: a double rounding error upward is not possible; for this
2489 // the result after the first rounding would have to be 99...95
2490 // (35 digits in all), possibly followed by a number of zeros; this
2491 // not possible in Cases (2)-(6) or (15)-(17) which may get here
2492 is_midpoint_lt_even = 0;
2493 is_inexact_lt_midpoint = 1;
2494 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2495 is_midpoint_gt_even) { // double rounding error downward
2496 // res = res + 1
2497 res.w[0]++;
2498 if (res.w[0] == 0)
2499 res.w[1]++;
2500 is_midpoint_gt_even = 0;
2501 is_inexact_gt_midpoint = 1;
2502 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2503 !is_inexact_lt_midpoint
2504 && !is_inexact_gt_midpoint) {
2505 // if this second rounding was exact the result may still be
2506 // inexact because of the first rounding
2507 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2508 is_inexact_gt_midpoint = 1;
2510 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2511 is_inexact_lt_midpoint = 1;
2513 } else if (is_midpoint_gt_even &&
2514 (is_inexact_gt_midpoint0
2515 || is_midpoint_lt_even0)) {
2516 // pulled up to a midpoint
2517 is_inexact_lt_midpoint = 1;
2518 is_inexact_gt_midpoint = 0;
2519 is_midpoint_lt_even = 0;
2520 is_midpoint_gt_even = 0;
2521 } else if (is_midpoint_lt_even &&
2522 (is_inexact_lt_midpoint0
2523 || is_midpoint_gt_even0)) {
2524 // pulled down to a midpoint
2525 is_inexact_lt_midpoint = 0;
2526 is_inexact_gt_midpoint = 1;
2527 is_midpoint_lt_even = 0;
2528 is_midpoint_gt_even = 0;
2529 } else {
2532 // adjust exponent
2533 e3 = e3 + 1;
2534 if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2535 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2536 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
2537 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
2538 is_inexact_lt_midpoint = 1;
2541 } else {
2542 // this is the result rounded with unbounded exponent, unless a
2543 // correction is needed
2544 res.w[1] = res.w[1] & MASK_COEFF;
2545 if (lsb == 1) {
2546 if (is_midpoint_gt_even) {
2547 // res = res + 1
2548 is_midpoint_gt_even = 0;
2549 is_midpoint_lt_even = 1;
2550 res.w[0]++;
2551 if (res.w[0] == 0x0)
2552 res.w[1]++;
2553 // check for rounding overflow
2554 if (res.w[1] == 0x0001ed09bead87c0ull &&
2555 res.w[0] == 0x378d8e6400000000ull) {
2556 // res = 10^34 => rounding overflow
2557 res.w[1] = 0x0000314dc6448d93ull;
2558 res.w[0] = 0x38c15b0a00000000ull; // 10^33
2559 e3++;
2561 } else if (is_midpoint_lt_even) {
2562 // res = res - 1
2563 is_midpoint_lt_even = 0;
2564 is_midpoint_gt_even = 1;
2565 res.w[0]--;
2566 if (res.w[0] == 0xffffffffffffffffull)
2567 res.w[1]--;
2568 // if the result is pure zero, the sign depends on the rounding
2569 // mode (x*y and z had opposite signs)
2570 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
2571 if (rnd_mode != ROUNDING_DOWN)
2572 z_sign = 0x0000000000000000ull;
2573 else
2574 z_sign = 0x8000000000000000ull;
2575 // the exponent is max (e3, expmin)
2576 res.w[1] = 0x0;
2577 res.w[0] = 0x0;
2578 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2579 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2580 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2581 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2582 BID_SWAP128 (res);
2583 BID_RETURN (res)
2585 } else {
2590 } else { // if (z_sign != p_sign)
2591 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4
2592 // used to swap rounding indicators if p_sign != z_sign
2593 // the sum can result in [up to] p34 or p34 - 1 digits
2594 tmp64 = res.w[0];
2595 res.w[0] = res.w[0] - R128.w[0];
2596 res.w[1] = res.w[1] - R128.w[1];
2597 if (res.w[0] > tmp64)
2598 res.w[1]--; // borrow
2599 // if res < 10^33 and exp > expmin need to decrease x0 and
2600 // increase scale by 1
2601 if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull ||
2602 (res.w[1] == 0x0000314dc6448d93ull &&
2603 res.w[0] < 0x38c15b0a00000000ull)) ||
2604 (is_inexact_lt_midpoint
2605 && res.w[1] == 0x0000314dc6448d93ull
2606 && res.w[0] == 0x38c15b0a00000000ull))
2607 && x0 >= 1) {
2608 x0 = x0 - 1;
2609 // first restore e3, otherwise it will be too small
2610 e3 = e3 + scale;
2611 scale = scale + 1;
2612 is_inexact_lt_midpoint = 0;
2613 is_inexact_gt_midpoint = 0;
2614 is_midpoint_lt_even = 0;
2615 is_midpoint_gt_even = 0;
2616 incr_exp = 0;
2617 goto case2_repeat;
2619 // else this is the result rounded with unbounded exponent;
2620 // because the result has opposite sign to that of C4 which was
2621 // rounded, need to change the rounding indicators
2622 if (is_inexact_lt_midpoint) {
2623 is_inexact_lt_midpoint = 0;
2624 is_inexact_gt_midpoint = 1;
2625 } else if (is_inexact_gt_midpoint) {
2626 is_inexact_gt_midpoint = 0;
2627 is_inexact_lt_midpoint = 1;
2628 } else if (lsb == 0) {
2629 if (is_midpoint_lt_even) {
2630 is_midpoint_lt_even = 0;
2631 is_midpoint_gt_even = 1;
2632 } else if (is_midpoint_gt_even) {
2633 is_midpoint_gt_even = 0;
2634 is_midpoint_lt_even = 1;
2635 } else {
2638 } else if (lsb == 1) {
2639 if (is_midpoint_lt_even) {
2640 // res = res + 1
2641 res.w[0]++;
2642 if (res.w[0] == 0x0)
2643 res.w[1]++;
2644 // check for rounding overflow
2645 if (res.w[1] == 0x0001ed09bead87c0ull &&
2646 res.w[0] == 0x378d8e6400000000ull) {
2647 // res = 10^34 => rounding overflow
2648 res.w[1] = 0x0000314dc6448d93ull;
2649 res.w[0] = 0x38c15b0a00000000ull; // 10^33
2650 e3++;
2652 } else if (is_midpoint_gt_even) {
2653 // res = res - 1
2654 res.w[0]--;
2655 if (res.w[0] == 0xffffffffffffffffull)
2656 res.w[1]--;
2657 // if the result is pure zero, the sign depends on the rounding
2658 // mode (x*y and z had opposite signs)
2659 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
2660 if (rnd_mode != ROUNDING_DOWN)
2661 z_sign = 0x0000000000000000ull;
2662 else
2663 z_sign = 0x8000000000000000ull;
2664 // the exponent is max (e3, expmin)
2665 res.w[1] = 0x0;
2666 res.w[0] = 0x0;
2667 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2668 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2669 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2670 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2671 BID_SWAP128 (res);
2672 BID_RETURN (res)
2674 } else {
2677 } else {
2681 // check for underflow
2682 if (e3 == expmin) { // and if significand < 10^33 => result is tiny
2683 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
2684 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
2685 res.w[0] < 0x38c15b0a00000000ull)) {
2686 is_tiny = 1;
2688 } else if (e3 < expmin) {
2689 // the result is tiny, so we must truncate more of res
2690 is_tiny = 1;
2691 x0 = expmin - e3;
2692 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2693 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2694 is_midpoint_lt_even0 = is_midpoint_lt_even;
2695 is_midpoint_gt_even0 = is_midpoint_gt_even;
2696 is_inexact_lt_midpoint = 0;
2697 is_inexact_gt_midpoint = 0;
2698 is_midpoint_lt_even = 0;
2699 is_midpoint_gt_even = 0;
2700 // determine the number of decimal digits in res
2701 if (res.w[1] == 0x0) {
2702 // between 1 and 19 digits
2703 for (ind = 1; ind <= 19; ind++) {
2704 if (res.w[0] < ten2k64[ind]) {
2705 break;
2708 // ind digits
2709 } else if (res.w[1] < ten2k128[0].w[1] ||
2710 (res.w[1] == ten2k128[0].w[1]
2711 && res.w[0] < ten2k128[0].w[0])) {
2712 // 20 digits
2713 ind = 20;
2714 } else { // between 21 and 38 digits
2715 for (ind = 1; ind <= 18; ind++) {
2716 if (res.w[1] < ten2k128[ind].w[1] ||
2717 (res.w[1] == ten2k128[ind].w[1] &&
2718 res.w[0] < ten2k128[ind].w[0])) {
2719 break;
2722 // ind + 20 digits
2723 ind = ind + 20;
2726 // at this point ind >= x0; because delta >= 2 on this path, the case
2727 // ind = x0 can occur only in Case (2) or case (3), when C3 has one
2728 // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin),
2729 // the signs of x * y and z are opposite, and through cancellation
2730 // the most significant decimal digit in res has the weight
2731 // 10^(emin-1); however, it is clear that in this case the most
2732 // significant digit is 9, so the result before rounding is
2733 // 0.9... * 10^emin
2734 // Otherwise, ind > x0 because there are non-zero decimal digits in the
2735 // result with weight of at least 10^emin, and correction for underflow
2736 // can be carried out using the round*_*_2_* () routines
2737 if (x0 == ind) { // the result before rounding is 0.9... * 10^emin
2738 res.w[1] = 0x0;
2739 res.w[0] = 0x1;
2740 is_inexact_gt_midpoint = 1;
2741 } else if (ind <= 18) { // check that 2 <= ind
2742 // 2 <= ind <= 18, 1 <= x0 <= 17
2743 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
2744 &is_midpoint_lt_even, &is_midpoint_gt_even,
2745 &is_inexact_lt_midpoint,
2746 &is_inexact_gt_midpoint);
2747 if (incr_exp) {
2748 // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17
2749 R64 = ten2k64[ind - x0];
2751 res.w[1] = 0;
2752 res.w[0] = R64;
2753 } else if (ind <= 38) {
2754 // 19 <= ind <= 38
2755 P128.w[1] = res.w[1];
2756 P128.w[0] = res.w[0];
2757 round128_19_38 (ind, x0, P128, &res, &incr_exp,
2758 &is_midpoint_lt_even, &is_midpoint_gt_even,
2759 &is_inexact_lt_midpoint,
2760 &is_inexact_gt_midpoint);
2761 if (incr_exp) {
2762 // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37
2763 if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19
2764 res.w[0] = ten2k64[ind - x0];
2765 // res.w[1] stays 0
2766 } else { // 20 <= ind - x0 <= 37
2767 res.w[0] = ten2k128[ind - x0 - 20].w[0];
2768 res.w[1] = ten2k128[ind - x0 - 20].w[1];
2772 // avoid a double rounding error
2773 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2774 is_midpoint_lt_even) { // double rounding error upward
2775 // res = res - 1
2776 res.w[0]--;
2777 if (res.w[0] == 0xffffffffffffffffull)
2778 res.w[1]--;
2779 // Note: a double rounding error upward is not possible; for this
2780 // the result after the first rounding would have to be 99...95
2781 // (35 digits in all), possibly followed by a number of zeros; this
2782 // is not possible in Cases (2)-(6) which may get here
2783 is_midpoint_lt_even = 0;
2784 is_inexact_lt_midpoint = 1;
2785 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2786 is_midpoint_gt_even) { // double rounding error downward
2787 // res = res + 1
2788 res.w[0]++;
2789 if (res.w[0] == 0)
2790 res.w[1]++;
2791 is_midpoint_gt_even = 0;
2792 is_inexact_gt_midpoint = 1;
2793 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2794 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2795 // if this second rounding was exact the result may still be
2796 // inexact because of the first rounding
2797 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2798 is_inexact_gt_midpoint = 1;
2800 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2801 is_inexact_lt_midpoint = 1;
2803 } else if (is_midpoint_gt_even &&
2804 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
2805 // pulled up to a midpoint
2806 is_inexact_lt_midpoint = 1;
2807 is_inexact_gt_midpoint = 0;
2808 is_midpoint_lt_even = 0;
2809 is_midpoint_gt_even = 0;
2810 } else if (is_midpoint_lt_even &&
2811 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
2812 // pulled down to a midpoint
2813 is_inexact_lt_midpoint = 0;
2814 is_inexact_gt_midpoint = 1;
2815 is_midpoint_lt_even = 0;
2816 is_midpoint_gt_even = 0;
2817 } else {
2820 // adjust exponent
2821 e3 = e3 + x0;
2822 if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2823 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2824 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
2825 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
2826 is_inexact_lt_midpoint = 1;
2829 } else {
2830 ; // not underflow
2832 // check for inexact result
2833 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2834 is_midpoint_lt_even || is_midpoint_gt_even) {
2835 // set the inexact flag
2836 *pfpsf |= INEXACT_EXCEPTION;
2837 if (is_tiny)
2838 *pfpsf |= UNDERFLOW_EXCEPTION;
2840 // now check for significand = 10^34 (may have resulted from going
2841 // back to case2_repeat)
2842 if (res.w[1] == 0x0001ed09bead87c0ull &&
2843 res.w[0] == 0x378d8e6400000000ull) { // if res = 10^34
2844 res.w[1] = 0x0000314dc6448d93ull; // res = 10^33
2845 res.w[0] = 0x38c15b0a00000000ull;
2846 e3 = e3 + 1;
2848 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2849 // check for overflow
2850 if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) {
2851 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2852 res.w[0] = 0x0000000000000000ull;
2853 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2855 if (rnd_mode != ROUNDING_TO_NEAREST) {
2856 rounding_correction (rnd_mode,
2857 is_inexact_lt_midpoint,
2858 is_inexact_gt_midpoint,
2859 is_midpoint_lt_even, is_midpoint_gt_even,
2860 e3, &res, pfpsf);
2862 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2863 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2864 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2865 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2866 BID_SWAP128 (res);
2867 BID_RETURN (res)
2869 } else {
2871 // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and
2872 // the signs of x*y and z are opposite; in these cases massive
2873 // cancellation can occur, so it is better to scale either C3 or C4 and
2874 // to perform the subtraction before rounding; rounding is performed
2875 // next, depending on the number of decimal digits in the result and on
2876 // the exponent value
2877 // Note: overflow is not possible in this case
2878 // this is similar to Cases (15), (16), and (17)
2880 if (delta + q4 < q3) { // from Case (6)
2881 // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and
2882 // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign)
2883 // and call add_and_round; delta stays positive
2884 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
2885 P128.w[1] = C3.w[1];
2886 P128.w[0] = C3.w[0];
2887 C3.w[1] = C4.w[1];
2888 C3.w[0] = C4.w[0];
2889 C4.w[1] = P128.w[1];
2890 C4.w[0] = P128.w[0];
2891 ind = q3;
2892 q3 = q4;
2893 q4 = ind;
2894 ind = e3;
2895 e3 = e4;
2896 e4 = ind;
2897 tmp_sign = z_sign;
2898 z_sign = p_sign;
2899 p_sign = tmp_sign;
2900 } else { // from Cases (2), (3), (4), (5)
2901 // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be
2902 // scaled up by q4 + delta - q3; this is the same as in Cases (15),
2903 // (16), and (17) if we just change the sign of delta
2904 delta = -delta;
2906 add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
2907 rnd_mode, &is_midpoint_lt_even,
2908 &is_midpoint_gt_even, &is_inexact_lt_midpoint,
2909 &is_inexact_gt_midpoint, pfpsf, &res);
2910 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2911 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2912 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2913 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2914 BID_SWAP128 (res);
2915 BID_RETURN (res)
2919 } else { // if delta < 0
2921 delta = -delta;
2923 if (p34 < q4 && q4 <= delta) { // Case (7)
2925 // truncate C4 to p34 digits into res
2926 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
2927 x0 = q4 - p34;
2928 if (q4 <= 38) {
2929 P128.w[1] = C4.w[1];
2930 P128.w[0] = C4.w[0];
2931 round128_19_38 (q4, x0, P128, &res, &incr_exp,
2932 &is_midpoint_lt_even, &is_midpoint_gt_even,
2933 &is_inexact_lt_midpoint,
2934 &is_inexact_gt_midpoint);
2935 } else if (q4 <= 57) { // 35 <= q4 <= 57
2936 P192.w[2] = C4.w[2];
2937 P192.w[1] = C4.w[1];
2938 P192.w[0] = C4.w[0];
2939 round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2940 &is_midpoint_lt_even, &is_midpoint_gt_even,
2941 &is_inexact_lt_midpoint,
2942 &is_inexact_gt_midpoint);
2943 res.w[0] = R192.w[0];
2944 res.w[1] = R192.w[1];
2945 } else { // if (q4 <= 68)
2946 round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2947 &is_midpoint_lt_even, &is_midpoint_gt_even,
2948 &is_inexact_lt_midpoint,
2949 &is_inexact_gt_midpoint);
2950 res.w[0] = R256.w[0];
2951 res.w[1] = R256.w[1];
2953 e4 = e4 + x0;
2954 if (incr_exp) {
2955 e4 = e4 + 1;
2957 if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2958 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2959 // if C4 rounded to p34 digits is exact then the result is inexact,
2960 // in a way that depends on the signs of x * y and z
2961 if (p_sign == z_sign) {
2962 is_inexact_lt_midpoint = 1;
2963 } else { // if (p_sign != z_sign)
2964 if (res.w[1] != 0x0000314dc6448d93ull ||
2965 res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33
2966 is_inexact_gt_midpoint = 1;
2967 } else { // res = 10^33 and exact is a special case
2968 // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1
2969 // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1
2970 // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1
2971 // Note: ulp is really ulp/10 (after borrow which propagates to msd)
2972 if (delta > p34 + 1) { // C3 < 1/2
2973 // res = 10^33, unchanged
2974 is_inexact_gt_midpoint = 1;
2975 } else { // if (delta == p34 + 1)
2976 if (q3 <= 19) {
2977 if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp
2978 // res = 10^33, unchanged
2979 is_inexact_gt_midpoint = 1;
2980 } else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp
2981 // res = 10^33, unchanged
2982 is_midpoint_lt_even = 1;
2983 } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp
2984 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
2985 res.w[0] = 0x378d8e63ffffffffull;
2986 e4 = e4 - 1;
2987 is_inexact_lt_midpoint = 1;
2989 } else { // if (20 <= q3 <=34)
2990 if (C3.w[1] < midpoint128[q3 - 20].w[1] ||
2991 (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2992 C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp
2993 // res = 10^33, unchanged
2994 is_inexact_gt_midpoint = 1;
2995 } else if (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2996 C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp
2997 // res = 10^33, unchanged
2998 is_midpoint_lt_even = 1;
2999 } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp
3000 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3001 res.w[0] = 0x378d8e63ffffffffull;
3002 e4 = e4 - 1;
3003 is_inexact_lt_midpoint = 1;
3009 } else if (is_midpoint_lt_even) {
3010 if (z_sign != p_sign) {
3011 // needs correction: res = res - 1
3012 res.w[0] = res.w[0] - 1;
3013 if (res.w[0] == 0xffffffffffffffffull)
3014 res.w[1]--;
3011 3015 // if it is (10^33-1)*10^e4 then the correct result is
3012 3016 // (10^34-1)*10^(e4-1)
3017 if (res.w[1] == 0x0000314dc6448d93ull &&
3018 res.w[0] == 0x38c15b09ffffffffull) {
3019 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3020 res.w[0] = 0x378d8e63ffffffffull;
3021 e4 = e4 - 1;
3023 is_midpoint_lt_even = 0;
3024 is_inexact_lt_midpoint = 1;
3025 } else { // if (z_sign == p_sign)
3026 is_midpoint_lt_even = 0;
3027 is_inexact_gt_midpoint = 1;
3029 } else if (is_midpoint_gt_even) {
3030 if (z_sign == p_sign) {
3031 // needs correction: res = res + 1 (cannot cross in the next binade)
3032 res.w[0] = res.w[0] + 1;
3033 if (res.w[0] == 0x0000000000000000ull)
3034 res.w[1]++;
3035 is_midpoint_gt_even = 0;
3036 is_inexact_gt_midpoint = 1;
3037 } else { // if (z_sign != p_sign)
3038 is_midpoint_gt_even = 0;
3039 is_inexact_lt_midpoint = 1;
3041 } else {
3042 ; // the rounded result is already correct
3044 // check for overflow
3045 if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) {
3046 res.w[1] = p_sign | 0x7800000000000000ull;
3047 res.w[0] = 0x0000000000000000ull;
3048 *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
3049 } else { // no overflow or not RN
3050 p_exp = ((UINT64) (e4 + 6176) << 49);
3051 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
3053 if (rnd_mode != ROUNDING_TO_NEAREST) {
3054 rounding_correction (rnd_mode,
3055 is_inexact_lt_midpoint,
3056 is_inexact_gt_midpoint,
3057 is_midpoint_lt_even, is_midpoint_gt_even,
3058 e4, &res, pfpsf);
3060 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
3061 is_midpoint_lt_even || is_midpoint_gt_even) {
3062 // set the inexact flag
3063 *pfpsf |= INEXACT_EXCEPTION;
3065 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3066 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3067 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3068 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3069 BID_SWAP128 (res);
3070 BID_RETURN (res)
3072 } else if ((q4 <= p34 && p34 <= delta) || // Case (8)
3073 (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9)
3074 (q4 <= delta && delta + q3 <= p34) || // Case (10)
3075 (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13)
3076 (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14)
3077 (delta + q3 < q4 && q4 <= p34)) { // Case (18)
3079 // Case (8) is similar to Case (1), with C3 and C4 swapped
3080 // Case (9) is similar to Case (2), with C3 and C4 swapped
3081 // Case (10) is similar to Case (3), with C3 and C4 swapped
3082 // Case (13) is similar to Case (4), with C3 and C4 swapped
3083 // Case (14) is similar to Case (5), with C3 and C4 swapped
3084 // Case (18) is similar to Case (6), with C3 and C4 swapped
3086 // swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign), (z_exp, p_exp)
3087 // and go back to delta_ge_zero
3088 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
3089 P128.w[1] = C3.w[1];
3090 P128.w[0] = C3.w[0];
3091 C3.w[1] = C4.w[1];
3092 C3.w[0] = C4.w[0];
3093 C4.w[1] = P128.w[1];
3094 C4.w[0] = P128.w[0];
3095 ind = q3;
3096 q3 = q4;
3097 q4 = ind;
3098 ind = e3;
3099 e3 = e4;
3100 e4 = ind;
3101 tmp_sign = z_sign;
3102 z_sign = p_sign;
3103 p_sign = tmp_sign;
3104 tmp.ui64 = z_exp;
3105 z_exp = p_exp;
3106 p_exp = tmp.ui64;
3107 goto delta_ge_zero;
3109 } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11)
3110 (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12)
3112 // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3,
3113 // 1 <= x0 <= q3 - 1 <= p34 - 1
3114 x0 = e4 - e3; // or x0 = delta + q3 - q4
3115 if (q3 <= 18) { // 2 <= q3 <= 18
3116 round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp,
3117 &is_midpoint_lt_even, &is_midpoint_gt_even,
3118 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
3119 // C3.w[1] = 0;
3120 C3.w[0] = R64;
3121 } else if (q3 <= 38) {
3122 round128_19_38 (q3, x0, C3, &R128, &incr_exp,
3123 &is_midpoint_lt_even, &is_midpoint_gt_even,
3124 &is_inexact_lt_midpoint,
3125 &is_inexact_gt_midpoint);
3126 C3.w[1] = R128.w[1];
3127 C3.w[0] = R128.w[0];
3129 // the rounded result has q3 - x0 digits
3130 // we want the exponent to be e4, so if incr_exp = 1 then
3131 // multiply the rounded result by 10 - it will still fit in 113 bits
3132 if (incr_exp) {
3133 // 64 x 128 -> 128
3134 P128.w[1] = C3.w[1];
3135 P128.w[0] = C3.w[0];
3136 __mul_64x128_to_128 (C3, ten2k64[1], P128);
3138 e3 = e3 + x0; // this is e4
3139 // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3;
3140 // the result will have the sign of x * y; the exponent is e4
3141 R256.w[3] = 0;
3142 R256.w[2] = 0;
3143 R256.w[1] = C3.w[1];
3144 R256.w[0] = C3.w[0];
3145 if (p_sign == z_sign) { // R256 = C4 + R256
3146 add256 (C4, R256, &R256);
3147 } else { // if (p_sign != z_sign) { // R256 = C4 - R256
3148 sub256 (C4, R256, &R256); // the result cannot be pure zero
3149 // because the result has opposite sign to that of R256 which was
3150 // rounded, need to change the rounding indicators
3151 lsb = C4.w[0] & 0x01;
3152 if (is_inexact_lt_midpoint) {
3153 is_inexact_lt_midpoint = 0;
3154 is_inexact_gt_midpoint = 1;
3155 } else if (is_inexact_gt_midpoint) {
3156 is_inexact_gt_midpoint = 0;
3157 is_inexact_lt_midpoint = 1;
3158 } else if (lsb == 0) {
3159 if (is_midpoint_lt_even) {
3160 is_midpoint_lt_even = 0;
3161 is_midpoint_gt_even = 1;
3162 } else if (is_midpoint_gt_even) {
3163 is_midpoint_gt_even = 0;
3164 is_midpoint_lt_even = 1;
3165 } else {
3168 } else if (lsb == 1) {
3169 if (is_midpoint_lt_even) {
3170 // res = res + 1
3171 R256.w[0]++;
3172 if (R256.w[0] == 0x0) {
3173 R256.w[1]++;
3174 if (R256.w[1] == 0x0) {
3175 R256.w[2]++;
3176 if (R256.w[2] == 0x0) {
3177 R256.w[3]++;
3181 // no check for rounding overflow - R256 was a difference
3182 } else if (is_midpoint_gt_even) {
3183 // res = res - 1
3184 R256.w[0]--;
3185 if (R256.w[0] == 0xffffffffffffffffull) {
3186 R256.w[1]--;
3187 if (R256.w[1] == 0xffffffffffffffffull) {
3188 R256.w[2]--;
3189 if (R256.w[2] == 0xffffffffffffffffull) {
3190 R256.w[3]--;
3194 } else {
3197 } else {
3201 // determine the number of decimal digits in R256
3202 ind = nr_digits256 (R256); // ind >= p34
3203 // if R256 is sum, then ind > p34; if R256 is a difference, then
3204 // ind >= p34; this means that we can calculate the result rounded to
3205 // the destination precision, with unbounded exponent, starting from R256
3206 // and using the indicators from the rounding of C3 to avoid a double
3207 // rounding error
3209 if (ind < p34) {
3211 } else if (ind == p34) {
3212 // the result rounded to the destination precision with
3213 // unbounded exponent
3214 // is (-1)^p_sign * R256 * 10^e4
3215 res.w[1] = R256.w[1];
3216 res.w[0] = R256.w[0];
3217 } else { // if (ind > p34)
3218 // if more than P digits, round to nearest to P digits
3219 // round R256 to p34 digits
3220 x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
3221 // save C3 rounding indicators to help avoid double rounding error
3222 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3223 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3224 is_midpoint_lt_even0 = is_midpoint_lt_even;
3225 is_midpoint_gt_even0 = is_midpoint_gt_even;
3226 // initialize rounding indicators
3227 is_inexact_lt_midpoint = 0;
3228 is_inexact_gt_midpoint = 0;
3229 is_midpoint_lt_even = 0;
3230 is_midpoint_gt_even = 0;
3231 // round to p34 digits; the result fits in 113 bits
3232 if (ind <= 38) {
3233 P128.w[1] = R256.w[1];
3234 P128.w[0] = R256.w[0];
3235 round128_19_38 (ind, x0, P128, &R128, &incr_exp,
3236 &is_midpoint_lt_even, &is_midpoint_gt_even,
3237 &is_inexact_lt_midpoint,
3238 &is_inexact_gt_midpoint);
3239 } else if (ind <= 57) {
3240 P192.w[2] = R256.w[2];
3241 P192.w[1] = R256.w[1];
3242 P192.w[0] = R256.w[0];
3243 round192_39_57 (ind, x0, P192, &R192, &incr_exp,
3244 &is_midpoint_lt_even, &is_midpoint_gt_even,
3245 &is_inexact_lt_midpoint,
3246 &is_inexact_gt_midpoint);
3247 R128.w[1] = R192.w[1];
3248 R128.w[0] = R192.w[0];
3249 } else { // if (ind <= 68)
3250 round256_58_76 (ind, x0, R256, &R256, &incr_exp,
3251 &is_midpoint_lt_even, &is_midpoint_gt_even,
3252 &is_inexact_lt_midpoint,
3253 &is_inexact_gt_midpoint);
3254 R128.w[1] = R256.w[1];
3255 R128.w[0] = R256.w[0];
3257 // the rounded result has p34 = 34 digits
3258 e4 = e4 + x0 + incr_exp;
3260 res.w[1] = R128.w[1];
3261 res.w[0] = R128.w[0];
3263 // avoid a double rounding error
3264 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
3265 is_midpoint_lt_even) { // double rounding error upward
3266 // res = res - 1
3267 res.w[0]--;
3268 if (res.w[0] == 0xffffffffffffffffull)
3269 res.w[1]--;
3270 is_midpoint_lt_even = 0;
3271 is_inexact_lt_midpoint = 1;
3272 // Note: a double rounding error upward is not possible; for this
3273 // the result after the first rounding would have to be 99...95
3274 // (35 digits in all), possibly followed by a number of zeros; this
3275 // is not possible in Cases (2)-(6) or (15)-(17) which may get here
3276 // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
3277 if (res.w[1] == 0x0000314dc6448d93ull &&
3278 res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1
3279 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3280 res.w[0] = 0x378d8e63ffffffffull;
3281 e4--;
3283 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
3284 is_midpoint_gt_even) { // double rounding error downward
3285 // res = res + 1
3286 res.w[0]++;
3287 if (res.w[0] == 0)
3288 res.w[1]++;
3289 is_midpoint_gt_even = 0;
3290 is_inexact_gt_midpoint = 1;
3291 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
3292 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
3293 // if this second rounding was exact the result may still be
3294 // inexact because of the first rounding
3295 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3296 is_inexact_gt_midpoint = 1;
3298 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3299 is_inexact_lt_midpoint = 1;
3301 } else if (is_midpoint_gt_even &&
3302 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
3303 // pulled up to a midpoint
3304 is_inexact_lt_midpoint = 1;
3305 is_inexact_gt_midpoint = 0;
3306 is_midpoint_lt_even = 0;
3307 is_midpoint_gt_even = 0;
3308 } else if (is_midpoint_lt_even &&
3309 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
3310 // pulled down to a midpoint
3311 is_inexact_lt_midpoint = 0;
3312 is_inexact_gt_midpoint = 1;
3313 is_midpoint_lt_even = 0;
3314 is_midpoint_gt_even = 0;
3315 } else {
3320 // determine tininess
3321 if (rnd_mode == ROUNDING_TO_NEAREST) {
3322 if (e4 < expmin) {
3323 is_tiny = 1; // for other rounding modes apply correction
3325 } else {
3326 // for RM, RP, RZ, RA apply correction in order to determine tininess
3327 // but do not save the result; apply the correction to
3328 // (-1)^p_sign * res * 10^0
3329 P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1];
3330 P128.w[0] = res.w[0];
3331 rounding_correction (rnd_mode,
3332 is_inexact_lt_midpoint,
3333 is_inexact_gt_midpoint,
3334 is_midpoint_lt_even, is_midpoint_gt_even,
3335 0, &P128, pfpsf);
3336 scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
3337 // the number of digits in the significand is p34 = 34
3338 if (e4 + scale < expmin) {
3339 is_tiny = 1;
3343 // the result rounded to the destination precision with unbounded exponent
3344 // is (-1)^p_sign * res * 10^e4
3345 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN
3346 // res.w[0] unchanged;
3347 // Note: res is correct only if expmin <= e4 <= expmax
3348 ind = p34; // the number of decimal digits in the significand of res
3350 // at this point we have the result rounded with unbounded exponent in
3351 // res and we know its tininess:
3352 // res = (-1)^p_sign * significand * 10^e4,
3353 // where q (significand) = ind = p34
3354 // Note: res is correct only if expmin <= e4 <= expmax
3356 // check for overflow if RN
3357 if (rnd_mode == ROUNDING_TO_NEAREST
3358 && (ind + e4) > (p34 + expmax)) {
3359 res.w[1] = p_sign | 0x7800000000000000ull;
3360 res.w[0] = 0x0000000000000000ull;
3361 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
3362 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3363 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3364 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3365 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3366 BID_SWAP128 (res);
3367 BID_RETURN (res)
3368 } // else not overflow or not RN, so continue
3370 // from this point on this is similar to the last part of the computation
3371 // for Cases (15), (16), (17)
3373 // if (e4 >= expmin) we have the result rounded with bounded exponent
3374 if (e4 < expmin) {
3375 x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
3376 // where the result rounded [at most] once is
3377 // (-1)^p_sign * significand_res * 10^e4
3379 // avoid double rounding error
3380 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3381 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3382 is_midpoint_lt_even0 = is_midpoint_lt_even;
3383 is_midpoint_gt_even0 = is_midpoint_gt_even;
3384 is_inexact_lt_midpoint = 0;
3385 is_inexact_gt_midpoint = 0;
3386 is_midpoint_lt_even = 0;
3387 is_midpoint_gt_even = 0;
3389 if (x0 > ind) {
3390 // nothing is left of res when moving the decimal point left x0 digits
3391 is_inexact_lt_midpoint = 1;
3392 res.w[1] = p_sign | 0x0000000000000000ull;
3393 res.w[0] = 0x0000000000000000ull;
3394 e4 = expmin;
3395 } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
3396 // this is <, =, or > 1/2 ulp
3397 // compare the ind-digit value in the significand of res with
3398 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
3399 // less than, equal to, or greater than 1/2 ulp (significand of res)
3400 R128.w[1] = res.w[1] & MASK_COEFF;
3401 R128.w[0] = res.w[0];
3402 if (ind <= 19) {
3403 if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
3404 lt_half_ulp = 1;
3405 is_inexact_lt_midpoint = 1;
3406 } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
3407 eq_half_ulp = 1;
3408 is_midpoint_gt_even = 1;
3409 } else { // > 1/2 ulp
3410 gt_half_ulp = 1;
3411 is_inexact_gt_midpoint = 1;
3413 } else { // if (ind <= 38)
3414 if (R128.w[1] < midpoint128[ind - 20].w[1] ||
3415 (R128.w[1] == midpoint128[ind - 20].w[1] &&
3416 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
3417 lt_half_ulp = 1;
3418 is_inexact_lt_midpoint = 1;
3419 } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
3420 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
3421 eq_half_ulp = 1;
3422 is_midpoint_gt_even = 1;
3423 } else { // > 1/2 ulp
3424 gt_half_ulp = 1;
3425 is_inexact_gt_midpoint = 1;
3428 if (lt_half_ulp || eq_half_ulp) {
3429 // res = +0.0 * 10^expmin
3430 res.w[1] = 0x0000000000000000ull;
3431 res.w[0] = 0x0000000000000000ull;
3432 } else { // if (gt_half_ulp)
3433 // res = +1 * 10^expmin
3434 res.w[1] = 0x0000000000000000ull;
3435 res.w[0] = 0x0000000000000001ull;
3437 res.w[1] = p_sign | res.w[1];
3438 e4 = expmin;
3439 } else { // if (1 <= x0 <= ind - 1 <= 33)
3440 // round the ind-digit result to ind - x0 digits
3442 if (ind <= 18) { // 2 <= ind <= 18
3443 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
3444 &is_midpoint_lt_even, &is_midpoint_gt_even,
3445 &is_inexact_lt_midpoint,
3446 &is_inexact_gt_midpoint);
3447 res.w[1] = 0x0;
3448 res.w[0] = R64;
3449 } else if (ind <= 38) {
3450 P128.w[1] = res.w[1] & MASK_COEFF;
3451 P128.w[0] = res.w[0];
3452 round128_19_38 (ind, x0, P128, &res, &incr_exp,
3453 &is_midpoint_lt_even, &is_midpoint_gt_even,
3454 &is_inexact_lt_midpoint,
3455 &is_inexact_gt_midpoint);
3457 e4 = e4 + x0; // expmin
3458 // we want the exponent to be expmin, so if incr_exp = 1 then
3459 // multiply the rounded result by 10 - it will still fit in 113 bits
3460 if (incr_exp) {
3461 // 64 x 128 -> 128
3462 P128.w[1] = res.w[1] & MASK_COEFF;
3463 P128.w[0] = res.w[0];
3464 __mul_64x128_to_128 (res, ten2k64[1], P128);
3466 res.w[1] =
3467 p_sign | ((UINT64) (e4 + 6176) << 49) | (res.
3468 w[1] & MASK_COEFF);
3469 // avoid a double rounding error
3470 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
3471 is_midpoint_lt_even) { // double rounding error upward
3472 // res = res - 1
3473 res.w[0]--;
3474 if (res.w[0] == 0xffffffffffffffffull)
3475 res.w[1]--;
3476 // Note: a double rounding error upward is not possible; for this
3477 // the result after the first rounding would have to be 99...95
3478 // (35 digits in all), possibly followed by a number of zeros; this
3479 // is not possible in this underflow case
3480 is_midpoint_lt_even = 0;
3481 is_inexact_lt_midpoint = 1;
3482 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
3483 is_midpoint_gt_even) { // double rounding error downward
3484 // res = res + 1
3485 res.w[0]++;
3486 if (res.w[0] == 0)
3487 res.w[1]++;
3488 is_midpoint_gt_even = 0;
3489 is_inexact_gt_midpoint = 1;
3490 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
3491 !is_inexact_lt_midpoint
3492 && !is_inexact_gt_midpoint) {
3493 // if this second rounding was exact the result may still be
3494 // inexact because of the first rounding
3495 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3496 is_inexact_gt_midpoint = 1;
3498 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3499 is_inexact_lt_midpoint = 1;
3501 } else if (is_midpoint_gt_even &&
3502 (is_inexact_gt_midpoint0
3503 || is_midpoint_lt_even0)) {
3504 // pulled up to a midpoint
3505 is_inexact_lt_midpoint = 1;
3506 is_inexact_gt_midpoint = 0;
3507 is_midpoint_lt_even = 0;
3508 is_midpoint_gt_even = 0;
3509 } else if (is_midpoint_lt_even &&
3510 (is_inexact_lt_midpoint0
3511 || is_midpoint_gt_even0)) {
3512 // pulled down to a midpoint
3513 is_inexact_lt_midpoint = 0;
3514 is_inexact_gt_midpoint = 1;
3515 is_midpoint_lt_even = 0;
3516 is_midpoint_gt_even = 0;
3517 } else {
3522 // res contains the correct result
3523 // apply correction if not rounding to nearest
3524 if (rnd_mode != ROUNDING_TO_NEAREST) {
3525 rounding_correction (rnd_mode,
3526 is_inexact_lt_midpoint,
3527 is_inexact_gt_midpoint,
3528 is_midpoint_lt_even, is_midpoint_gt_even,
3529 e4, &res, pfpsf);
3531 if (is_midpoint_lt_even || is_midpoint_gt_even ||
3532 is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
3533 // set the inexact flag
3534 *pfpsf |= INEXACT_EXCEPTION;
3535 if (is_tiny)
3536 *pfpsf |= UNDERFLOW_EXCEPTION;
3538 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3539 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3540 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3541 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3542 BID_SWAP128 (res);
3543 BID_RETURN (res)
3545 } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15)
3546 (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16)
3547 (delta + q3 <= p34 && p34 < q4)) { // Case (17)
3549 // calculate first the result rounded to the destination precision, with
3550 // unbounded exponent
3552 add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
3553 rnd_mode, &is_midpoint_lt_even,
3554 &is_midpoint_gt_even, &is_inexact_lt_midpoint,
3555 &is_inexact_gt_midpoint, pfpsf, &res);
3556 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3557 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3558 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3559 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3560 BID_SWAP128 (res);
3561 BID_RETURN (res)
3563 } else {
3567 } // end if delta < 0
3569 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3570 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3571 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3572 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3573 BID_SWAP128 (res);
3574 BID_RETURN (res)
3579 #if DECIMAL_CALL_BY_REFERENCE
3580 void
3581 bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
3582 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3583 _EXC_INFO_PARAM) {
3584 UINT128 x = *px, y = *py, z = *pz;
3585 #if !DECIMAL_GLOBAL_ROUNDING
3586 unsigned int rnd_mode = *prnd_mode;
3587 #endif
3588 #else
3589 UINT128
3590 bid128_fma (UINT128 x, UINT128 y, UINT128 z
3591 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3592 _EXC_INFO_PARAM) {
3593 #endif
3594 int is_midpoint_lt_even, is_midpoint_gt_even,
3595 is_inexact_lt_midpoint, is_inexact_gt_midpoint;
3596 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3598 #if DECIMAL_CALL_BY_REFERENCE
3599 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3600 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3601 &res, &x, &y, &z
3602 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3603 _EXC_INFO_ARG);
3604 #else
3605 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3606 &is_inexact_lt_midpoint,
3607 &is_inexact_gt_midpoint, x, y,
3608 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3609 _EXC_INFO_ARG);
3610 #endif
3611 BID_RETURN (res);
3615 #if DECIMAL_CALL_BY_REFERENCE
3616 void
3617 bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz
3618 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3619 _EXC_INFO_PARAM) {
3620 UINT64 x = *px, y = *py, z = *pz;
3621 #if !DECIMAL_GLOBAL_ROUNDING
3622 unsigned int rnd_mode = *prnd_mode;
3623 #endif
3624 #else
3625 UINT128
3626 bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z
3627 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3628 _EXC_INFO_PARAM) {
3629 #endif
3630 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3631 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3632 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3633 UINT128 x1, y1, z1;
3635 #if DECIMAL_CALL_BY_REFERENCE
3636 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3637 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3638 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3639 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3640 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3641 &res, &x1, &y1, &z1
3642 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3643 _EXC_INFO_ARG);
3644 #else
3645 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3646 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3647 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3648 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3649 &is_inexact_lt_midpoint,
3650 &is_inexact_gt_midpoint, x1, y1,
3651 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3652 _EXC_INFO_ARG);
3653 #endif
3654 BID_RETURN (res);
3658 #if DECIMAL_CALL_BY_REFERENCE
3659 void
3660 bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3661 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3662 _EXC_INFO_PARAM) {
3663 UINT64 x = *px, y = *py;
3664 UINT128 z = *pz;
3665 #if !DECIMAL_GLOBAL_ROUNDING
3666 unsigned int rnd_mode = *prnd_mode;
3667 #endif
3668 #else
3669 UINT128
3670 bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z
3671 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3672 _EXC_INFO_PARAM) {
3673 #endif
3674 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3675 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3676 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3677 UINT128 x1, y1;
3679 #if DECIMAL_CALL_BY_REFERENCE
3680 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3681 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3682 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3683 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3684 &res, &x1, &y1, &z
3685 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3686 _EXC_INFO_ARG);
3687 #else
3688 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3689 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3690 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3691 &is_inexact_lt_midpoint,
3692 &is_inexact_gt_midpoint, x1, y1,
3693 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3694 _EXC_INFO_ARG);
3695 #endif
3696 BID_RETURN (res);
3700 #if DECIMAL_CALL_BY_REFERENCE
3701 void
3702 bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3703 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3704 _EXC_INFO_PARAM) {
3705 UINT64 x = *px, z = *pz;
3706 #if !DECIMAL_GLOBAL_ROUNDING
3707 unsigned int rnd_mode = *prnd_mode;
3708 #endif
3709 #else
3710 UINT128
3711 bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z
3712 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3713 _EXC_INFO_PARAM) {
3714 #endif
3715 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3716 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3717 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3718 UINT128 x1, z1;
3720 #if DECIMAL_CALL_BY_REFERENCE
3721 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3722 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3723 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3724 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3725 &res, &x1, py, &z1
3726 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3727 _EXC_INFO_ARG);
3728 #else
3729 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3730 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3731 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3732 &is_inexact_lt_midpoint,
3733 &is_inexact_gt_midpoint, x1, y,
3734 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3735 _EXC_INFO_ARG);
3736 #endif
3737 BID_RETURN (res);
3741 #if DECIMAL_CALL_BY_REFERENCE
3742 void
3743 bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3744 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3745 _EXC_INFO_PARAM) {
3746 UINT64 x = *px;
3747 #if !DECIMAL_GLOBAL_ROUNDING
3748 unsigned int rnd_mode = *prnd_mode;
3749 #endif
3750 #else
3751 UINT128
3752 bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z
3753 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3754 _EXC_INFO_PARAM) {
3755 #endif
3756 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3757 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3758 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3759 UINT128 x1;
3761 #if DECIMAL_CALL_BY_REFERENCE
3762 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3763 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3764 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3765 &res, &x1, py, pz
3766 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3767 _EXC_INFO_ARG);
3768 #else
3769 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3770 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3771 &is_inexact_lt_midpoint,
3772 &is_inexact_gt_midpoint, x1, y,
3773 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3774 _EXC_INFO_ARG);
3775 #endif
3776 BID_RETURN (res);
3780 #if DECIMAL_CALL_BY_REFERENCE
3781 void
3782 bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
3783 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3784 _EXC_INFO_PARAM) {
3785 UINT64 y = *py, z = *pz;
3786 #if !DECIMAL_GLOBAL_ROUNDING
3787 unsigned int rnd_mode = *prnd_mode;
3788 #endif
3789 #else
3790 UINT128
3791 bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z
3792 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3793 _EXC_INFO_PARAM) {
3794 #endif
3795 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3796 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3797 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3798 UINT128 y1, z1;
3800 #if DECIMAL_CALL_BY_REFERENCE
3801 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3802 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3803 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3804 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3805 &res, px, &y1, &z1
3806 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3807 _EXC_INFO_ARG);
3808 #else
3809 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3810 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3811 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3812 &is_inexact_lt_midpoint,
3813 &is_inexact_gt_midpoint, x, y1,
3814 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3815 _EXC_INFO_ARG);
3816 #endif
3817 BID_RETURN (res);
3821 #if DECIMAL_CALL_BY_REFERENCE
3822 void
3823 bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
3824 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3825 _EXC_INFO_PARAM) {
3826 UINT64 y = *py;
3827 #if !DECIMAL_GLOBAL_ROUNDING
3828 unsigned int rnd_mode = *prnd_mode;
3829 #endif
3830 #else
3831 UINT128
3832 bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z
3833 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3834 _EXC_INFO_PARAM) {
3835 #endif
3836 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3837 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3838 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3839 UINT128 y1;
3841 #if DECIMAL_CALL_BY_REFERENCE
3842 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3843 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3844 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3845 &res, px, &y1, pz
3846 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3847 _EXC_INFO_ARG);
3848 #else
3849 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3850 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3851 &is_inexact_lt_midpoint,
3852 &is_inexact_gt_midpoint, x, y1,
3853 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3854 _EXC_INFO_ARG);
3855 #endif
3856 BID_RETURN (res);
3860 #if DECIMAL_CALL_BY_REFERENCE
3861 void
3862 bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
3863 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3864 _EXC_INFO_PARAM) {
3865 UINT64 z = *pz;
3866 #if !DECIMAL_GLOBAL_ROUNDING
3867 unsigned int rnd_mode = *prnd_mode;
3868 #endif
3869 #else
3870 UINT128
3871 bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z
3872 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3873 _EXC_INFO_PARAM) {
3874 #endif
3875 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3876 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3877 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3878 UINT128 z1;
3880 #if DECIMAL_CALL_BY_REFERENCE
3881 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3882 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3883 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3884 &res, px, py, &z1
3885 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3886 _EXC_INFO_ARG);
3887 #else
3888 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3889 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3890 &is_inexact_lt_midpoint,
3891 &is_inexact_gt_midpoint, x, y,
3892 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3893 _EXC_INFO_ARG);
3894 #endif
3895 BID_RETURN (res);
3898 // Note: bid128qqq_fma is represented by bid128_fma
3900 // Note: bid64ddd_fma is represented by bid64_fma
3902 #if DECIMAL_CALL_BY_REFERENCE
3903 void
3904 bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3905 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3906 _EXC_INFO_PARAM) {
3907 UINT64 x = *px, y = *py;
3908 #if !DECIMAL_GLOBAL_ROUNDING
3909 unsigned int rnd_mode = *prnd_mode;
3910 #endif
3911 #else
3912 UINT64
3913 bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z
3914 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3915 _EXC_INFO_PARAM) {
3916 #endif
3917 UINT64 res1 = 0xbaddbaddbaddbaddull;
3918 UINT128 x1, y1;
3920 #if DECIMAL_CALL_BY_REFERENCE
3921 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3922 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3923 bid64qqq_fma (&res1, &x1, &y1, pz
3924 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3925 _EXC_INFO_ARG);
3926 #else
3927 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3928 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3929 res1 = bid64qqq_fma (x1, y1, z
3930 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3931 _EXC_INFO_ARG);
3932 #endif
3933 BID_RETURN (res1);
3937 #if DECIMAL_CALL_BY_REFERENCE
3938 void
3939 bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3940 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3941 _EXC_INFO_PARAM) {
3942 UINT64 x = *px, z = *pz;
3943 #if !DECIMAL_GLOBAL_ROUNDING
3944 unsigned int rnd_mode = *prnd_mode;
3945 #endif
3946 #else
3947 UINT64
3948 bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z
3949 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3950 _EXC_INFO_PARAM) {
3951 #endif
3952 UINT64 res1 = 0xbaddbaddbaddbaddull;
3953 UINT128 x1, z1;
3955 #if DECIMAL_CALL_BY_REFERENCE
3956 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3957 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3958 bid64qqq_fma (&res1, &x1, py, &z1
3959 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3960 _EXC_INFO_ARG);
3961 #else
3962 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3963 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3964 res1 = bid64qqq_fma (x1, y, z1
3965 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3966 _EXC_INFO_ARG);
3967 #endif
3968 BID_RETURN (res1);
3972 #if DECIMAL_CALL_BY_REFERENCE
3973 void
3974 bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3975 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3976 _EXC_INFO_PARAM) {
3977 UINT64 x = *px;
3978 #if !DECIMAL_GLOBAL_ROUNDING
3979 unsigned int rnd_mode = *prnd_mode;
3980 #endif
3981 #else
3982 UINT64
3983 bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z
3984 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3985 _EXC_INFO_PARAM) {
3986 #endif
3987 UINT64 res1 = 0xbaddbaddbaddbaddull;
3988 UINT128 x1;
3990 #if DECIMAL_CALL_BY_REFERENCE
3991 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3992 bid64qqq_fma (&res1, &x1, py, pz
3993 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3994 _EXC_INFO_ARG);
3995 #else
3996 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3997 res1 = bid64qqq_fma (x1, y, z
3998 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3999 _EXC_INFO_ARG);
4000 #endif
4001 BID_RETURN (res1);
4005 #if DECIMAL_CALL_BY_REFERENCE
4006 void
4007 bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
4008 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4009 _EXC_INFO_PARAM) {
4010 UINT64 y = *py, z = *pz;
4011 #if !DECIMAL_GLOBAL_ROUNDING
4012 unsigned int rnd_mode = *prnd_mode;
4013 #endif
4014 #else
4015 UINT64
4016 bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z
4017 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4018 _EXC_INFO_PARAM) {
4019 #endif
4020 UINT64 res1 = 0xbaddbaddbaddbaddull;
4021 UINT128 y1, z1;
4023 #if DECIMAL_CALL_BY_REFERENCE
4024 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4025 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4026 bid64qqq_fma (&res1, px, &y1, &z1
4027 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4028 _EXC_INFO_ARG);
4029 #else
4030 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4031 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4032 res1 = bid64qqq_fma (x, y1, z1
4033 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4034 _EXC_INFO_ARG);
4035 #endif
4036 BID_RETURN (res1);
4040 #if DECIMAL_CALL_BY_REFERENCE
4041 void
4042 bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
4043 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4044 _EXC_INFO_PARAM) {
4045 UINT64 y = *py;
4046 #if !DECIMAL_GLOBAL_ROUNDING
4047 unsigned int rnd_mode = *prnd_mode;
4048 #endif
4049 #else
4050 UINT64
4051 bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z
4052 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4053 _EXC_INFO_PARAM) {
4054 #endif
4055 UINT64 res1 = 0xbaddbaddbaddbaddull;
4056 UINT128 y1;
4058 #if DECIMAL_CALL_BY_REFERENCE
4059 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4060 bid64qqq_fma (&res1, px, &y1, pz
4061 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4062 _EXC_INFO_ARG);
4063 #else
4064 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4065 res1 = bid64qqq_fma (x, y1, z
4066 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4067 _EXC_INFO_ARG);
4068 #endif
4069 BID_RETURN (res1);
4073 #if DECIMAL_CALL_BY_REFERENCE
4074 void
4075 bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
4076 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4077 _EXC_INFO_PARAM) {
4078 UINT64 z = *pz;
4079 #if !DECIMAL_GLOBAL_ROUNDING
4080 unsigned int rnd_mode = *prnd_mode;
4081 #endif
4082 #else
4083 UINT64
4084 bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z
4085 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4086 _EXC_INFO_PARAM) {
4087 #endif
4088 UINT64 res1 = 0xbaddbaddbaddbaddull;
4089 UINT128 z1;
4091 #if DECIMAL_CALL_BY_REFERENCE
4092 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4093 bid64qqq_fma (&res1, px, py, &z1
4094 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4095 _EXC_INFO_ARG);
4096 #else
4097 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4098 res1 = bid64qqq_fma (x, y, z1
4099 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4100 _EXC_INFO_ARG);
4101 #endif
4102 BID_RETURN (res1);
/* bid64qqq_fma: fused multiply-add of three 128-bit decimal operands with a
   64-bit decimal result, res = x * y + z.
   Strategy: compute the FMA first with bid128_ext_fma (rounded to nearest to
   34 digits), then round that result to 16 digits.  The midpoint/inexact
   indicators returned by the first rounding are consulted so that the second
   rounding does not introduce a double rounding error.
   NOTE(review): this chunk is an extraction of the upstream file; lines that
   held only closing braces were dropped by the scrape, so brace balance below
   follows the original (embedded) line numbering — verify against upstream.  */
4106 #if DECIMAL_CALL_BY_REFERENCE
4107 void
4108 bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
4109 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4110 _EXC_INFO_PARAM) {
4111 UINT128 x = *px, y = *py, z = *pz;
4112 #if !DECIMAL_GLOBAL_ROUNDING
4113 unsigned int rnd_mode = *prnd_mode;
4114 #endif
4115 #else
4116 UINT64
4117 bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z
4118 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4119 _EXC_INFO_PARAM) {
4120 #endif
// *0 flags: indicators from the first (128-bit) rounding; unsuffixed flags:
// indicators from the second (64-bit) rounding.
4121 int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0,
4122 is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
4123 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
4124 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
4125 int incr_exp;
4126 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4127 UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4128 UINT64 res1 = 0xbaddbaddbaddbaddull;
4129 unsigned int save_fpsf; // needed because of the call to bid128_ext_fma
4130 UINT64 sign;
4131 UINT64 exp;
4132 int unbexp;
4133 UINT128 C;
4134 BID_UI64DOUBLE tmp;
4135 int nr_bits;
4136 int q, x0;
4137 int scale;
4138 int lt_half_ulp = 0, eq_half_ulp = 0;
4140 // Note: for rounding modes other than RN or RA, the result can be obtained
4141 // by rounding first to BID128 and then to BID64
4143 save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
4144 *pfpsf = 0;
// First step: full-precision FMA rounded to 34 digits, with rounding
// indicators captured in the *0 flags.
4146 #if DECIMAL_CALL_BY_REFERENCE
4147 bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4148 &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0,
4149 &res, &x, &y, &z
4150 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4151 _EXC_INFO_ARG);
4152 #else
4153 res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4154 &is_inexact_lt_midpoint0,
4155 &is_inexact_gt_midpoint0, x, y,
4156 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4157 _EXC_INFO_ARG);
4158 #endif
// For directed rounding modes no double rounding error is possible, and
// NaN/infinity results need no correction: convert straight to BID64.
4160 if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) ||
4161 (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible
4162 ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN)
4163 ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity
4164 #if DECIMAL_CALL_BY_REFERENCE
4165 bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4166 #else
4167 res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4168 #endif
4169 // determine the unbiased exponent of the result
4170 unbexp = ((res1 >> 53) & 0x3ff) - 398;
4172 // if subnormal, res1 must have exp = -398
4173 // if tiny and inexact set underflow and inexact status flags
4174 if (!((res1 & MASK_NAN) == MASK_NAN) && // res1 not NaN
4175 (unbexp == -398)
4176 && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull)
4177 && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0
4178 || is_midpoint_lt_even0 || is_midpoint_gt_even0)) {
4179 // set the inexact flag and the underflow flag
4180 *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4181 } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4182 is_midpoint_lt_even0 || is_midpoint_gt_even0) {
4183 // set the inexact flag and the underflow flag
4184 *pfpsf |= INEXACT_EXCEPTION;
4187 *pfpsf |= save_fpsf;
4188 BID_RETURN (res1);
4189 } // else continue, and use rounding to nearest to round to 16 digits
4191 // at this point the result is rounded to nearest (even or away) to 34 digits
4192 // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
// Unpack sign, biased exponent and 113-bit coefficient of the 34-digit result.
4193 sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
4194 exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits
4195 unbexp = (exp >> 49) - 6176;
4196 C.w[1] = res.w[HIGH_128W] & MASK_COEFF;
4197 C.w[0] = res.w[LOW_128W];
// Zero results and exponents far outside the BID64 range cannot suffer a
// double rounding error either; hand them to the plain conversion.
4199 if ((C.w[1] == 0x0 && C.w[0] == 0x0) || // result is zero
4200 (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) {
4201 // clear under/overflow
4202 #if DECIMAL_CALL_BY_REFERENCE
4203 bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4204 #else
4205 res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4206 #endif
4207 *pfpsf |= save_fpsf;
4208 BID_RETURN (res1);
4209 } // else continue
4211 // -398 - 34 <= unbexp <= 369 + 15
4212 if (rnd_mode == ROUNDING_TIES_AWAY) {
4213 // apply correction, if needed, to make the result rounded to nearest-even
4214 if (is_midpoint_gt_even) {
4215 // res = res - 1
4216 res1--; // res1 is now even
4217 } // else the result is already correctly rounded to nearest-even
4219 // at this point the result is finite, non-zero canonical normal or subnormal,
4220 // and in most cases overflow or underflow will not occur
4222 // determine the number of digits q in the result
4223 // q = nr. of decimal digits in x
4224 // determine first the nr. of bits in x
4225 if (C.w[1] == 0) {
4226 if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53
4227 // split the 64-bit value in two 32-bit halves to avoid rounding errors
4228 if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32
4229 tmp.d = (double) (C.w[0] >> 32); // exact conversion
4230 nr_bits =
4231 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4232 } else { // x < 2^32
4233 tmp.d = (double) (C.w[0]); // exact conversion
4234 nr_bits =
4235 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4237 } else { // if x < 2^53
4238 tmp.d = (double) C.w[0]; // exact conversion
4239 nr_bits =
4240 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4242 } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
4243 tmp.d = (double) C.w[1]; // exact conversion
4244 nr_bits =
4245 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
// Map bit count to decimal digit count via the nr_digits table.
4247 q = nr_digits[nr_bits - 1].digits;
4248 if (q == 0) {
4249 q = nr_digits[nr_bits - 1].digits1;
4250 if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi ||
4251 (C.w[1] == nr_digits[nr_bits - 1].threshold_hi &&
4252 C.w[0] >= nr_digits[nr_bits - 1].threshold_lo))
4253 q++;
4255 // if q > 16, round to nearest even to 16 digits (but for underflow it may
4256 // have to be truncated even more)
4257 if (q > 16) {
4258 x0 = q - 16;
4259 if (q <= 18) {
4260 round64_2_18 (q, x0, C.w[0], &res1, &incr_exp,
4261 &is_midpoint_lt_even, &is_midpoint_gt_even,
4262 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4263 } else { // 19 <= q <= 34
4264 round128_19_38 (q, x0, C, &res128, &incr_exp,
4265 &is_midpoint_lt_even, &is_midpoint_gt_even,
4266 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4267 res1 = res128.w[0]; // the result fits in 64 bits
4269 unbexp = unbexp + x0;
4270 if (incr_exp)
4271 unbexp++;
4272 q = 16; // need to set in case denormalization is necessary
4273 } else {
4274 // the result does not require a second rounding (and it must have
4275 // been exact in the first rounding, since q <= 16)
4276 res1 = C.w[0];
// Reconcile the indicators of the two roundings so the combined result
// behaves as if rounded once from the exact value.
4279 // avoid a double rounding error
4280 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4281 is_midpoint_lt_even) { // double rounding error upward
4282 // res = res - 1
4283 res1--; // res1 becomes odd
4284 is_midpoint_lt_even = 0;
4285 is_inexact_lt_midpoint = 1;
4286 if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1
4287 res1 = 0x002386f26fc0ffffull; // 10^16 - 1
4288 unbexp--;
4290 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4291 is_midpoint_gt_even) { // double rounding error downward
4292 // res = res + 1
4293 res1++; // res1 becomes odd (so it cannot be 10^16)
4294 is_midpoint_gt_even = 0;
4295 is_inexact_gt_midpoint = 1;
4296 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4297 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4298 // if this second rounding was exact the result may still be
4299 // inexact because of the first rounding
4300 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4301 is_inexact_gt_midpoint = 1;
4303 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4304 is_inexact_lt_midpoint = 1;
4306 } else if (is_midpoint_gt_even &&
4307 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4308 // pulled up to a midpoint
4309 is_inexact_lt_midpoint = 1;
4310 is_inexact_gt_midpoint = 0;
4311 is_midpoint_lt_even = 0;
4312 is_midpoint_gt_even = 0;
4313 } else if (is_midpoint_lt_even &&
4314 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4315 // pulled down to a midpoint
4316 is_inexact_lt_midpoint = 0;
4317 is_inexact_gt_midpoint = 1;
4318 is_midpoint_lt_even = 0;
4319 is_midpoint_gt_even = 0;
4320 } else {
4323 // this is the result rounded correctly to nearest even, with unbounded exp.
4325 // check for overflow
4326 if (q + unbexp > P16 + expmax16) {
4327 res1 = sign | 0x7800000000000000ull;
4328 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
4329 *pfpsf |= save_fpsf;
4330 BID_RETURN (res1)
4331 } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16
4332 // not overflow; the result must be exact, and we can multiply res1 by
4333 // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
4334 scale = unbexp - expmax16;
4335 res1 = res1 * ten2k64[scale]; // res1 * 10^scale
4336 unbexp = expmax16; // unbexp - scale
4337 } else {
4338 ; // continue
4341 // check for underflow
4342 if (q + unbexp < P16 + expmin16) {
4343 if (unbexp < expmin16) {
4344 // we must truncate more of res
4345 x0 = expmin16 - unbexp; // x0 >= 1
// Save the combined indicators as the "first rounding" before the
// additional (third) rounding needed for denormalization.
4346 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
4347 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
4348 is_midpoint_lt_even0 = is_midpoint_lt_even;
4349 is_midpoint_gt_even0 = is_midpoint_gt_even;
4350 is_inexact_lt_midpoint = 0;
4351 is_inexact_gt_midpoint = 0;
4352 is_midpoint_lt_even = 0;
4353 is_midpoint_gt_even = 0;
4354 // the number of decimal digits in res1 is q
4355 if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits
4356 // 2 <= q <= 16, 1 <= x0 <= 15
4357 round64_2_18 (q, x0, res1, &res1, &incr_exp,
4358 &is_midpoint_lt_even, &is_midpoint_gt_even,
4359 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4360 if (incr_exp) {
4361 // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
4362 res1 = ten2k64[q - x0];
4364 unbexp = unbexp + x0; // expmin16
4365 } else if (x0 == q) {
4366 // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
4367 // determine relationship with 1/2 ulp
4368 // q <= 16
4369 if (res1 < midpoint64[q - 1]) { // < 1/2 ulp
4370 lt_half_ulp = 1;
4371 is_inexact_lt_midpoint = 1;
4372 } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp
4373 eq_half_ulp = 1;
4374 is_midpoint_gt_even = 1;
4375 } else { // > 1/2 ulp
4376 // gt_half_ulp = 1;
4377 is_inexact_gt_midpoint = 1;
4379 if (lt_half_ulp || eq_half_ulp) {
4380 // res = +0.0 * 10^expmin16
4381 res1 = 0x0000000000000000ull;
4382 } else { // if (gt_half_ulp)
4383 // res = +1 * 10^expmin16
4384 res1 = 0x0000000000000001ull;
4386 unbexp = expmin16;
4387 } else { // if (x0 > q)
4388 // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
4389 res1 = 0x0000000000000000ull;
4390 unbexp = expmin16;
4391 is_inexact_lt_midpoint = 1;
// Same double-rounding reconciliation as above, now between the saved
// combined indicators and those of the denormalization rounding.
4393 // avoid a double rounding error
4394 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4395 is_midpoint_lt_even) { // double rounding error upward
4396 // res = res - 1
4397 res1--; // res1 becomes odd
4398 is_midpoint_lt_even = 0;
4399 is_inexact_lt_midpoint = 1;
4400 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4401 is_midpoint_gt_even) { // double rounding error downward
4402 // res = res + 1
4403 res1++; // res1 becomes odd
4404 is_midpoint_gt_even = 0;
4405 is_inexact_gt_midpoint = 1;
4406 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4407 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4408 // if this rounding was exact the result may still be
4409 // inexact because of the previous roundings
4410 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4411 is_inexact_gt_midpoint = 1;
4413 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4414 is_inexact_lt_midpoint = 1;
4416 } else if (is_midpoint_gt_even &&
4417 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4418 // pulled up to a midpoint
4419 is_inexact_lt_midpoint = 1;
4420 is_inexact_gt_midpoint = 0;
4421 is_midpoint_lt_even = 0;
4422 is_midpoint_gt_even = 0;
4423 } else if (is_midpoint_lt_even &&
4424 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4425 // pulled down to a midpoint
4426 is_inexact_lt_midpoint = 0;
4427 is_inexact_gt_midpoint = 1;
4428 is_midpoint_lt_even = 0;
4429 is_midpoint_gt_even = 0;
4430 } else {
4434 // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
4435 // and the result is tiny and exact
4437 // check for inexact result
4438 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4439 is_midpoint_lt_even || is_midpoint_gt_even ||
4440 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4441 is_midpoint_lt_even0 || is_midpoint_gt_even0) {
4442 // set the inexact flag and the underflow flag
4443 *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4445 } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4446 is_midpoint_lt_even || is_midpoint_gt_even) {
4447 *pfpsf |= INEXACT_EXCEPTION;
4449 // this is the result rounded correctly to nearest, with bounded exponent
4451 if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction
4452 // res = res + 1
4453 res1++; // res1 is now odd
4454 } // else the result is already correct
4456 // assemble the result
// Pack sign, biased exponent (bias 398) and coefficient into BID64 format;
// the layout differs depending on whether the coefficient needs 54 bits.
4457 if (res1 < 0x0020000000000000ull) { // res < 2^53
4458 res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1;
4459 } else { // res1 >= 2^53
4460 res1 = sign | MASK_STEERING_BITS |
4461 ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2);
4463 *pfpsf |= save_fpsf;
4464 BID_RETURN (res1);