1 /* Copyright (C) 2007-2018 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 #include "bid_internal.h"
26 /*****************************************************************************
27 * BID64_round_integral_exact
28 ****************************************************************************/
30 #if DECIMAL_CALL_BY_REFERENCE
32 bid64_round_integral_exact (UINT64
* pres
,
34 px _RND_MODE_PARAM _EXC_FLAGS_PARAM
35 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
37 #if !DECIMAL_GLOBAL_ROUNDING
38 unsigned int rnd_mode
= *prnd_mode
;
42 bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM
43 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
46 UINT64 res
= 0xbaddbaddbaddbaddull
;
48 int exp
; // unbiased exponent
49 // Note: C1 represents the significand (UINT64)
54 // UINT64 res is C* at first - represents up to 16 decimal digits <= 54 bits
55 UINT128 fstar
= { {0x0ull
, 0x0ull
} };
58 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
60 // check for NaNs and infinities
61 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
62 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
63 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
65 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
66 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
68 *pfpsf
|= INVALID_EXCEPTION
;
69 // return quiet (SNaN)
70 res
= x
& 0xfdffffffffffffffull
;
75 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
76 res
= x_sign
| 0x7800000000000000ull
;
80 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
81 // if the steering bits are 11 (condition will be 0), then
82 // the exponent is G[0:w+1]
83 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
84 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
85 if (C1
> 9999999999999999ull) { // non-canonical
88 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
89 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
90 C1
= (x
& MASK_BINARY_SIG1
);
93 // if x is 0 or non-canonical return 0 preserving the sign bit and
94 // the preferred exponent of MAX(Q(x), 0)
98 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
101 // x is a finite non-zero number (not 0, non-canonical, or special)
104 case ROUNDING_TO_NEAREST
:
105 case ROUNDING_TIES_AWAY
:
106 // return 0 if (exp <= -(p+1))
108 res
= x_sign
| 0x31c0000000000000ull
;
109 *pfpsf
|= INEXACT_EXCEPTION
;
114 // return 0 if (exp <= -p)
117 res
= 0xb1c0000000000001ull
;
119 res
= 0x31c0000000000000ull
;
121 *pfpsf
|= INEXACT_EXCEPTION
;
126 // return 0 if (exp <= -p)
129 res
= 0xb1c0000000000000ull
;
131 res
= 0x31c0000000000001ull
;
133 *pfpsf
|= INEXACT_EXCEPTION
;
137 case ROUNDING_TO_ZERO
:
138 // return 0 if (exp <= -p)
140 res
= x_sign
| 0x31c0000000000000ull
;
141 *pfpsf
|= INEXACT_EXCEPTION
;
147 // q = nr. of decimal digits in x (1 <= q <= 54)
148 // determine first the nr. of bits in x
149 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
151 } else { // if x < 2^53
152 tmp1
.d
= (double) C1
; // exact conversion
154 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
155 q
= nr_digits
[x_nr_bits
- 1].digits
;
157 q
= nr_digits
[x_nr_bits
- 1].digits1
;
158 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
163 if (exp
>= 0) { // -exp <= 0
164 // the argument is an integer already
170 case ROUNDING_TO_NEAREST
:
171 if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
172 // need to shift right -exp digits from the coefficient; exp will be 0
173 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
174 // chop off ind digits from the lower part of C1
175 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
176 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
177 C1
= C1
+ midpoint64
[ind
- 1];
178 // calculate C* and f*
179 // C* is actually floor(C*) in this case
180 // C* and f* need shifting and masking, as shown by
181 // shiftright128[] and maskhigh128[]
183 // kx = 10^(-x) = ten2mk64[ind - 1]
184 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
185 // the approximation of 10^(-x) was rounded up to 64 bits
186 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
188 // if (0 < f* < 10^(-x)) then the result is a midpoint
189 // if floor(C*) is even then C* = floor(C*) - logical right
190 // shift; C* has p decimal digits, correct by Prop. 1)
191 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
192 // shift; C* has p decimal digits, correct by Pr. 1)
194 // C* = floor(C*) (logical right shift; C has p decimal digits,
195 // correct by Property 1)
198 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
201 fstar
.w
[0] = P128
.w
[0];
202 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
203 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
204 res
= (P128
.w
[1] >> shift
);
205 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
206 fstar
.w
[0] = P128
.w
[0];
208 // if (0 < f* < 10^(-x)) then the result is a midpoint
209 // since round_to_even, subtract 1 if current result is odd
210 if ((res
& 0x0000000000000001ull
) && (fstar
.w
[1] == 0)
211 && (fstar
.w
[0] < ten2mk64
[ind
- 1])) {
214 // determine inexactness of the rounding of C*
215 // if (0 < f* - 1/2 < 10^(-x)) then
216 // the result is exact
217 // else // if (f* - 1/2 > T*) then
218 // the result is inexact
220 if (fstar
.w
[0] > 0x8000000000000000ull
) {
221 // f* > 1/2 and the result may be exact
222 // fstar.w[0] - 0x8000000000000000ull is f* - 1/2
223 if ((fstar
.w
[0] - 0x8000000000000000ull
) > ten2mk64
[ind
- 1]) {
224 // set the inexact flag
225 *pfpsf
|= INEXACT_EXCEPTION
;
226 } // else the result is exact
227 } else { // the result is inexact; f2* <= 1/2
228 // set the inexact flag
229 *pfpsf
|= INEXACT_EXCEPTION
;
231 } else { // if 3 <= ind - 1 <= 21
232 if (fstar
.w
[1] > onehalf128
[ind
- 1] ||
233 (fstar
.w
[1] == onehalf128
[ind
- 1] && fstar
.w
[0])) {
234 // f2* > 1/2 and the result may be exact
235 // Calculate f2* - 1/2
236 if (fstar
.w
[1] > onehalf128
[ind
- 1]
237 || fstar
.w
[0] > ten2mk64
[ind
- 1]) {
238 // set the inexact flag
239 *pfpsf
|= INEXACT_EXCEPTION
;
240 } // else the result is exact
241 } else { // the result is inexact; f2* <= 1/2
242 // set the inexact flag
243 *pfpsf
|= INEXACT_EXCEPTION
;
246 // set exponent to zero as it was negative before.
247 res
= x_sign
| 0x31c0000000000000ull
| res
;
249 } else { // if exp < 0 and q + exp < 0
250 // the result is +0 or -0
251 res
= x_sign
| 0x31c0000000000000ull
;
252 *pfpsf
|= INEXACT_EXCEPTION
;
256 case ROUNDING_TIES_AWAY
:
257 if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
258 // need to shift right -exp digits from the coefficient; exp will be 0
259 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
260 // chop off ind digits from the lower part of C1
261 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
262 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
263 C1
= C1
+ midpoint64
[ind
- 1];
264 // calculate C* and f*
265 // C* is actually floor(C*) in this case
266 // C* and f* need shifting and masking, as shown by
267 // shiftright128[] and maskhigh128[]
269 // kx = 10^(-x) = ten2mk64[ind - 1]
270 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
271 // the approximation of 10^(-x) was rounded up to 64 bits
272 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
274 // if (0 < f* < 10^(-x)) then the result is a midpoint
275 // C* = floor(C*) - logical right shift; C* has p decimal digits,
276 // correct by Prop. 1)
278 // C* = floor(C*) (logical right shift; C has p decimal digits,
279 // correct by Property 1)
282 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
285 fstar
.w
[0] = P128
.w
[0];
286 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
287 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
288 res
= (P128
.w
[1] >> shift
);
289 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
290 fstar
.w
[0] = P128
.w
[0];
292 // midpoints are already rounded correctly
293 // determine inexactness of the rounding of C*
294 // if (0 < f* - 1/2 < 10^(-x)) then
295 // the result is exact
296 // else // if (f* - 1/2 > T*) then
297 // the result is inexact
299 if (fstar
.w
[0] > 0x8000000000000000ull
) {
300 // f* > 1/2 and the result may be exact
301 // fstar.w[0] - 0x8000000000000000ull is f* - 1/2
302 if ((fstar
.w
[0] - 0x8000000000000000ull
) > ten2mk64
[ind
- 1]) {
303 // set the inexact flag
304 *pfpsf
|= INEXACT_EXCEPTION
;
305 } // else the result is exact
306 } else { // the result is inexact; f2* <= 1/2
307 // set the inexact flag
308 *pfpsf
|= INEXACT_EXCEPTION
;
310 } else { // if 3 <= ind - 1 <= 21
311 if (fstar
.w
[1] > onehalf128
[ind
- 1] ||
312 (fstar
.w
[1] == onehalf128
[ind
- 1] && fstar
.w
[0])) {
313 // f2* > 1/2 and the result may be exact
314 // Calculate f2* - 1/2
315 if (fstar
.w
[1] > onehalf128
[ind
- 1]
316 || fstar
.w
[0] > ten2mk64
[ind
- 1]) {
317 // set the inexact flag
318 *pfpsf
|= INEXACT_EXCEPTION
;
319 } // else the result is exact
320 } else { // the result is inexact; f2* <= 1/2
321 // set the inexact flag
322 *pfpsf
|= INEXACT_EXCEPTION
;
325 // set exponent to zero as it was negative before.
326 res
= x_sign
| 0x31c0000000000000ull
| res
;
328 } else { // if exp < 0 and q + exp < 0
329 // the result is +0 or -0
330 res
= x_sign
| 0x31c0000000000000ull
;
331 *pfpsf
|= INEXACT_EXCEPTION
;
336 if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
337 // need to shift right -exp digits from the coefficient; exp will be 0
338 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
339 // chop off ind digits from the lower part of C1
340 // C1 fits in 64 bits
341 // calculate C* and f*
342 // C* is actually floor(C*) in this case
343 // C* and f* need shifting and masking, as shown by
344 // shiftright128[] and maskhigh128[]
346 // kx = 10^(-x) = ten2mk64[ind - 1]
348 // the approximation of 10^(-x) was rounded up to 64 bits
349 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
351 // C* = floor(C*) (logical right shift; C has p decimal digits,
352 // correct by Property 1)
353 // if (0 < f* < 10^(-x)) then the result is exact
356 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
359 fstar
.w
[0] = P128
.w
[0];
360 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
361 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
362 res
= (P128
.w
[1] >> shift
);
363 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
364 fstar
.w
[0] = P128
.w
[0];
366 // if (f* > 10^(-x)) then the result is inexact
367 if ((fstar
.w
[1] != 0) || (fstar
.w
[0] >= ten2mk64
[ind
- 1])) {
369 // if negative and not exact, increment magnitude
372 *pfpsf
|= INEXACT_EXCEPTION
;
374 // set exponent to zero as it was negative before.
375 res
= x_sign
| 0x31c0000000000000ull
| res
;
377 } else { // if exp < 0 and q + exp <= 0
378 // the result is +0 or -1
380 res
= 0xb1c0000000000001ull
;
382 res
= 0x31c0000000000000ull
;
384 *pfpsf
|= INEXACT_EXCEPTION
;
389 if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
390 // need to shift right -exp digits from the coefficient; exp will be 0
391 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
392 // chop off ind digits from the lower part of C1
393 // C1 fits in 64 bits
394 // calculate C* and f*
395 // C* is actually floor(C*) in this case
396 // C* and f* need shifting and masking, as shown by
397 // shiftright128[] and maskhigh128[]
399 // kx = 10^(-x) = ten2mk64[ind - 1]
401 // the approximation of 10^(-x) was rounded up to 64 bits
402 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
404 // C* = floor(C*) (logical right shift; C has p decimal digits,
405 // correct by Property 1)
406 // if (0 < f* < 10^(-x)) then the result is exact
409 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
412 fstar
.w
[0] = P128
.w
[0];
413 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
414 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
415 res
= (P128
.w
[1] >> shift
);
416 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
417 fstar
.w
[0] = P128
.w
[0];
419 // if (f* > 10^(-x)) then the result is inexact
420 if ((fstar
.w
[1] != 0) || (fstar
.w
[0] >= ten2mk64
[ind
- 1])) {
422 // if positive and not exact, increment magnitude
425 *pfpsf
|= INEXACT_EXCEPTION
;
427 // set exponent to zero as it was negative before.
428 res
= x_sign
| 0x31c0000000000000ull
| res
;
430 } else { // if exp < 0 and q + exp <= 0
431 // the result is -0 or +1
433 res
= 0xb1c0000000000000ull
;
435 res
= 0x31c0000000000001ull
;
437 *pfpsf
|= INEXACT_EXCEPTION
;
441 case ROUNDING_TO_ZERO
:
442 if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
443 // need to shift right -exp digits from the coefficient; exp will be 0
444 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
445 // chop off ind digits from the lower part of C1
446 // C1 fits in 127 bits
447 // calculate C* and f*
448 // C* is actually floor(C*) in this case
449 // C* and f* need shifting and masking, as shown by
450 // shiftright128[] and maskhigh128[]
452 // kx = 10^(-x) = ten2mk64[ind - 1]
454 // the approximation of 10^(-x) was rounded up to 64 bits
455 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
457 // C* = floor(C*) (logical right shift; C has p decimal digits,
458 // correct by Property 1)
459 // if (0 < f* < 10^(-x)) then the result is exact
462 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
465 fstar
.w
[0] = P128
.w
[0];
466 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
467 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
468 res
= (P128
.w
[1] >> shift
);
469 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
470 fstar
.w
[0] = P128
.w
[0];
472 // if (f* > 10^(-x)) then the result is inexact
473 if ((fstar
.w
[1] != 0) || (fstar
.w
[0] >= ten2mk64
[ind
- 1])) {
474 *pfpsf
|= INEXACT_EXCEPTION
;
476 // set exponent to zero as it was negative before.
477 res
= x_sign
| 0x31c0000000000000ull
| res
;
479 } else { // if exp < 0 and q + exp < 0
480 // the result is +0 or -0
481 res
= x_sign
| 0x31c0000000000000ull
;
482 *pfpsf
|= INEXACT_EXCEPTION
;
490 /*****************************************************************************
491 * BID64_round_integral_nearest_even
492 ****************************************************************************/
494 #if DECIMAL_CALL_BY_REFERENCE
496 bid64_round_integral_nearest_even (UINT64
* pres
,
498 px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
503 bid64_round_integral_nearest_even (UINT64 x _EXC_FLAGS_PARAM
504 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
507 UINT64 res
= 0xbaddbaddbaddbaddull
;
509 int exp
; // unbiased exponent
510 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
518 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
520 // check for NaNs and infinities
521 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
522 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
523 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
525 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
526 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
528 *pfpsf
|= INVALID_EXCEPTION
;
529 // return quiet (SNaN)
530 res
= x
& 0xfdffffffffffffffull
;
535 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
536 res
= x_sign
| 0x7800000000000000ull
;
540 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
541 // if the steering bits are 11 (condition will be 0), then
542 // the exponent is G[0:w+1]
543 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
544 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
545 if (C1
> 9999999999999999ull) { // non-canonical
548 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
549 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
550 C1
= (x
& MASK_BINARY_SIG1
);
553 // if x is 0 or non-canonical
557 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
560 // x is a finite non-zero number (not 0, non-canonical, or special)
562 // return 0 if (exp <= -(p+1))
564 res
= x_sign
| 0x31c0000000000000ull
;
567 // q = nr. of decimal digits in x (1 <= q <= 54)
568 // determine first the nr. of bits in x
569 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
571 } else { // if x < 2^53
572 tmp1
.d
= (double) C1
; // exact conversion
574 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
575 q
= nr_digits
[x_nr_bits
- 1].digits
;
577 q
= nr_digits
[x_nr_bits
- 1].digits1
;
578 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
583 if (exp
>= 0) { // -exp <= 0
584 // the argument is an integer already
587 } else if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
588 // need to shift right -exp digits from the coefficient; the exp will be 0
589 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
590 // chop off ind digits from the lower part of C1
591 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
592 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
593 C1
= C1
+ midpoint64
[ind
- 1];
594 // calculate C* and f*
595 // C* is actually floor(C*) in this case
596 // C* and f* need shifting and masking, as shown by
597 // shiftright128[] and maskhigh128[]
599 // kx = 10^(-x) = ten2mk64[ind - 1]
600 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
601 // the approximation of 10^(-x) was rounded up to 64 bits
602 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
604 // if (0 < f* < 10^(-x)) then the result is a midpoint
605 // if floor(C*) is even then C* = floor(C*) - logical right
606 // shift; C* has p decimal digits, correct by Prop. 1)
607 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
608 // shift; C* has p decimal digits, correct by Pr. 1)
610 // C* = floor(C*) (logical right shift; C has p decimal digits,
611 // correct by Property 1)
614 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
617 fstar
.w
[0] = P128
.w
[0];
618 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
619 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
620 res
= (P128
.w
[1] >> shift
);
621 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
622 fstar
.w
[0] = P128
.w
[0];
624 // if (0 < f* < 10^(-x)) then the result is a midpoint
625 // since round_to_even, subtract 1 if current result is odd
626 if ((res
& 0x0000000000000001ull
) && (fstar
.w
[1] == 0)
627 && (fstar
.w
[0] < ten2mk64
[ind
- 1])) {
630 // set exponent to zero as it was negative before.
631 res
= x_sign
| 0x31c0000000000000ull
| res
;
633 } else { // if exp < 0 and q + exp < 0
634 // the result is +0 or -0
635 res
= x_sign
| 0x31c0000000000000ull
;
640 /*****************************************************************************
641 * BID64_round_integral_negative
642 *****************************************************************************/
644 #if DECIMAL_CALL_BY_REFERENCE
646 bid64_round_integral_negative (UINT64
* pres
,
648 px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
653 bid64_round_integral_negative (UINT64 x _EXC_FLAGS_PARAM
654 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
657 UINT64 res
= 0xbaddbaddbaddbaddull
;
659 int exp
; // unbiased exponent
660 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
665 // UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
669 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
671 // check for NaNs and infinities
672 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
673 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
674 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
676 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
677 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
679 *pfpsf
|= INVALID_EXCEPTION
;
680 // return quiet (SNaN)
681 res
= x
& 0xfdffffffffffffffull
;
686 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
687 res
= x_sign
| 0x7800000000000000ull
;
691 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
692 // if the steering bits are 11 (condition will be 0), then
693 // the exponent is G[0:w+1]
694 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
695 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
696 if (C1
> 9999999999999999ull) { // non-canonical
699 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
700 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
701 C1
= (x
& MASK_BINARY_SIG1
);
704 // if x is 0 or non-canonical
708 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
711 // x is a finite non-zero number (not 0, non-canonical, or special)
713 // return 0 if (exp <= -p)
716 res
= 0xb1c0000000000001ull
;
718 res
= 0x31c0000000000000ull
;
722 // q = nr. of decimal digits in x (1 <= q <= 54)
723 // determine first the nr. of bits in x
724 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
726 } else { // if x < 2^53
727 tmp1
.d
= (double) C1
; // exact conversion
729 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
730 q
= nr_digits
[x_nr_bits
- 1].digits
;
732 q
= nr_digits
[x_nr_bits
- 1].digits1
;
733 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
738 if (exp
>= 0) { // -exp <= 0
739 // the argument is an integer already
742 } else if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
743 // need to shift right -exp digits from the coefficient; the exp will be 0
744 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
745 // chop off ind digits from the lower part of C1
746 // C1 fits in 64 bits
747 // calculate C* and f*
748 // C* is actually floor(C*) in this case
749 // C* and f* need shifting and masking, as shown by
750 // shiftright128[] and maskhigh128[]
752 // kx = 10^(-x) = ten2mk64[ind - 1]
754 // the approximation of 10^(-x) was rounded up to 64 bits
755 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
757 // C* = floor(C*) (logical right shift; C has p decimal digits,
758 // correct by Property 1)
759 // if (0 < f* < 10^(-x)) then the result is exact
762 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
765 fstar
.w
[0] = P128
.w
[0];
766 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
767 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
768 res
= (P128
.w
[1] >> shift
);
769 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
770 fstar
.w
[0] = P128
.w
[0];
772 // if (f* > 10^(-x)) then the result is inexact
774 && ((fstar
.w
[1] != 0) || (fstar
.w
[0] >= ten2mk64
[ind
- 1]))) {
775 // if negative and not exact, increment magnitude
778 // set exponent to zero as it was negative before.
779 res
= x_sign
| 0x31c0000000000000ull
| res
;
781 } else { // if exp < 0 and q + exp <= 0
782 // the result is +0 or -1
784 res
= 0xb1c0000000000001ull
;
786 res
= 0x31c0000000000000ull
;
792 /*****************************************************************************
793 * BID64_round_integral_positive
794 ****************************************************************************/
796 #if DECIMAL_CALL_BY_REFERENCE
798 bid64_round_integral_positive (UINT64
* pres
,
800 px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
805 bid64_round_integral_positive (UINT64 x _EXC_FLAGS_PARAM
806 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
809 UINT64 res
= 0xbaddbaddbaddbaddull
;
811 int exp
; // unbiased exponent
812 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
817 // UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
821 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
823 // check for NaNs and infinities
824 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
825 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
826 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
828 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
829 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
831 *pfpsf
|= INVALID_EXCEPTION
;
832 // return quiet (SNaN)
833 res
= x
& 0xfdffffffffffffffull
;
838 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
839 res
= x_sign
| 0x7800000000000000ull
;
843 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
844 // if the steering bits are 11 (condition will be 0), then
845 // the exponent is G[0:w+1]
846 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
847 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
848 if (C1
> 9999999999999999ull) { // non-canonical
851 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
852 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
853 C1
= (x
& MASK_BINARY_SIG1
);
856 // if x is 0 or non-canonical
860 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
863 // x is a finite non-zero number (not 0, non-canonical, or special)
865 // return 0 if (exp <= -p)
868 res
= 0xb1c0000000000000ull
;
870 res
= 0x31c0000000000001ull
;
874 // q = nr. of decimal digits in x (1 <= q <= 54)
875 // determine first the nr. of bits in x
876 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
878 } else { // if x < 2^53
879 tmp1
.d
= (double) C1
; // exact conversion
881 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
882 q
= nr_digits
[x_nr_bits
- 1].digits
;
884 q
= nr_digits
[x_nr_bits
- 1].digits1
;
885 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
890 if (exp
>= 0) { // -exp <= 0
891 // the argument is an integer already
894 } else if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
895 // need to shift right -exp digits from the coefficient; the exp will be 0
896 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
897 // chop off ind digits from the lower part of C1
898 // C1 fits in 64 bits
899 // calculate C* and f*
900 // C* is actually floor(C*) in this case
901 // C* and f* need shifting and masking, as shown by
902 // shiftright128[] and maskhigh128[]
904 // kx = 10^(-x) = ten2mk64[ind - 1]
906 // the approximation of 10^(-x) was rounded up to 64 bits
907 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
909 // C* = floor(C*) (logical right shift; C has p decimal digits,
910 // correct by Property 1)
911 // if (0 < f* < 10^(-x)) then the result is exact
914 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
917 fstar
.w
[0] = P128
.w
[0];
918 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
919 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
920 res
= (P128
.w
[1] >> shift
);
921 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
922 fstar
.w
[0] = P128
.w
[0];
924 // if (f* > 10^(-x)) then the result is inexact
926 && ((fstar
.w
[1] != 0) || (fstar
.w
[0] >= ten2mk64
[ind
- 1]))) {
927 // if positive and not exact, increment magnitude
930 // set exponent to zero as it was negative before.
931 res
= x_sign
| 0x31c0000000000000ull
| res
;
933 } else { // if exp < 0 and q + exp <= 0
934 // the result is -0 or +1
936 res
= 0xb1c0000000000000ull
;
938 res
= 0x31c0000000000001ull
;
944 /*****************************************************************************
945 * BID64_round_integral_zero
946 ****************************************************************************/
948 #if DECIMAL_CALL_BY_REFERENCE
950 bid64_round_integral_zero (UINT64
* pres
,
952 px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
957 bid64_round_integral_zero (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
961 UINT64 res
= 0xbaddbaddbaddbaddull
;
963 int exp
; // unbiased exponent
964 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
969 // UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
972 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
974 // check for NaNs and infinities
975 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
976 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
977 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
979 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
980 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
982 *pfpsf
|= INVALID_EXCEPTION
;
983 // return quiet (SNaN)
984 res
= x
& 0xfdffffffffffffffull
;
989 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
990 res
= x_sign
| 0x7800000000000000ull
;
994 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
995 // if the steering bits are 11 (condition will be 0), then
996 // the exponent is G[0:w+1]
997 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
998 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
999 if (C1
> 9999999999999999ull) { // non-canonical
1002 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
1003 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
1004 C1
= (x
& MASK_BINARY_SIG1
);
1007 // if x is 0 or non-canonical
1011 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
1014 // x is a finite non-zero number (not 0, non-canonical, or special)
1016 // return 0 if (exp <= -p)
1018 res
= x_sign
| 0x31c0000000000000ull
;
1021 // q = nr. of decimal digits in x (1 <= q <= 54)
1022 // determine first the nr. of bits in x
1023 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
1025 } else { // if x < 2^53
1026 tmp1
.d
= (double) C1
; // exact conversion
1028 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1029 q
= nr_digits
[x_nr_bits
- 1].digits
;
1031 q
= nr_digits
[x_nr_bits
- 1].digits1
;
1032 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
1037 if (exp
>= 0) { // -exp <= 0
1038 // the argument is an integer already
1041 } else if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
1042 // need to shift right -exp digits from the coefficient; the exp will be 0
1043 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
1044 // chop off ind digits from the lower part of C1
1045 // C1 fits in 127 bits
1046 // calculate C* and f*
1047 // C* is actually floor(C*) in this case
1048 // C* and f* need shifting and masking, as shown by
1049 // shiftright128[] and maskhigh128[]
1051 // kx = 10^(-x) = ten2mk64[ind - 1]
1052 // C* = C1 * 10^(-x)
1053 // the approximation of 10^(-x) was rounded up to 64 bits
1054 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
1056 // C* = floor(C*) (logical right shift; C has p decimal digits,
1057 // correct by Property 1)
1058 // if (0 < f* < 10^(-x)) then the result is exact
1059 // n = C* * 10^(e+x)
1061 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1063 // redundant fstar.w[1] = 0;
1064 // redundant fstar.w[0] = P128.w[0];
1065 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1066 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
1067 res
= (P128
.w
[1] >> shift
);
1068 // redundant fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
1069 // redundant fstar.w[0] = P128.w[0];
1071 // if (f* > 10^(-x)) then the result is inexact
1072 // if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind-1])){
1075 // set exponent to zero as it was negative before.
1076 res
= x_sign
| 0x31c0000000000000ull
| res
;
1078 } else { // if exp < 0 and q + exp < 0
1079 // the result is +0 or -0
1080 res
= x_sign
| 0x31c0000000000000ull
;
1085 /*****************************************************************************
1086 * BID64_round_integral_nearest_away
1087 ****************************************************************************/
1089 #if DECIMAL_CALL_BY_REFERENCE
1091 bid64_round_integral_nearest_away (UINT64
* pres
,
1093 px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
1098 bid64_round_integral_nearest_away (UINT64 x _EXC_FLAGS_PARAM
1099 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
1102 UINT64 res
= 0xbaddbaddbaddbaddull
;
1104 int exp
; // unbiased exponent
1105 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
1106 BID_UI64DOUBLE tmp1
;
1112 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
1114 // check for NaNs and infinities
1115 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
1116 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
1117 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
1119 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
1120 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
1122 *pfpsf
|= INVALID_EXCEPTION
;
1123 // return quiet (SNaN)
1124 res
= x
& 0xfdffffffffffffffull
;
1129 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
1130 res
= x_sign
| 0x7800000000000000ull
;
1134 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
1135 // if the steering bits are 11 (condition will be 0), then
1136 // the exponent is G[0:w+1]
1137 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
1138 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
1139 if (C1
> 9999999999999999ull) { // non-canonical
1142 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
1143 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
1144 C1
= (x
& MASK_BINARY_SIG1
);
1147 // if x is 0 or non-canonical
1151 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
1154 // x is a finite non-zero number (not 0, non-canonical, or special)
1156 // return 0 if (exp <= -(p+1))
1158 res
= x_sign
| 0x31c0000000000000ull
;
1161 // q = nr. of decimal digits in x (1 <= q <= 54)
1162 // determine first the nr. of bits in x
1163 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
1165 } else { // if x < 2^53
1166 tmp1
.d
= (double) C1
; // exact conversion
1168 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1169 q
= nr_digits
[x_nr_bits
- 1].digits
;
1171 q
= nr_digits
[x_nr_bits
- 1].digits1
;
1172 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
1177 if (exp
>= 0) { // -exp <= 0
1178 // the argument is an integer already
1181 } else if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
1182 // need to shift right -exp digits from the coefficient; the exp will be 0
1183 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
1184 // chop off ind digits from the lower part of C1
1185 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
1186 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
1187 C1
= C1
+ midpoint64
[ind
- 1];
1188 // calculate C* and f*
1189 // C* is actually floor(C*) in this case
1190 // C* and f* need shifting and masking, as shown by
1191 // shiftright128[] and maskhigh128[]
1193 // kx = 10^(-x) = ten2mk64[ind - 1]
1194 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
1195 // the approximation of 10^(-x) was rounded up to 64 bits
1196 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
1198 // if (0 < f* < 10^(-x)) then the result is a midpoint
1199 // C* = floor(C*) - logical right shift; C* has p decimal digits,
1200 // correct by Prop. 1)
1202 // C* = floor(C*) (logical right shift; C has p decimal digits,
1203 // correct by Property 1)
1204 // n = C* * 10^(e+x)
1206 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1208 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1209 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
1210 res
= (P128
.w
[1] >> shift
);
1212 // midpoints are already rounded correctly
1213 // set exponent to zero as it was negative before.
1214 res
= x_sign
| 0x31c0000000000000ull
| res
;
1216 } else { // if exp < 0 and q + exp < 0
1217 // the result is +0 or -0
1218 res
= x_sign
| 0x31c0000000000000ull
;