1 /* Copyright (C) 2007 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
29 #include "bid_internal.h"
31 /*****************************************************************************
32 * BID64_round_integral_exact
33 ****************************************************************************/
35 #if DECIMAL_CALL_BY_REFERENCE
37 bid64_round_integral_exact (UINT64
* pres
,
39 px _RND_MODE_PARAM _EXC_FLAGS_PARAM
40 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
42 #if !DECIMAL_GLOBAL_ROUNDING
43 unsigned int rnd_mode
= *prnd_mode
;
47 bid64_round_integral_exact (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM
48 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
51 UINT64 res
= 0xbaddbaddbaddbaddull
;
53 int exp
; // unbiased exponent
54 // Note: C1 represents the significand (UINT64)
59 // UINT64 res is C* at first - represents up to 16 decimal digits <= 54 bits
60 UINT128 fstar
= { {0x0ull
, 0x0ull
} };
63 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
65 // check for NaNs and infinities
66 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
67 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
68 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
70 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
71 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
73 *pfpsf
|= INVALID_EXCEPTION
;
74 // return quiet (SNaN)
75 res
= x
& 0xfdffffffffffffffull
;
80 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
81 res
= x_sign
| 0x7800000000000000ull
;
85 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
86 // if the steering bits are 11 (condition will be 0), then
87 // the exponent is G[0:w+1]
88 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
89 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
90 if (C1
> 9999999999999999ull) { // non-canonical
93 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
94 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
95 C1
= (x
& MASK_BINARY_SIG1
);
98 // if x is 0 or non-canonical return 0 preserving the sign bit and
99 // the preferred exponent of MAX(Q(x), 0)
103 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
106 // x is a finite non-zero number (not 0, non-canonical, or special)
109 case ROUNDING_TO_NEAREST
:
110 case ROUNDING_TIES_AWAY
:
111 // return 0 if (exp <= -(p+1))
113 res
= x_sign
| 0x31c0000000000000ull
;
114 *pfpsf
|= INEXACT_EXCEPTION
;
119 // return 0 if (exp <= -p)
122 res
= 0xb1c0000000000001ull
;
124 res
= 0x31c0000000000000ull
;
126 *pfpsf
|= INEXACT_EXCEPTION
;
131 // return 0 if (exp <= -p)
134 res
= 0xb1c0000000000000ull
;
136 res
= 0x31c0000000000001ull
;
138 *pfpsf
|= INEXACT_EXCEPTION
;
142 case ROUNDING_TO_ZERO
:
143 // return 0 if (exp <= -p)
145 res
= x_sign
| 0x31c0000000000000ull
;
146 *pfpsf
|= INEXACT_EXCEPTION
;
152 // q = nr. of decimal digits in x (1 <= q <= 54)
153 // determine first the nr. of bits in x
154 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
156 } else { // if x < 2^53
157 tmp1
.d
= (double) C1
; // exact conversion
159 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
160 q
= nr_digits
[x_nr_bits
- 1].digits
;
162 q
= nr_digits
[x_nr_bits
- 1].digits1
;
163 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
168 if (exp
>= 0) { // -exp <= 0
169 // the argument is an integer already
175 case ROUNDING_TO_NEAREST
:
176 if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
177 // need to shift right -exp digits from the coefficient; exp will be 0
178 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
179 // chop off ind digits from the lower part of C1
180 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
181 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
182 C1
= C1
+ midpoint64
[ind
- 1];
183 // calculate C* and f*
184 // C* is actually floor(C*) in this case
185 // C* and f* need shifting and masking, as shown by
186 // shiftright128[] and maskhigh128[]
188 // kx = 10^(-x) = ten2mk64[ind - 1]
189 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
190 // the approximation of 10^(-x) was rounded up to 64 bits
191 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
193 // if (0 < f* < 10^(-x)) then the result is a midpoint
194 // if floor(C*) is even then C* = floor(C*) - logical right
195 // shift; C* has p decimal digits, correct by Prop. 1)
196 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
197 // shift; C* has p decimal digits, correct by Pr. 1)
199 // C* = floor(C*) (logical right shift; C has p decimal digits,
200 // correct by Property 1)
203 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
206 fstar
.w
[0] = P128
.w
[0];
207 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
208 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
209 res
= (P128
.w
[1] >> shift
);
210 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
211 fstar
.w
[0] = P128
.w
[0];
213 // if (0 < f* < 10^(-x)) then the result is a midpoint
214 // since round_to_even, subtract 1 if current result is odd
215 if ((res
& 0x0000000000000001ull
) && (fstar
.w
[1] == 0)
216 && (fstar
.w
[0] < ten2mk64
[ind
- 1])) {
219 // determine inexactness of the rounding of C*
220 // if (0 < f* - 1/2 < 10^(-x)) then
221 // the result is exact
222 // else // if (f* - 1/2 > T*) then
223 // the result is inexact
225 if (fstar
.w
[0] > 0x8000000000000000ull
) {
226 // f* > 1/2 and the result may be exact
227 // fstar.w[0] - 0x8000000000000000ull is f* - 1/2
228 if ((fstar
.w
[0] - 0x8000000000000000ull
) > ten2mk64
[ind
- 1]) {
229 // set the inexact flag
230 *pfpsf
|= INEXACT_EXCEPTION
;
231 } // else the result is exact
232 } else { // the result is inexact; f2* <= 1/2
233 // set the inexact flag
234 *pfpsf
|= INEXACT_EXCEPTION
;
236 } else { // if 3 <= ind - 1 <= 21
237 if (fstar
.w
[1] > onehalf128
[ind
- 1] ||
238 (fstar
.w
[1] == onehalf128
[ind
- 1] && fstar
.w
[0])) {
239 // f2* > 1/2 and the result may be exact
240 // Calculate f2* - 1/2
241 if (fstar
.w
[1] > onehalf128
[ind
- 1]
242 || fstar
.w
[0] > ten2mk64
[ind
- 1]) {
243 // set the inexact flag
244 *pfpsf
|= INEXACT_EXCEPTION
;
245 } // else the result is exact
246 } else { // the result is inexact; f2* <= 1/2
247 // set the inexact flag
248 *pfpsf
|= INEXACT_EXCEPTION
;
251 // set exponent to zero as it was negative before.
252 res
= x_sign
| 0x31c0000000000000ull
| res
;
254 } else { // if exp < 0 and q + exp < 0
255 // the result is +0 or -0
256 res
= x_sign
| 0x31c0000000000000ull
;
257 *pfpsf
|= INEXACT_EXCEPTION
;
261 case ROUNDING_TIES_AWAY
:
262 if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
263 // need to shift right -exp digits from the coefficient; exp will be 0
264 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
265 // chop off ind digits from the lower part of C1
266 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
267 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
268 C1
= C1
+ midpoint64
[ind
- 1];
269 // calculate C* and f*
270 // C* is actually floor(C*) in this case
271 // C* and f* need shifting and masking, as shown by
272 // shiftright128[] and maskhigh128[]
274 // kx = 10^(-x) = ten2mk64[ind - 1]
275 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
276 // the approximation of 10^(-x) was rounded up to 64 bits
277 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
279 // if (0 < f* < 10^(-x)) then the result is a midpoint
280 // C* = floor(C*) - logical right shift; C* has p decimal digits,
281 // correct by Prop. 1)
283 // C* = floor(C*) (logical right shift; C has p decimal digits,
284 // correct by Property 1)
287 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
290 fstar
.w
[0] = P128
.w
[0];
291 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
292 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
293 res
= (P128
.w
[1] >> shift
);
294 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
295 fstar
.w
[0] = P128
.w
[0];
297 // midpoints are already rounded correctly
298 // determine inexactness of the rounding of C*
299 // if (0 < f* - 1/2 < 10^(-x)) then
300 // the result is exact
301 // else // if (f* - 1/2 > T*) then
302 // the result is inexact
304 if (fstar
.w
[0] > 0x8000000000000000ull
) {
305 // f* > 1/2 and the result may be exact
306 // fstar.w[0] - 0x8000000000000000ull is f* - 1/2
307 if ((fstar
.w
[0] - 0x8000000000000000ull
) > ten2mk64
[ind
- 1]) {
308 // set the inexact flag
309 *pfpsf
|= INEXACT_EXCEPTION
;
310 } // else the result is exact
311 } else { // the result is inexact; f2* <= 1/2
312 // set the inexact flag
313 *pfpsf
|= INEXACT_EXCEPTION
;
315 } else { // if 3 <= ind - 1 <= 21
316 if (fstar
.w
[1] > onehalf128
[ind
- 1] ||
317 (fstar
.w
[1] == onehalf128
[ind
- 1] && fstar
.w
[0])) {
318 // f2* > 1/2 and the result may be exact
319 // Calculate f2* - 1/2
320 if (fstar
.w
[1] > onehalf128
[ind
- 1]
321 || fstar
.w
[0] > ten2mk64
[ind
- 1]) {
322 // set the inexact flag
323 *pfpsf
|= INEXACT_EXCEPTION
;
324 } // else the result is exact
325 } else { // the result is inexact; f2* <= 1/2
326 // set the inexact flag
327 *pfpsf
|= INEXACT_EXCEPTION
;
330 // set exponent to zero as it was negative before.
331 res
= x_sign
| 0x31c0000000000000ull
| res
;
333 } else { // if exp < 0 and q + exp < 0
334 // the result is +0 or -0
335 res
= x_sign
| 0x31c0000000000000ull
;
336 *pfpsf
|= INEXACT_EXCEPTION
;
341 if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
342 // need to shift right -exp digits from the coefficient; exp will be 0
343 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
344 // chop off ind digits from the lower part of C1
345 // C1 fits in 64 bits
346 // calculate C* and f*
347 // C* is actually floor(C*) in this case
348 // C* and f* need shifting and masking, as shown by
349 // shiftright128[] and maskhigh128[]
351 // kx = 10^(-x) = ten2mk64[ind - 1]
353 // the approximation of 10^(-x) was rounded up to 64 bits
354 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
356 // C* = floor(C*) (logical right shift; C has p decimal digits,
357 // correct by Property 1)
358 // if (0 < f* < 10^(-x)) then the result is exact
361 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
364 fstar
.w
[0] = P128
.w
[0];
365 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
366 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
367 res
= (P128
.w
[1] >> shift
);
368 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
369 fstar
.w
[0] = P128
.w
[0];
371 // if (f* > 10^(-x)) then the result is inexact
372 if ((fstar
.w
[1] != 0) || (fstar
.w
[0] >= ten2mk64
[ind
- 1])) {
374 // if negative and not exact, increment magnitude
377 *pfpsf
|= INEXACT_EXCEPTION
;
379 // set exponent to zero as it was negative before.
380 res
= x_sign
| 0x31c0000000000000ull
| res
;
382 } else { // if exp < 0 and q + exp <= 0
383 // the result is +0 or -1
385 res
= 0xb1c0000000000001ull
;
387 res
= 0x31c0000000000000ull
;
389 *pfpsf
|= INEXACT_EXCEPTION
;
394 if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
395 // need to shift right -exp digits from the coefficient; exp will be 0
396 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
397 // chop off ind digits from the lower part of C1
398 // C1 fits in 64 bits
399 // calculate C* and f*
400 // C* is actually floor(C*) in this case
401 // C* and f* need shifting and masking, as shown by
402 // shiftright128[] and maskhigh128[]
404 // kx = 10^(-x) = ten2mk64[ind - 1]
406 // the approximation of 10^(-x) was rounded up to 64 bits
407 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
409 // C* = floor(C*) (logical right shift; C has p decimal digits,
410 // correct by Property 1)
411 // if (0 < f* < 10^(-x)) then the result is exact
414 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
417 fstar
.w
[0] = P128
.w
[0];
418 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
419 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
420 res
= (P128
.w
[1] >> shift
);
421 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
422 fstar
.w
[0] = P128
.w
[0];
424 // if (f* > 10^(-x)) then the result is inexact
425 if ((fstar
.w
[1] != 0) || (fstar
.w
[0] >= ten2mk64
[ind
- 1])) {
427 // if positive and not exact, increment magnitude
430 *pfpsf
|= INEXACT_EXCEPTION
;
432 // set exponent to zero as it was negative before.
433 res
= x_sign
| 0x31c0000000000000ull
| res
;
435 } else { // if exp < 0 and q + exp <= 0
436 // the result is -0 or +1
438 res
= 0xb1c0000000000000ull
;
440 res
= 0x31c0000000000001ull
;
442 *pfpsf
|= INEXACT_EXCEPTION
;
446 case ROUNDING_TO_ZERO
:
447 if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
448 // need to shift right -exp digits from the coefficient; exp will be 0
449 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
450 // chop off ind digits from the lower part of C1
451 // C1 fits in 127 bits
452 // calculate C* and f*
453 // C* is actually floor(C*) in this case
454 // C* and f* need shifting and masking, as shown by
455 // shiftright128[] and maskhigh128[]
457 // kx = 10^(-x) = ten2mk64[ind - 1]
459 // the approximation of 10^(-x) was rounded up to 64 bits
460 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
462 // C* = floor(C*) (logical right shift; C has p decimal digits,
463 // correct by Property 1)
464 // if (0 < f* < 10^(-x)) then the result is exact
467 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
470 fstar
.w
[0] = P128
.w
[0];
471 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
472 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
473 res
= (P128
.w
[1] >> shift
);
474 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
475 fstar
.w
[0] = P128
.w
[0];
477 // if (f* > 10^(-x)) then the result is inexact
478 if ((fstar
.w
[1] != 0) || (fstar
.w
[0] >= ten2mk64
[ind
- 1])) {
479 *pfpsf
|= INEXACT_EXCEPTION
;
481 // set exponent to zero as it was negative before.
482 res
= x_sign
| 0x31c0000000000000ull
| res
;
484 } else { // if exp < 0 and q + exp < 0
485 // the result is +0 or -0
486 res
= x_sign
| 0x31c0000000000000ull
;
487 *pfpsf
|= INEXACT_EXCEPTION
;
495 /*****************************************************************************
496 * BID64_round_integral_nearest_even
497 ****************************************************************************/
499 #if DECIMAL_CALL_BY_REFERENCE
501 bid64_round_integral_nearest_even (UINT64
* pres
,
503 px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
508 bid64_round_integral_nearest_even (UINT64 x _EXC_FLAGS_PARAM
509 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
512 UINT64 res
= 0xbaddbaddbaddbaddull
;
514 int exp
; // unbiased exponent
515 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
523 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
525 // check for NaNs and infinities
526 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
527 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
528 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
530 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
531 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
533 *pfpsf
|= INVALID_EXCEPTION
;
534 // return quiet (SNaN)
535 res
= x
& 0xfdffffffffffffffull
;
540 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
541 res
= x_sign
| 0x7800000000000000ull
;
545 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
546 // if the steering bits are 11 (condition will be 0), then
547 // the exponent is G[0:w+1]
548 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
549 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
550 if (C1
> 9999999999999999ull) { // non-canonical
553 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
554 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
555 C1
= (x
& MASK_BINARY_SIG1
);
558 // if x is 0 or non-canonical
562 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
565 // x is a finite non-zero number (not 0, non-canonical, or special)
567 // return 0 if (exp <= -(p+1))
569 res
= x_sign
| 0x31c0000000000000ull
;
572 // q = nr. of decimal digits in x (1 <= q <= 54)
573 // determine first the nr. of bits in x
574 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
576 } else { // if x < 2^53
577 tmp1
.d
= (double) C1
; // exact conversion
579 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
580 q
= nr_digits
[x_nr_bits
- 1].digits
;
582 q
= nr_digits
[x_nr_bits
- 1].digits1
;
583 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
588 if (exp
>= 0) { // -exp <= 0
589 // the argument is an integer already
592 } else if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
593 // need to shift right -exp digits from the coefficient; the exp will be 0
594 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
595 // chop off ind digits from the lower part of C1
596 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
597 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
598 C1
= C1
+ midpoint64
[ind
- 1];
599 // calculate C* and f*
600 // C* is actually floor(C*) in this case
601 // C* and f* need shifting and masking, as shown by
602 // shiftright128[] and maskhigh128[]
604 // kx = 10^(-x) = ten2mk64[ind - 1]
605 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
606 // the approximation of 10^(-x) was rounded up to 64 bits
607 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
609 // if (0 < f* < 10^(-x)) then the result is a midpoint
610 // if floor(C*) is even then C* = floor(C*) - logical right
611 // shift; C* has p decimal digits, correct by Prop. 1)
612 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
613 // shift; C* has p decimal digits, correct by Pr. 1)
615 // C* = floor(C*) (logical right shift; C has p decimal digits,
616 // correct by Property 1)
619 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
622 fstar
.w
[0] = P128
.w
[0];
623 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
624 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
625 res
= (P128
.w
[1] >> shift
);
626 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
627 fstar
.w
[0] = P128
.w
[0];
629 // if (0 < f* < 10^(-x)) then the result is a midpoint
630 // since round_to_even, subtract 1 if current result is odd
631 if ((res
& 0x0000000000000001ull
) && (fstar
.w
[1] == 0)
632 && (fstar
.w
[0] < ten2mk64
[ind
- 1])) {
635 // set exponent to zero as it was negative before.
636 res
= x_sign
| 0x31c0000000000000ull
| res
;
638 } else { // if exp < 0 and q + exp < 0
639 // the result is +0 or -0
640 res
= x_sign
| 0x31c0000000000000ull
;
645 /*****************************************************************************
646 * BID64_round_integral_negative
647 *****************************************************************************/
649 #if DECIMAL_CALL_BY_REFERENCE
651 bid64_round_integral_negative (UINT64
* pres
,
653 px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
658 bid64_round_integral_negative (UINT64 x _EXC_FLAGS_PARAM
659 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
662 UINT64 res
= 0xbaddbaddbaddbaddull
;
664 int exp
; // unbiased exponent
665 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
670 // UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
674 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
676 // check for NaNs and infinities
677 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
678 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
679 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
681 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
682 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
684 *pfpsf
|= INVALID_EXCEPTION
;
685 // return quiet (SNaN)
686 res
= x
& 0xfdffffffffffffffull
;
691 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
692 res
= x_sign
| 0x7800000000000000ull
;
696 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
697 // if the steering bits are 11 (condition will be 0), then
698 // the exponent is G[0:w+1]
699 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
700 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
701 if (C1
> 9999999999999999ull) { // non-canonical
704 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
705 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
706 C1
= (x
& MASK_BINARY_SIG1
);
709 // if x is 0 or non-canonical
713 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
716 // x is a finite non-zero number (not 0, non-canonical, or special)
718 // return 0 if (exp <= -p)
721 res
= 0xb1c0000000000001ull
;
723 res
= 0x31c0000000000000ull
;
727 // q = nr. of decimal digits in x (1 <= q <= 54)
728 // determine first the nr. of bits in x
729 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
731 } else { // if x < 2^53
732 tmp1
.d
= (double) C1
; // exact conversion
734 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
735 q
= nr_digits
[x_nr_bits
- 1].digits
;
737 q
= nr_digits
[x_nr_bits
- 1].digits1
;
738 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
743 if (exp
>= 0) { // -exp <= 0
744 // the argument is an integer already
747 } else if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
748 // need to shift right -exp digits from the coefficient; the exp will be 0
749 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
750 // chop off ind digits from the lower part of C1
751 // C1 fits in 64 bits
752 // calculate C* and f*
753 // C* is actually floor(C*) in this case
754 // C* and f* need shifting and masking, as shown by
755 // shiftright128[] and maskhigh128[]
757 // kx = 10^(-x) = ten2mk64[ind - 1]
759 // the approximation of 10^(-x) was rounded up to 64 bits
760 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
762 // C* = floor(C*) (logical right shift; C has p decimal digits,
763 // correct by Property 1)
764 // if (0 < f* < 10^(-x)) then the result is exact
767 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
770 fstar
.w
[0] = P128
.w
[0];
771 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
772 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
773 res
= (P128
.w
[1] >> shift
);
774 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
775 fstar
.w
[0] = P128
.w
[0];
777 // if (f* > 10^(-x)) then the result is inexact
779 && ((fstar
.w
[1] != 0) || (fstar
.w
[0] >= ten2mk64
[ind
- 1]))) {
780 // if negative and not exact, increment magnitude
783 // set exponent to zero as it was negative before.
784 res
= x_sign
| 0x31c0000000000000ull
| res
;
786 } else { // if exp < 0 and q + exp <= 0
787 // the result is +0 or -1
789 res
= 0xb1c0000000000001ull
;
791 res
= 0x31c0000000000000ull
;
797 /*****************************************************************************
798 * BID64_round_integral_positive
799 ****************************************************************************/
801 #if DECIMAL_CALL_BY_REFERENCE
803 bid64_round_integral_positive (UINT64
* pres
,
805 px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
810 bid64_round_integral_positive (UINT64 x _EXC_FLAGS_PARAM
811 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
814 UINT64 res
= 0xbaddbaddbaddbaddull
;
816 int exp
; // unbiased exponent
817 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
822 // UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
826 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
828 // check for NaNs and infinities
829 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
830 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
831 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
833 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
834 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
836 *pfpsf
|= INVALID_EXCEPTION
;
837 // return quiet (SNaN)
838 res
= x
& 0xfdffffffffffffffull
;
843 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
844 res
= x_sign
| 0x7800000000000000ull
;
848 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
849 // if the steering bits are 11 (condition will be 0), then
850 // the exponent is G[0:w+1]
851 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
852 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
853 if (C1
> 9999999999999999ull) { // non-canonical
856 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
857 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
858 C1
= (x
& MASK_BINARY_SIG1
);
861 // if x is 0 or non-canonical
865 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
868 // x is a finite non-zero number (not 0, non-canonical, or special)
870 // return 0 if (exp <= -p)
873 res
= 0xb1c0000000000000ull
;
875 res
= 0x31c0000000000001ull
;
879 // q = nr. of decimal digits in x (1 <= q <= 54)
880 // determine first the nr. of bits in x
881 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
883 } else { // if x < 2^53
884 tmp1
.d
= (double) C1
; // exact conversion
886 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
887 q
= nr_digits
[x_nr_bits
- 1].digits
;
889 q
= nr_digits
[x_nr_bits
- 1].digits1
;
890 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
895 if (exp
>= 0) { // -exp <= 0
896 // the argument is an integer already
899 } else if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
900 // need to shift right -exp digits from the coefficient; the exp will be 0
901 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
902 // chop off ind digits from the lower part of C1
903 // C1 fits in 64 bits
904 // calculate C* and f*
905 // C* is actually floor(C*) in this case
906 // C* and f* need shifting and masking, as shown by
907 // shiftright128[] and maskhigh128[]
909 // kx = 10^(-x) = ten2mk64[ind - 1]
911 // the approximation of 10^(-x) was rounded up to 64 bits
912 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
914 // C* = floor(C*) (logical right shift; C has p decimal digits,
915 // correct by Property 1)
916 // if (0 < f* < 10^(-x)) then the result is exact
919 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
922 fstar
.w
[0] = P128
.w
[0];
923 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
924 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
925 res
= (P128
.w
[1] >> shift
);
926 fstar
.w
[1] = P128
.w
[1] & maskhigh128
[ind
- 1];
927 fstar
.w
[0] = P128
.w
[0];
929 // if (f* > 10^(-x)) then the result is inexact
931 && ((fstar
.w
[1] != 0) || (fstar
.w
[0] >= ten2mk64
[ind
- 1]))) {
932 // if positive and not exact, increment magnitude
935 // set exponent to zero as it was negative before.
936 res
= x_sign
| 0x31c0000000000000ull
| res
;
938 } else { // if exp < 0 and q + exp <= 0
939 // the result is -0 or +1
941 res
= 0xb1c0000000000000ull
;
943 res
= 0x31c0000000000001ull
;
949 /*****************************************************************************
950 * BID64_round_integral_zero
951 ****************************************************************************/
953 #if DECIMAL_CALL_BY_REFERENCE
955 bid64_round_integral_zero (UINT64
* pres
,
957 px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
962 bid64_round_integral_zero (UINT64 x _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
966 UINT64 res
= 0xbaddbaddbaddbaddull
;
968 int exp
; // unbiased exponent
969 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
974 // UINT64 res is C* at first - represents up to 34 decimal digits ~ 113 bits
977 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
979 // check for NaNs and infinities
980 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
981 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
982 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
984 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
985 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
987 *pfpsf
|= INVALID_EXCEPTION
;
988 // return quiet (SNaN)
989 res
= x
& 0xfdffffffffffffffull
;
994 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
995 res
= x_sign
| 0x7800000000000000ull
;
999 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
1000 // if the steering bits are 11 (condition will be 0), then
1001 // the exponent is G[0:w+1]
1002 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
1003 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
1004 if (C1
> 9999999999999999ull) { // non-canonical
1007 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
1008 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
1009 C1
= (x
& MASK_BINARY_SIG1
);
1012 // if x is 0 or non-canonical
1016 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
1019 // x is a finite non-zero number (not 0, non-canonical, or special)
1021 // return 0 if (exp <= -p)
1023 res
= x_sign
| 0x31c0000000000000ull
;
1026 // q = nr. of decimal digits in x (1 <= q <= 54)
1027 // determine first the nr. of bits in x
1028 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
1030 } else { // if x < 2^53
1031 tmp1
.d
= (double) C1
; // exact conversion
1033 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1034 q
= nr_digits
[x_nr_bits
- 1].digits
;
1036 q
= nr_digits
[x_nr_bits
- 1].digits1
;
1037 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
1042 if (exp
>= 0) { // -exp <= 0
1043 // the argument is an integer already
1046 } else if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
1047 // need to shift right -exp digits from the coefficient; the exp will be 0
1048 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
1049 // chop off ind digits from the lower part of C1
1050 // C1 fits in 127 bits
1051 // calculate C* and f*
1052 // C* is actually floor(C*) in this case
1053 // C* and f* need shifting and masking, as shown by
1054 // shiftright128[] and maskhigh128[]
1056 // kx = 10^(-x) = ten2mk64[ind - 1]
1057 // C* = C1 * 10^(-x)
1058 // the approximation of 10^(-x) was rounded up to 64 bits
1059 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
1061 // C* = floor(C*) (logical right shift; C has p decimal digits,
1062 // correct by Property 1)
1063 // if (0 < f* < 10^(-x)) then the result is exact
1064 // n = C* * 10^(e+x)
1066 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1068 // redundant fstar.w[1] = 0;
1069 // redundant fstar.w[0] = P128.w[0];
1070 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1071 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
1072 res
= (P128
.w
[1] >> shift
);
1073 // redundant fstar.w[1] = P128.w[1] & maskhigh128[ind - 1];
1074 // redundant fstar.w[0] = P128.w[0];
1076 // if (f* > 10^(-x)) then the result is inexact
1077 // if ((fstar.w[1] != 0) || (fstar.w[0] >= ten2mk64[ind-1])){
1080 // set exponent to zero as it was negative before.
1081 res
= x_sign
| 0x31c0000000000000ull
| res
;
1083 } else { // if exp < 0 and q + exp < 0
1084 // the result is +0 or -0
1085 res
= x_sign
| 0x31c0000000000000ull
;
1090 /*****************************************************************************
1091 * BID64_round_integral_nearest_away
1092 ****************************************************************************/
1094 #if DECIMAL_CALL_BY_REFERENCE
1096 bid64_round_integral_nearest_away (UINT64
* pres
,
1098 px _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
1103 bid64_round_integral_nearest_away (UINT64 x _EXC_FLAGS_PARAM
1104 _EXC_MASKS_PARAM _EXC_INFO_PARAM
) {
1107 UINT64 res
= 0xbaddbaddbaddbaddull
;
1109 int exp
; // unbiased exponent
1110 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
1111 BID_UI64DOUBLE tmp1
;
1117 x_sign
= x
& MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
1119 // check for NaNs and infinities
1120 if ((x
& MASK_NAN
) == MASK_NAN
) { // check for NaN
1121 if ((x
& 0x0003ffffffffffffull
) > 999999999999999ull)
1122 x
= x
& 0xfe00000000000000ull
; // clear G6-G12 and the payload bits
1124 x
= x
& 0xfe03ffffffffffffull
; // clear G6-G12
1125 if ((x
& MASK_SNAN
) == MASK_SNAN
) { // SNaN
1127 *pfpsf
|= INVALID_EXCEPTION
;
1128 // return quiet (SNaN)
1129 res
= x
& 0xfdffffffffffffffull
;
1134 } else if ((x
& MASK_INF
) == MASK_INF
) { // check for Infinity
1135 res
= x_sign
| 0x7800000000000000ull
;
1139 if ((x
& MASK_STEERING_BITS
) == MASK_STEERING_BITS
) {
1140 // if the steering bits are 11 (condition will be 0), then
1141 // the exponent is G[0:w+1]
1142 exp
= ((x
& MASK_BINARY_EXPONENT2
) >> 51) - 398;
1143 C1
= (x
& MASK_BINARY_SIG2
) | MASK_BINARY_OR2
;
1144 if (C1
> 9999999999999999ull) { // non-canonical
1147 } else { // if ((x & MASK_STEERING_BITS) != MASK_STEERING_BITS)
1148 exp
= ((x
& MASK_BINARY_EXPONENT1
) >> 53) - 398;
1149 C1
= (x
& MASK_BINARY_SIG1
);
1152 // if x is 0 or non-canonical
1156 res
= x_sign
| (((UINT64
) exp
+ 398) << 53);
1159 // x is a finite non-zero number (not 0, non-canonical, or special)
1161 // return 0 if (exp <= -(p+1))
1163 res
= x_sign
| 0x31c0000000000000ull
;
1166 // q = nr. of decimal digits in x (1 <= q <= 54)
1167 // determine first the nr. of bits in x
1168 if (C1
>= 0x0020000000000000ull
) { // x >= 2^53
1170 } else { // if x < 2^53
1171 tmp1
.d
= (double) C1
; // exact conversion
1173 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1174 q
= nr_digits
[x_nr_bits
- 1].digits
;
1176 q
= nr_digits
[x_nr_bits
- 1].digits1
;
1177 if (C1
>= nr_digits
[x_nr_bits
- 1].threshold_lo
)
1182 if (exp
>= 0) { // -exp <= 0
1183 // the argument is an integer already
1186 } else if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
1187 // need to shift right -exp digits from the coefficient; the exp will be 0
1188 ind
= -exp
; // 1 <= ind <= 16; ind is a synonym for 'x'
1189 // chop off ind digits from the lower part of C1
1190 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 64 bits
1191 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
1192 C1
= C1
+ midpoint64
[ind
- 1];
1193 // calculate C* and f*
1194 // C* is actually floor(C*) in this case
1195 // C* and f* need shifting and masking, as shown by
1196 // shiftright128[] and maskhigh128[]
1198 // kx = 10^(-x) = ten2mk64[ind - 1]
1199 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
1200 // the approximation of 10^(-x) was rounded up to 64 bits
1201 __mul_64x64_to_128 (P128
, C1
, ten2mk64
[ind
- 1]);
1203 // if (0 < f* < 10^(-x)) then the result is a midpoint
1204 // C* = floor(C*) - logical right shift; C* has p decimal digits,
1205 // correct by Prop. 1)
1207 // C* = floor(C*) (logical right shift; C has p decimal digits,
1208 // correct by Property 1)
1209 // n = C* * 10^(e+x)
1211 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1213 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1214 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
1215 res
= (P128
.w
[1] >> shift
);
1217 // midpoints are already rounded correctly
1218 // set exponent to zero as it was negative before.
1219 res
= x_sign
| 0x31c0000000000000ull
| res
;
1221 } else { // if exp < 0 and q + exp < 0
1222 // the result is +0 or -0
1223 res
= x_sign
| 0x31c0000000000000ull
;