1 /* Copyright (C) 2007 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
31 #include "bid_internal.h"
33 /*****************************************************************************
34 * BID128_round_integral_exact
35 ****************************************************************************/
37 BID128_FUNCTION_ARG1 (bid128_round_integral_exact
, x
)
39 UINT128 res
= { {0xbaddbaddbaddbaddull
, 0xbaddbaddbaddbaddull
}
43 int exp
; // unbiased exponent
44 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
47 unsigned int x_nr_bits
;
53 // check for NaN or Infinity
54 if ((x
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
) {
56 if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
57 // if x = NaN, then res = Q (x)
58 // check first for non-canonical NaN payload
59 if (((x
.w
[1] & 0x00003fffffffffffull
) > 0x0000314dc6448d93ull
) ||
60 (((x
.w
[1] & 0x00003fffffffffffull
) == 0x0000314dc6448d93ull
) &&
61 (x
.w
[0] > 0x38c15b09ffffffffull
))) {
62 x
.w
[1] = x
.w
[1] & 0xffffc00000000000ull
;
65 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
67 *pfpsf
|= INVALID_EXCEPTION
;
69 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out also G[6]-G[16]
73 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out G[6]-G[16]
77 } else { // x is not a NaN, so it must be infinity
78 if ((x
.w
[1] & MASK_SIGN
) == 0x0ull
) { // x is +inf
80 res
.w
[1] = 0x7800000000000000ull
;
81 res
.w
[0] = 0x0000000000000000ull
;
84 res
.w
[1] = 0xf800000000000000ull
;
85 res
.w
[0] = 0x0000000000000000ull
;
91 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
92 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
95 // check for non-canonical values (treated as zero)
96 if ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
) { // G0_G1=11
98 x_exp
= (x
.w
[1] << 2) & MASK_EXP
; // biased and shifted left 49 bits
99 C1
.w
[1] = 0; // significand high
100 C1
.w
[0] = 0; // significand low
101 } else { // G0_G1 != 11
102 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bits
103 if (C1
.w
[1] > 0x0001ed09bead87c0ull
||
104 (C1
.w
[1] == 0x0001ed09bead87c0ull
105 && C1
.w
[0] > 0x378d8e63ffffffffull
)) {
106 // x is non-canonical if coefficient is larger than 10^34 -1
109 } else { // canonical
114 // test for input equal to zero
115 if ((C1
.w
[1] == 0x0ull
) && (C1
.w
[0] == 0x0ull
)) {
117 // return 0 preserving the sign bit and the preferred exponent
119 if (x_exp
<= (0x1820ull
<< 49)) {
120 res
.w
[1] = (x
.w
[1] & 0x8000000000000000ull
) | 0x3040000000000000ull
;
122 res
.w
[1] = x_sign
| x_exp
;
124 res
.w
[0] = 0x0000000000000000ull
;
127 // x is not special and is not zero
130 case ROUNDING_TO_NEAREST
:
131 case ROUNDING_TIES_AWAY
:
132 // if (exp <= -(p+1)) return 0.0
133 if (x_exp
<= 0x2ffa000000000000ull
) { // 0x2ffa000000000000ull == -35
134 res
.w
[1] = x_sign
| 0x3040000000000000ull
;
135 res
.w
[0] = 0x0000000000000000ull
;
136 *pfpsf
|= INEXACT_EXCEPTION
;
141 // if (exp <= -p) return -1.0 or +0.0
142 if (x_exp
<= 0x2ffc000000000000ull
) { // 0x2ffa000000000000ull == -34
144 // if negative, return negative 1, because we know coefficient
145 // is non-zero (would have been caught above)
146 res
.w
[1] = 0xb040000000000000ull
;
147 res
.w
[0] = 0x0000000000000001ull
;
149 // if positive, return positive 0, because we know coefficient is
150 // non-zero (would have been caught above)
151 res
.w
[1] = 0x3040000000000000ull
;
152 res
.w
[0] = 0x0000000000000000ull
;
154 *pfpsf
|= INEXACT_EXCEPTION
;
159 // if (exp <= -p) return -0.0 or +1.0
160 if (x_exp
<= 0x2ffc000000000000ull
) { // 0x2ffc000000000000ull == -34
162 // if negative, return negative 0, because we know the coefficient
163 // is non-zero (would have been caught above)
164 res
.w
[1] = 0xb040000000000000ull
;
165 res
.w
[0] = 0x0000000000000000ull
;
167 // if positive, return positive 1, because we know coefficient is
168 // non-zero (would have been caught above)
169 res
.w
[1] = 0x3040000000000000ull
;
170 res
.w
[0] = 0x0000000000000001ull
;
172 *pfpsf
|= INEXACT_EXCEPTION
;
176 case ROUNDING_TO_ZERO
:
177 // if (exp <= -p) return -0.0 or +0.0
178 if (x_exp
<= 0x2ffc000000000000ull
) { // 0x2ffc000000000000ull == -34
179 res
.w
[1] = x_sign
| 0x3040000000000000ull
;
180 res
.w
[0] = 0x0000000000000000ull
;
181 *pfpsf
|= INEXACT_EXCEPTION
;
187 // q = nr. of decimal digits in x
188 // determine first the nr. of bits in x
190 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
191 // split the 64-bit value in two 32-bit halves to avoid rounding errors
192 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
193 tmp1
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
195 33 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
197 tmp1
.d
= (double) (C1
.w
[0]); // exact conversion
199 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
201 } else { // if x < 2^53
202 tmp1
.d
= (double) C1
.w
[0]; // exact conversion
204 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
206 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
207 tmp1
.d
= (double) C1
.w
[1]; // exact conversion
209 65 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
212 q
= nr_digits
[x_nr_bits
- 1].digits
;
214 q
= nr_digits
[x_nr_bits
- 1].digits1
;
215 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
||
216 (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
&&
217 C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
220 exp
= (x_exp
>> 49) - 6176;
221 if (exp
>= 0) { // -exp <= 0
222 // the argument is an integer already
229 case ROUNDING_TO_NEAREST
:
230 if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
231 // need to shift right -exp digits from the coefficient; exp will be 0
232 ind
= -exp
; // 1 <= ind <= 34; ind is a synonym for 'x'
233 // chop off ind digits from the lower part of C1
234 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
237 C1
.w
[0] = C1
.w
[0] + midpoint64
[ind
- 1];
239 C1
.w
[0] = C1
.w
[0] + midpoint128
[ind
- 20].w
[0];
240 C1
.w
[1] = C1
.w
[1] + midpoint128
[ind
- 20].w
[1];
244 // calculate C* and f*
245 // C* is actually floor(C*) in this case
246 // C* and f* need shifting and masking, as shown by
247 // shiftright128[] and maskhigh128[]
249 // kx = 10^(-x) = ten2mk128[ind - 1]
250 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
251 // the approximation of 10^(-x) was rounded up to 118 bits
252 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
253 // determine the value of res and fstar
255 // determine inexactness of the rounding of C*
256 // if (0 < f* - 1/2 < 10^(-x)) then
257 // the result is exact
258 // else // if (f* - 1/2 > T*) then
259 // the result is inexact
260 // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[]
262 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
263 // redundant shift = shiftright128[ind - 1]; // shift = 0
264 res
.w
[1] = P256
.w
[3];
265 res
.w
[0] = P256
.w
[2];
266 // redundant fstar.w[3] = 0;
267 // redundant fstar.w[2] = 0;
268 fstar
.w
[1] = P256
.w
[1];
269 fstar
.w
[0] = P256
.w
[0];
270 // fraction f* < 10^(-x) <=> midpoint
271 // f* is in the right position to be compared with
272 // 10^(-x) from ten2mk128[]
273 // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even)
274 if ((res
.w
[0] & 0x0000000000000001ull
) && // is result odd?
275 ((fstar
.w
[1] < (ten2mk128
[ind
- 1].w
[1]))
276 || ((fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1])
277 && (fstar
.w
[0] < ten2mk128
[ind
- 1].w
[0])))) {
278 // subract 1 to make even
279 if (res
.w
[0]-- == 0) {
283 if (fstar
.w
[1] > 0x8000000000000000ull
||
284 (fstar
.w
[1] == 0x8000000000000000ull
285 && fstar
.w
[0] > 0x0ull
)) {
286 // f* > 1/2 and the result may be exact
287 tmp64
= fstar
.w
[1] - 0x8000000000000000ull
; // f* - 1/2
288 if (tmp64
> ten2mk128
[ind
- 1].w
[1] ||
289 (tmp64
== ten2mk128
[ind
- 1].w
[1] &&
290 fstar
.w
[0] >= ten2mk128
[ind
- 1].w
[0])) {
291 // set the inexact flag
292 *pfpsf
|= INEXACT_EXCEPTION
;
293 } // else the result is exact
294 } else { // the result is inexact; f2* <= 1/2
295 // set the inexact flag
296 *pfpsf
|= INEXACT_EXCEPTION
;
298 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
299 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
300 res
.w
[1] = (P256
.w
[3] >> shift
);
301 res
.w
[0] = (P256
.w
[3] << (64 - shift
)) | (P256
.w
[2] >> shift
);
302 // redundant fstar.w[3] = 0;
303 fstar
.w
[2] = P256
.w
[2] & maskhigh128
[ind
- 1];
304 fstar
.w
[1] = P256
.w
[1];
305 fstar
.w
[0] = P256
.w
[0];
306 // fraction f* < 10^(-x) <=> midpoint
307 // f* is in the right position to be compared with
308 // 10^(-x) from ten2mk128[]
309 if ((res
.w
[0] & 0x0000000000000001ull
) && // is result odd?
310 fstar
.w
[2] == 0 && (fstar
.w
[1] < ten2mk128
[ind
- 1].w
[1] ||
311 (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1] &&
312 fstar
.w
[0] < ten2mk128
[ind
- 1].w
[0]))) {
313 // subract 1 to make even
314 if (res
.w
[0]-- == 0) {
318 if (fstar
.w
[2] > onehalf128
[ind
- 1] ||
319 (fstar
.w
[2] == onehalf128
[ind
- 1]
320 && (fstar
.w
[1] || fstar
.w
[0]))) {
321 // f2* > 1/2 and the result may be exact
322 // Calculate f2* - 1/2
323 tmp64
= fstar
.w
[2] - onehalf128
[ind
- 1];
324 if (tmp64
|| fstar
.w
[1] > ten2mk128
[ind
- 1].w
[1] ||
325 (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1] &&
326 fstar
.w
[0] >= ten2mk128
[ind
- 1].w
[0])) {
327 // set the inexact flag
328 *pfpsf
|= INEXACT_EXCEPTION
;
329 } // else the result is exact
330 } else { // the result is inexact; f2* <= 1/2
331 // set the inexact flag
332 *pfpsf
|= INEXACT_EXCEPTION
;
334 } else { // 22 <= ind - 1 <= 33
335 shift
= shiftright128
[ind
- 1] - 64; // 2 <= shift <= 38
337 res
.w
[0] = P256
.w
[3] >> shift
;
338 fstar
.w
[3] = P256
.w
[3] & maskhigh128
[ind
- 1];
339 fstar
.w
[2] = P256
.w
[2];
340 fstar
.w
[1] = P256
.w
[1];
341 fstar
.w
[0] = P256
.w
[0];
342 // fraction f* < 10^(-x) <=> midpoint
343 // f* is in the right position to be compared with
344 // 10^(-x) from ten2mk128[]
345 if ((res
.w
[0] & 0x0000000000000001ull
) && // is result odd?
346 fstar
.w
[3] == 0 && fstar
.w
[2] == 0 &&
347 (fstar
.w
[1] < ten2mk128
[ind
- 1].w
[1] ||
348 (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1] &&
349 fstar
.w
[0] < ten2mk128
[ind
- 1].w
[0]))) {
350 // subract 1 to make even
351 if (res
.w
[0]-- == 0) {
355 if (fstar
.w
[3] > onehalf128
[ind
- 1] ||
356 (fstar
.w
[3] == onehalf128
[ind
- 1] &&
357 (fstar
.w
[2] || fstar
.w
[1] || fstar
.w
[0]))) {
358 // f2* > 1/2 and the result may be exact
359 // Calculate f2* - 1/2
360 tmp64
= fstar
.w
[3] - onehalf128
[ind
- 1];
361 if (tmp64
|| fstar
.w
[2] || fstar
.w
[1] > ten2mk128
[ind
- 1].w
[1]
362 || (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1]
363 && fstar
.w
[0] >= ten2mk128
[ind
- 1].w
[0])) {
364 // set the inexact flag
365 *pfpsf
|= INEXACT_EXCEPTION
;
366 } // else the result is exact
367 } else { // the result is inexact; f2* <= 1/2
368 // set the inexact flag
369 *pfpsf
|= INEXACT_EXCEPTION
;
372 res
.w
[1] = x_sign
| 0x3040000000000000ull
| res
.w
[1];
374 } else { // if ((q + exp) < 0) <=> q < -exp
375 // the result is +0 or -0
376 res
.w
[1] = x_sign
| 0x3040000000000000ull
;
377 res
.w
[0] = 0x0000000000000000ull
;
378 *pfpsf
|= INEXACT_EXCEPTION
;
382 case ROUNDING_TIES_AWAY
:
383 if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
384 // need to shift right -exp digits from the coefficient; exp will be 0
385 ind
= -exp
; // 1 <= ind <= 34; ind is a synonym for 'x'
386 // chop off ind digits from the lower part of C1
387 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
390 C1
.w
[0] = C1
.w
[0] + midpoint64
[ind
- 1];
392 C1
.w
[0] = C1
.w
[0] + midpoint128
[ind
- 20].w
[0];
393 C1
.w
[1] = C1
.w
[1] + midpoint128
[ind
- 20].w
[1];
397 // calculate C* and f*
398 // C* is actually floor(C*) in this case
399 // C* and f* need shifting and masking, as shown by
400 // shiftright128[] and maskhigh128[]
402 // kx = 10^(-x) = ten2mk128[ind - 1]
403 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
404 // the approximation of 10^(-x) was rounded up to 118 bits
405 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
406 // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
407 // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
408 // if (0 < f* < 10^(-x)) then the result is a midpoint
409 // if floor(C*) is even then C* = floor(C*) - logical right
410 // shift; C* has p decimal digits, correct by Prop. 1)
411 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
412 // shift; C* has p decimal digits, correct by Pr. 1)
414 // C* = floor(C*) (logical right shift; C has p decimal digits,
415 // correct by Property 1)
418 // determine also the inexactness of the rounding of C*
419 // if (0 < f* - 1/2 < 10^(-x)) then
420 // the result is exact
421 // else // if (f* - 1/2 > T*) then
422 // the result is inexact
423 // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[]
424 // shift right C* by Ex-128 = shiftright128[ind]
425 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
426 // redundant shift = shiftright128[ind - 1]; // shift = 0
427 res
.w
[1] = P256
.w
[3];
428 res
.w
[0] = P256
.w
[2];
429 // redundant fstar.w[3] = 0;
430 // redundant fstar.w[2] = 0;
431 fstar
.w
[1] = P256
.w
[1];
432 fstar
.w
[0] = P256
.w
[0];
433 if (fstar
.w
[1] > 0x8000000000000000ull
||
434 (fstar
.w
[1] == 0x8000000000000000ull
435 && fstar
.w
[0] > 0x0ull
)) {
436 // f* > 1/2 and the result may be exact
437 tmp64
= fstar
.w
[1] - 0x8000000000000000ull
; // f* - 1/2
438 if ((tmp64
> ten2mk128
[ind
- 1].w
[1] ||
439 (tmp64
== ten2mk128
[ind
- 1].w
[1] &&
440 fstar
.w
[0] >= ten2mk128
[ind
- 1].w
[0]))) {
441 // set the inexact flag
442 *pfpsf
|= INEXACT_EXCEPTION
;
443 } // else the result is exact
444 } else { // the result is inexact; f2* <= 1/2
445 // set the inexact flag
446 *pfpsf
|= INEXACT_EXCEPTION
;
448 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
449 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
450 res
.w
[1] = (P256
.w
[3] >> shift
);
451 res
.w
[0] = (P256
.w
[3] << (64 - shift
)) | (P256
.w
[2] >> shift
);
452 // redundant fstar.w[3] = 0;
453 fstar
.w
[2] = P256
.w
[2] & maskhigh128
[ind
- 1];
454 fstar
.w
[1] = P256
.w
[1];
455 fstar
.w
[0] = P256
.w
[0];
456 if (fstar
.w
[2] > onehalf128
[ind
- 1] ||
457 (fstar
.w
[2] == onehalf128
[ind
- 1]
458 && (fstar
.w
[1] || fstar
.w
[0]))) {
459 // f2* > 1/2 and the result may be exact
460 // Calculate f2* - 1/2
461 tmp64
= fstar
.w
[2] - onehalf128
[ind
- 1];
462 if (tmp64
|| fstar
.w
[1] > ten2mk128
[ind
- 1].w
[1] ||
463 (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1] &&
464 fstar
.w
[0] >= ten2mk128
[ind
- 1].w
[0])) {
465 // set the inexact flag
466 *pfpsf
|= INEXACT_EXCEPTION
;
467 } // else the result is exact
468 } else { // the result is inexact; f2* <= 1/2
469 // set the inexact flag
470 *pfpsf
|= INEXACT_EXCEPTION
;
472 } else { // 22 <= ind - 1 <= 33
473 shift
= shiftright128
[ind
- 1] - 64; // 2 <= shift <= 38
475 res
.w
[0] = P256
.w
[3] >> shift
;
476 fstar
.w
[3] = P256
.w
[3] & maskhigh128
[ind
- 1];
477 fstar
.w
[2] = P256
.w
[2];
478 fstar
.w
[1] = P256
.w
[1];
479 fstar
.w
[0] = P256
.w
[0];
480 if (fstar
.w
[3] > onehalf128
[ind
- 1] ||
481 (fstar
.w
[3] == onehalf128
[ind
- 1] &&
482 (fstar
.w
[2] || fstar
.w
[1] || fstar
.w
[0]))) {
483 // f2* > 1/2 and the result may be exact
484 // Calculate f2* - 1/2
485 tmp64
= fstar
.w
[3] - onehalf128
[ind
- 1];
486 if (tmp64
|| fstar
.w
[2] || fstar
.w
[1] > ten2mk128
[ind
- 1].w
[1]
487 || (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1]
488 && fstar
.w
[0] >= ten2mk128
[ind
- 1].w
[0])) {
489 // set the inexact flag
490 *pfpsf
|= INEXACT_EXCEPTION
;
491 } // else the result is exact
492 } else { // the result is inexact; f2* <= 1/2
493 // set the inexact flag
494 *pfpsf
|= INEXACT_EXCEPTION
;
497 // if the result was a midpoint, it was already rounded away from zero
498 res
.w
[1] |= x_sign
| 0x3040000000000000ull
;
500 } else { // if ((q + exp) < 0) <=> q < -exp
501 // the result is +0 or -0
502 res
.w
[1] = x_sign
| 0x3040000000000000ull
;
503 res
.w
[0] = 0x0000000000000000ull
;
504 *pfpsf
|= INEXACT_EXCEPTION
;
509 if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
510 // need to shift right -exp digits from the coefficient; exp will be 0
511 ind
= -exp
; // 1 <= ind <= 34; ind is a synonym for 'x'
512 // (number of digits to be chopped off)
513 // chop off ind digits from the lower part of C1
514 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
515 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
516 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
517 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
520 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
522 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
523 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
525 // if (C1.w[0] < tmp64) C1.w[1]++;
526 // if carry-out from C1.w[0], increment C1.w[1]
527 // calculate C* and f*
528 // C* is actually floor(C*) in this case
529 // C* and f* need shifting and masking, as shown by
530 // shiftright128[] and maskhigh128[]
532 // kx = 10^(-x) = ten2mk128[ind - 1]
533 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
534 // the approximation of 10^(-x) was rounded up to 118 bits
535 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
536 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
537 res
.w
[1] = P256
.w
[3];
538 res
.w
[0] = P256
.w
[2];
539 // redundant fstar.w[3] = 0;
540 // redundant fstar.w[2] = 0;
541 // redundant fstar.w[1] = P256.w[1];
542 // redundant fstar.w[0] = P256.w[0];
543 // fraction f* > 10^(-x) <=> inexact
544 // f* is in the right position to be compared with
545 // 10^(-x) from ten2mk128[]
546 if ((P256
.w
[1] > ten2mk128
[ind
- 1].w
[1])
547 || (P256
.w
[1] == ten2mk128
[ind
- 1].w
[1]
548 && (P256
.w
[0] >= ten2mk128
[ind
- 1].w
[0]))) {
549 *pfpsf
|= INEXACT_EXCEPTION
;
550 // if positive, the truncated value is already the correct result
551 if (x_sign
) { // if negative
552 if (++res
.w
[0] == 0) {
557 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
558 shift
= shiftright128
[ind
- 1]; // 0 <= shift <= 102
559 res
.w
[1] = (P256
.w
[3] >> shift
);
560 res
.w
[0] = (P256
.w
[3] << (64 - shift
)) | (P256
.w
[2] >> shift
);
561 // redundant fstar.w[3] = 0;
562 fstar
.w
[2] = P256
.w
[2] & maskhigh128
[ind
- 1];
563 fstar
.w
[1] = P256
.w
[1];
564 fstar
.w
[0] = P256
.w
[0];
565 // fraction f* > 10^(-x) <=> inexact
566 // f* is in the right position to be compared with
567 // 10^(-x) from ten2mk128[]
568 if (fstar
.w
[2] || fstar
.w
[1] > ten2mk128
[ind
- 1].w
[1] ||
569 (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1] &&
570 fstar
.w
[0] >= ten2mk128
[ind
- 1].w
[0])) {
571 *pfpsf
|= INEXACT_EXCEPTION
;
572 // if positive, the truncated value is already the correct result
573 if (x_sign
) { // if negative
574 if (++res
.w
[0] == 0) {
579 } else { // 22 <= ind - 1 <= 33
580 shift
= shiftright128
[ind
- 1] - 64; // 2 <= shift <= 38
582 res
.w
[0] = P256
.w
[3] >> shift
;
583 fstar
.w
[3] = P256
.w
[3] & maskhigh128
[ind
- 1];
584 fstar
.w
[2] = P256
.w
[2];
585 fstar
.w
[1] = P256
.w
[1];
586 fstar
.w
[0] = P256
.w
[0];
587 // fraction f* > 10^(-x) <=> inexact
588 // f* is in the right position to be compared with
589 // 10^(-x) from ten2mk128[]
590 if (fstar
.w
[3] || fstar
.w
[2]
591 || fstar
.w
[1] > ten2mk128
[ind
- 1].w
[1]
592 || (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1]
593 && fstar
.w
[0] >= ten2mk128
[ind
- 1].w
[0])) {
594 *pfpsf
|= INEXACT_EXCEPTION
;
595 // if positive, the truncated value is already the correct result
596 if (x_sign
) { // if negative
597 if (++res
.w
[0] == 0) {
603 res
.w
[1] = x_sign
| 0x3040000000000000ull
| res
.w
[1];
605 } else { // if exp < 0 and q + exp <= 0
606 if (x_sign
) { // negative rounds down to -1.0
607 res
.w
[1] = 0xb040000000000000ull
;
608 res
.w
[0] = 0x0000000000000001ull
;
609 } else { // positive rpunds down to +0.0
610 res
.w
[1] = 0x3040000000000000ull
;
611 res
.w
[0] = 0x0000000000000000ull
;
613 *pfpsf
|= INEXACT_EXCEPTION
;
618 if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
619 // need to shift right -exp digits from the coefficient; exp will be 0
620 ind
= -exp
; // 1 <= ind <= 34; ind is a synonym for 'x'
621 // (number of digits to be chopped off)
622 // chop off ind digits from the lower part of C1
623 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
624 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
625 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
626 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
629 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
631 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
632 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
634 // if (C1.w[0] < tmp64) C1.w[1]++;
635 // if carry-out from C1.w[0], increment C1.w[1]
636 // calculate C* and f*
637 // C* is actually floor(C*) in this case
638 // C* and f* need shifting and masking, as shown by
639 // shiftright128[] and maskhigh128[]
641 // kx = 10^(-x) = ten2mk128[ind - 1]
643 // the approximation of 10^(-x) was rounded up to 118 bits
644 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
645 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
646 res
.w
[1] = P256
.w
[3];
647 res
.w
[0] = P256
.w
[2];
648 // redundant fstar.w[3] = 0;
649 // redundant fstar.w[2] = 0;
650 // redundant fstar.w[1] = P256.w[1];
651 // redundant fstar.w[0] = P256.w[0];
652 // fraction f* > 10^(-x) <=> inexact
653 // f* is in the right position to be compared with
654 // 10^(-x) from ten2mk128[]
655 if ((P256
.w
[1] > ten2mk128
[ind
- 1].w
[1])
656 || (P256
.w
[1] == ten2mk128
[ind
- 1].w
[1]
657 && (P256
.w
[0] >= ten2mk128
[ind
- 1].w
[0]))) {
658 *pfpsf
|= INEXACT_EXCEPTION
;
659 // if negative, the truncated value is already the correct result
660 if (!x_sign
) { // if positive
661 if (++res
.w
[0] == 0) {
666 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
667 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
668 res
.w
[1] = (P256
.w
[3] >> shift
);
669 res
.w
[0] = (P256
.w
[3] << (64 - shift
)) | (P256
.w
[2] >> shift
);
670 // redundant fstar.w[3] = 0;
671 fstar
.w
[2] = P256
.w
[2] & maskhigh128
[ind
- 1];
672 fstar
.w
[1] = P256
.w
[1];
673 fstar
.w
[0] = P256
.w
[0];
674 // fraction f* > 10^(-x) <=> inexact
675 // f* is in the right position to be compared with
676 // 10^(-x) from ten2mk128[]
677 if (fstar
.w
[2] || fstar
.w
[1] > ten2mk128
[ind
- 1].w
[1] ||
678 (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1] &&
679 fstar
.w
[0] >= ten2mk128
[ind
- 1].w
[0])) {
680 *pfpsf
|= INEXACT_EXCEPTION
;
681 // if negative, the truncated value is already the correct result
682 if (!x_sign
) { // if positive
683 if (++res
.w
[0] == 0) {
688 } else { // 22 <= ind - 1 <= 33
689 shift
= shiftright128
[ind
- 1] - 64; // 2 <= shift <= 38
691 res
.w
[0] = P256
.w
[3] >> shift
;
692 fstar
.w
[3] = P256
.w
[3] & maskhigh128
[ind
- 1];
693 fstar
.w
[2] = P256
.w
[2];
694 fstar
.w
[1] = P256
.w
[1];
695 fstar
.w
[0] = P256
.w
[0];
696 // fraction f* > 10^(-x) <=> inexact
697 // f* is in the right position to be compared with
698 // 10^(-x) from ten2mk128[]
699 if (fstar
.w
[3] || fstar
.w
[2]
700 || fstar
.w
[1] > ten2mk128
[ind
- 1].w
[1]
701 || (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1]
702 && fstar
.w
[0] >= ten2mk128
[ind
- 1].w
[0])) {
703 *pfpsf
|= INEXACT_EXCEPTION
;
704 // if negative, the truncated value is already the correct result
705 if (!x_sign
) { // if positive
706 if (++res
.w
[0] == 0) {
712 res
.w
[1] = x_sign
| 0x3040000000000000ull
| res
.w
[1];
714 } else { // if exp < 0 and q + exp <= 0
715 if (x_sign
) { // negative rounds up to -0.0
716 res
.w
[1] = 0xb040000000000000ull
;
717 res
.w
[0] = 0x0000000000000000ull
;
718 } else { // positive rpunds up to +1.0
719 res
.w
[1] = 0x3040000000000000ull
;
720 res
.w
[0] = 0x0000000000000001ull
;
722 *pfpsf
|= INEXACT_EXCEPTION
;
726 case ROUNDING_TO_ZERO
:
727 if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
728 // need to shift right -exp digits from the coefficient; exp will be 0
729 ind
= -exp
; // 1 <= ind <= 34; ind is a synonym for 'x'
730 // (number of digits to be chopped off)
731 // chop off ind digits from the lower part of C1
732 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
733 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
734 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
735 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
738 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
740 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
741 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
743 // if (C1.w[0] < tmp64) C1.w[1]++;
744 // if carry-out from C1.w[0], increment C1.w[1]
745 // calculate C* and f*
746 // C* is actually floor(C*) in this case
747 // C* and f* need shifting and masking, as shown by
748 // shiftright128[] and maskhigh128[]
750 // kx = 10^(-x) = ten2mk128[ind - 1]
751 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
752 // the approximation of 10^(-x) was rounded up to 118 bits
753 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
754 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
755 res
.w
[1] = P256
.w
[3];
756 res
.w
[0] = P256
.w
[2];
757 // redundant fstar.w[3] = 0;
758 // redundant fstar.w[2] = 0;
759 // redundant fstar.w[1] = P256.w[1];
760 // redundant fstar.w[0] = P256.w[0];
761 // fraction f* > 10^(-x) <=> inexact
762 // f* is in the right position to be compared with
763 // 10^(-x) from ten2mk128[]
764 if ((P256
.w
[1] > ten2mk128
[ind
- 1].w
[1])
765 || (P256
.w
[1] == ten2mk128
[ind
- 1].w
[1]
766 && (P256
.w
[0] >= ten2mk128
[ind
- 1].w
[0]))) {
767 *pfpsf
|= INEXACT_EXCEPTION
;
769 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
770 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
771 res
.w
[1] = (P256
.w
[3] >> shift
);
772 res
.w
[0] = (P256
.w
[3] << (64 - shift
)) | (P256
.w
[2] >> shift
);
773 // redundant fstar.w[3] = 0;
774 fstar
.w
[2] = P256
.w
[2] & maskhigh128
[ind
- 1];
775 fstar
.w
[1] = P256
.w
[1];
776 fstar
.w
[0] = P256
.w
[0];
777 // fraction f* > 10^(-x) <=> inexact
778 // f* is in the right position to be compared with
779 // 10^(-x) from ten2mk128[]
780 if (fstar
.w
[2] || fstar
.w
[1] > ten2mk128
[ind
- 1].w
[1] ||
781 (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1] &&
782 fstar
.w
[0] >= ten2mk128
[ind
- 1].w
[0])) {
783 *pfpsf
|= INEXACT_EXCEPTION
;
785 } else { // 22 <= ind - 1 <= 33
786 shift
= shiftright128
[ind
- 1] - 64; // 2 <= shift <= 38
788 res
.w
[0] = P256
.w
[3] >> shift
;
789 fstar
.w
[3] = P256
.w
[3] & maskhigh128
[ind
- 1];
790 fstar
.w
[2] = P256
.w
[2];
791 fstar
.w
[1] = P256
.w
[1];
792 fstar
.w
[0] = P256
.w
[0];
793 // fraction f* > 10^(-x) <=> inexact
794 // f* is in the right position to be compared with
795 // 10^(-x) from ten2mk128[]
796 if (fstar
.w
[3] || fstar
.w
[2]
797 || fstar
.w
[1] > ten2mk128
[ind
- 1].w
[1]
798 || (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1]
799 && fstar
.w
[0] >= ten2mk128
[ind
- 1].w
[0])) {
800 *pfpsf
|= INEXACT_EXCEPTION
;
803 res
.w
[1] = x_sign
| 0x3040000000000000ull
| res
.w
[1];
805 } else { // if exp < 0 and q + exp <= 0 the result is +0 or -0
806 res
.w
[1] = x_sign
| 0x3040000000000000ull
;
807 res
.w
[0] = 0x0000000000000000ull
;
808 *pfpsf
|= INEXACT_EXCEPTION
;
817 /*****************************************************************************
818 * BID128_round_integral_nearest_even
819 ****************************************************************************/
821 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_even
, x
)
826 int exp
; // unbiased exponent
827 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
830 unsigned int x_nr_bits
;
833 // UINT128 res is C* at first - represents up to 34 decimal digits ~ 113 bits
837 // check for NaN or Infinity
838 if ((x
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
) {
840 if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
841 // if x = NaN, then res = Q (x)
842 // check first for non-canonical NaN payload
843 if (((x
.w
[1] & 0x00003fffffffffffull
) > 0x0000314dc6448d93ull
) ||
844 (((x
.w
[1] & 0x00003fffffffffffull
) == 0x0000314dc6448d93ull
) &&
845 (x
.w
[0] > 0x38c15b09ffffffffull
))) {
846 x
.w
[1] = x
.w
[1] & 0xffffc00000000000ull
;
849 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
851 *pfpsf
|= INVALID_EXCEPTION
;
853 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out also G[6]-G[16]
855 } else { // x is QNaN
857 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out G[6]-G[16]
861 } else { // x is not a NaN, so it must be infinity
862 if ((x
.w
[1] & MASK_SIGN
) == 0x0ull
) { // x is +inf
864 res
.w
[1] = 0x7800000000000000ull
;
865 res
.w
[0] = 0x0000000000000000ull
;
866 } else { // x is -inf
868 res
.w
[1] = 0xf800000000000000ull
;
869 res
.w
[0] = 0x0000000000000000ull
;
875 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
876 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
879 // check for non-canonical values (treated as zero)
880 if ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
) { // G0_G1=11
882 x_exp
= (x
.w
[1] << 2) & MASK_EXP
; // biased and shifted left 49 bits
883 C1
.w
[1] = 0; // significand high
884 C1
.w
[0] = 0; // significand low
885 } else { // G0_G1 != 11
886 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bits
887 if (C1
.w
[1] > 0x0001ed09bead87c0ull
||
888 (C1
.w
[1] == 0x0001ed09bead87c0ull
889 && C1
.w
[0] > 0x378d8e63ffffffffull
)) {
890 // x is non-canonical if coefficient is larger than 10^34 -1
893 } else { // canonical
898 // test for input equal to zero
899 if ((C1
.w
[1] == 0x0ull
) && (C1
.w
[0] == 0x0ull
)) {
901 // return 0 preserving the sign bit and the preferred exponent
903 if (x_exp
<= (0x1820ull
<< 49)) {
904 res
.w
[1] = (x
.w
[1] & 0x8000000000000000ull
) | 0x3040000000000000ull
;
906 res
.w
[1] = x_sign
| x_exp
;
908 res
.w
[0] = 0x0000000000000000ull
;
911 // x is not special and is not zero
913 // if (exp <= -(p+1)) return 0
914 if (x_exp
<= 0x2ffa000000000000ull
) { // 0x2ffa000000000000ull == -35
915 res
.w
[1] = x_sign
| 0x3040000000000000ull
;
916 res
.w
[0] = 0x0000000000000000ull
;
919 // q = nr. of decimal digits in x
920 // determine first the nr. of bits in x
922 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
923 // split the 64-bit value in two 32-bit halves to avoid rounding errors
924 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
925 tmp1
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
927 33 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
929 tmp1
.d
= (double) (C1
.w
[0]); // exact conversion
931 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
933 } else { // if x < 2^53
934 tmp1
.d
= (double) C1
.w
[0]; // exact conversion
936 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
938 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
939 tmp1
.d
= (double) C1
.w
[1]; // exact conversion
941 65 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
944 q
= nr_digits
[x_nr_bits
- 1].digits
;
946 q
= nr_digits
[x_nr_bits
- 1].digits1
;
947 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
948 || (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
&&
949 C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
952 exp
= (x_exp
>> 49) - 6176;
953 if (exp
>= 0) { // -exp <= 0
954 // the argument is an integer already
958 } else if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
959 // need to shift right -exp digits from the coefficient; the exp will be 0
960 ind
= -exp
; // 1 <= ind <= 34; ind is a synonym for 'x'
961 // chop off ind digits from the lower part of C1
962 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
965 C1
.w
[0] = C1
.w
[0] + midpoint64
[ind
- 1];
967 C1
.w
[0] = C1
.w
[0] + midpoint128
[ind
- 20].w
[0];
968 C1
.w
[1] = C1
.w
[1] + midpoint128
[ind
- 20].w
[1];
972 // calculate C* and f*
973 // C* is actually floor(C*) in this case
974 // C* and f* need shifting and masking, as shown by
975 // shiftright128[] and maskhigh128[]
977 // kx = 10^(-x) = ten2mk128[ind - 1]
978 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
979 // the approximation of 10^(-x) was rounded up to 118 bits
980 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
981 // determine the value of res and fstar
982 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
983 // redundant shift = shiftright128[ind - 1]; // shift = 0
984 res
.w
[1] = P256
.w
[3];
985 res
.w
[0] = P256
.w
[2];
986 // redundant fstar.w[3] = 0;
987 // redundant fstar.w[2] = 0;
988 // redundant fstar.w[1] = P256.w[1];
989 // redundant fstar.w[0] = P256.w[0];
990 // fraction f* < 10^(-x) <=> midpoint
991 // f* is in the right position to be compared with
992 // 10^(-x) from ten2mk128[]
993 // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even)
994 if ((res
.w
[0] & 0x0000000000000001ull
) && // is result odd?
995 ((P256
.w
[1] < (ten2mk128
[ind
- 1].w
[1]))
996 || ((P256
.w
[1] == ten2mk128
[ind
- 1].w
[1])
997 && (P256
.w
[0] < ten2mk128
[ind
- 1].w
[0])))) {
998 // subract 1 to make even
999 if (res
.w
[0]-- == 0) {
1003 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1004 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
1005 res
.w
[1] = (P256
.w
[3] >> shift
);
1006 res
.w
[0] = (P256
.w
[3] << (64 - shift
)) | (P256
.w
[2] >> shift
);
1007 // redundant fstar.w[3] = 0;
1008 fstar
.w
[2] = P256
.w
[2] & maskhigh128
[ind
- 1];
1009 fstar
.w
[1] = P256
.w
[1];
1010 fstar
.w
[0] = P256
.w
[0];
1011 // fraction f* < 10^(-x) <=> midpoint
1012 // f* is in the right position to be compared with
1013 // 10^(-x) from ten2mk128[]
1014 if ((res
.w
[0] & 0x0000000000000001ull
) && // is result odd?
1015 fstar
.w
[2] == 0 && (fstar
.w
[1] < ten2mk128
[ind
- 1].w
[1] ||
1016 (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1] &&
1017 fstar
.w
[0] < ten2mk128
[ind
- 1].w
[0]))) {
1018 // subract 1 to make even
1019 if (res
.w
[0]-- == 0) {
1023 } else { // 22 <= ind - 1 <= 33
1024 shift
= shiftright128
[ind
- 1] - 64; // 2 <= shift <= 38
1026 res
.w
[0] = P256
.w
[3] >> shift
;
1027 fstar
.w
[3] = P256
.w
[3] & maskhigh128
[ind
- 1];
1028 fstar
.w
[2] = P256
.w
[2];
1029 fstar
.w
[1] = P256
.w
[1];
1030 fstar
.w
[0] = P256
.w
[0];
1031 // fraction f* < 10^(-x) <=> midpoint
1032 // f* is in the right position to be compared with
1033 // 10^(-x) from ten2mk128[]
1034 if ((res
.w
[0] & 0x0000000000000001ull
) && // is result odd?
1035 fstar
.w
[3] == 0 && fstar
.w
[2] == 0
1036 && (fstar
.w
[1] < ten2mk128
[ind
- 1].w
[1]
1037 || (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1]
1038 && fstar
.w
[0] < ten2mk128
[ind
- 1].w
[0]))) {
1039 // subract 1 to make even
1040 if (res
.w
[0]-- == 0) {
1045 res
.w
[1] = x_sign
| 0x3040000000000000ull
| res
.w
[1];
1047 } else { // if ((q + exp) < 0) <=> q < -exp
1048 // the result is +0 or -0
1049 res
.w
[1] = x_sign
| 0x3040000000000000ull
;
1050 res
.w
[0] = 0x0000000000000000ull
;
1055 /*****************************************************************************
1056 * BID128_round_integral_negative
1057 ****************************************************************************/
1059 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_negative
, x
)
1064 int exp
; // unbiased exponent
1065 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
1067 BID_UI64DOUBLE tmp1
;
1068 unsigned int x_nr_bits
;
1071 // UINT128 res is C* at first - represents up to 34 decimal digits ~
1076 // check for NaN or Infinity
1077 if ((x
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
) {
1079 if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
1080 // if x = NaN, then res = Q (x)
1081 // check first for non-canonical NaN payload
1082 if (((x
.w
[1] & 0x00003fffffffffffull
) > 0x0000314dc6448d93ull
) ||
1083 (((x
.w
[1] & 0x00003fffffffffffull
) == 0x0000314dc6448d93ull
) &&
1084 (x
.w
[0] > 0x38c15b09ffffffffull
))) {
1085 x
.w
[1] = x
.w
[1] & 0xffffc00000000000ull
;
1088 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
1090 *pfpsf
|= INVALID_EXCEPTION
;
1092 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out also G[6]-G[16]
1094 } else { // x is QNaN
1096 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out G[6]-G[16]
1100 } else { // x is not a NaN, so it must be infinity
1101 if ((x
.w
[1] & MASK_SIGN
) == 0x0ull
) { // x is +inf
1103 res
.w
[1] = 0x7800000000000000ull
;
1104 res
.w
[0] = 0x0000000000000000ull
;
1105 } else { // x is -inf
1107 res
.w
[1] = 0xf800000000000000ull
;
1108 res
.w
[0] = 0x0000000000000000ull
;
1114 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
1115 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
1118 // check for non-canonical values (treated as zero)
1119 if ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
) { // G0_G1=11
1121 x_exp
= (x
.w
[1] << 2) & MASK_EXP
; // biased and shifted left 49 bits
1122 C1
.w
[1] = 0; // significand high
1123 C1
.w
[0] = 0; // significand low
1124 } else { // G0_G1 != 11
1125 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bits
1126 if (C1
.w
[1] > 0x0001ed09bead87c0ull
||
1127 (C1
.w
[1] == 0x0001ed09bead87c0ull
1128 && C1
.w
[0] > 0x378d8e63ffffffffull
)) {
1129 // x is non-canonical if coefficient is larger than 10^34 -1
1132 } else { // canonical
1137 // test for input equal to zero
1138 if ((C1
.w
[1] == 0x0ull
) && (C1
.w
[0] == 0x0ull
)) {
1140 // return 0 preserving the sign bit and the preferred exponent
1142 if (x_exp
<= (0x1820ull
<< 49)) {
1143 res
.w
[1] = (x
.w
[1] & 0x8000000000000000ull
) | 0x3040000000000000ull
;
1145 res
.w
[1] = x_sign
| x_exp
;
1147 res
.w
[0] = 0x0000000000000000ull
;
1150 // x is not special and is not zero
1152 // if (exp <= -p) return -1.0 or +0.0
1153 if (x_exp
<= 0x2ffc000000000000ull
) { // 0x2ffc000000000000ull == -34
1155 // if negative, return negative 1, because we know the coefficient
1156 // is non-zero (would have been caught above)
1157 res
.w
[1] = 0xb040000000000000ull
;
1158 res
.w
[0] = 0x0000000000000001ull
;
1160 // if positive, return positive 0, because we know coefficient is
1161 // non-zero (would have been caught above)
1162 res
.w
[1] = 0x3040000000000000ull
;
1163 res
.w
[0] = 0x0000000000000000ull
;
1167 // q = nr. of decimal digits in x
1168 // determine first the nr. of bits in x
1170 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
1171 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1172 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
1173 tmp1
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
1175 33 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1176 } else { // x < 2^32
1177 tmp1
.d
= (double) (C1
.w
[0]); // exact conversion
1179 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1181 } else { // if x < 2^53
1182 tmp1
.d
= (double) C1
.w
[0]; // exact conversion
1184 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1186 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1187 tmp1
.d
= (double) C1
.w
[1]; // exact conversion
1189 65 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1192 q
= nr_digits
[x_nr_bits
- 1].digits
;
1194 q
= nr_digits
[x_nr_bits
- 1].digits1
;
1195 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
||
1196 (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
&&
1197 C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
1200 exp
= (x_exp
>> 49) - 6176;
1201 if (exp
>= 0) { // -exp <= 0
1202 // the argument is an integer already
1206 } else if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
1207 // need to shift right -exp digits from the coefficient; the exp will be 0
1208 ind
= -exp
; // 1 <= ind <= 34; ind is a synonym for 'x'
1209 // (number of digits to be chopped off)
1210 // chop off ind digits from the lower part of C1
1211 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
1212 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
1213 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
1214 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
1217 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
1219 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
1220 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
1222 // if (C1.w[0] < tmp64) C1.w[1]++;
1223 // if carry-out from C1.w[0], increment C1.w[1]
1224 // calculate C* and f*
1225 // C* is actually floor(C*) in this case
1226 // C* and f* need shifting and masking, as shown by
1227 // shiftright128[] and maskhigh128[]
1229 // kx = 10^(-x) = ten2mk128[ind - 1]
1230 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
1231 // the approximation of 10^(-x) was rounded up to 118 bits
1232 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
1233 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1234 res
.w
[1] = P256
.w
[3];
1235 res
.w
[0] = P256
.w
[2];
1236 // if positive, the truncated value is already the correct result
1237 if (x_sign
) { // if negative
1238 // redundant fstar.w[3] = 0;
1239 // redundant fstar.w[2] = 0;
1240 // redundant fstar.w[1] = P256.w[1];
1241 // redundant fstar.w[0] = P256.w[0];
1242 // fraction f* > 10^(-x) <=> inexact
1243 // f* is in the right position to be compared with
1244 // 10^(-x) from ten2mk128[]
1245 if ((P256
.w
[1] > ten2mk128
[ind
- 1].w
[1])
1246 || (P256
.w
[1] == ten2mk128
[ind
- 1].w
[1]
1247 && (P256
.w
[0] >= ten2mk128
[ind
- 1].w
[0]))) {
1248 if (++res
.w
[0] == 0) {
1253 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1254 shift
= shiftright128
[ind
- 1]; // 0 <= shift <= 102
1255 res
.w
[1] = (P256
.w
[3] >> shift
);
1256 res
.w
[0] = (P256
.w
[3] << (64 - shift
)) | (P256
.w
[2] >> shift
);
1257 // if positive, the truncated value is already the correct result
1258 if (x_sign
) { // if negative
1259 // redundant fstar.w[3] = 0;
1260 fstar
.w
[2] = P256
.w
[2] & maskhigh128
[ind
- 1];
1261 fstar
.w
[1] = P256
.w
[1];
1262 fstar
.w
[0] = P256
.w
[0];
1263 // fraction f* > 10^(-x) <=> inexact
1264 // f* is in the right position to be compared with
1265 // 10^(-x) from ten2mk128[]
1266 if (fstar
.w
[2] || fstar
.w
[1] > ten2mk128
[ind
- 1].w
[1] ||
1267 (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1] &&
1268 fstar
.w
[0] >= ten2mk128
[ind
- 1].w
[0])) {
1269 if (++res
.w
[0] == 0) {
1274 } else { // 22 <= ind - 1 <= 33
1275 shift
= shiftright128
[ind
- 1] - 64; // 2 <= shift <= 38
1277 res
.w
[0] = P256
.w
[3] >> shift
;
1278 // if positive, the truncated value is already the correct result
1279 if (x_sign
) { // if negative
1280 fstar
.w
[3] = P256
.w
[3] & maskhigh128
[ind
- 1];
1281 fstar
.w
[2] = P256
.w
[2];
1282 fstar
.w
[1] = P256
.w
[1];
1283 fstar
.w
[0] = P256
.w
[0];
1284 // fraction f* > 10^(-x) <=> inexact
1285 // f* is in the right position to be compared with
1286 // 10^(-x) from ten2mk128[]
1287 if (fstar
.w
[3] || fstar
.w
[2]
1288 || fstar
.w
[1] > ten2mk128
[ind
- 1].w
[1]
1289 || (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1]
1290 && fstar
.w
[0] >= ten2mk128
[ind
- 1].w
[0])) {
1291 if (++res
.w
[0] == 0) {
1297 res
.w
[1] = x_sign
| 0x3040000000000000ull
| res
.w
[1];
1299 } else { // if exp < 0 and q + exp <= 0
1300 if (x_sign
) { // negative rounds down to -1.0
1301 res
.w
[1] = 0xb040000000000000ull
;
1302 res
.w
[0] = 0x0000000000000001ull
;
1303 } else { // positive rpunds down to +0.0
1304 res
.w
[1] = 0x3040000000000000ull
;
1305 res
.w
[0] = 0x0000000000000000ull
;
1311 /*****************************************************************************
1312 * BID128_round_integral_positive
1313 ****************************************************************************/
1315 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_positive
, x
)
1320 int exp
; // unbiased exponent
1321 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
1323 BID_UI64DOUBLE tmp1
;
1324 unsigned int x_nr_bits
;
1327 // UINT128 res is C* at first - represents up to 34 decimal digits ~
1332 // check for NaN or Infinity
1333 if ((x
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
) {
1335 if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
1336 // if x = NaN, then res = Q (x)
1337 // check first for non-canonical NaN payload
1338 if (((x
.w
[1] & 0x00003fffffffffffull
) > 0x0000314dc6448d93ull
) ||
1339 (((x
.w
[1] & 0x00003fffffffffffull
) == 0x0000314dc6448d93ull
) &&
1340 (x
.w
[0] > 0x38c15b09ffffffffull
))) {
1341 x
.w
[1] = x
.w
[1] & 0xffffc00000000000ull
;
1344 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
1346 *pfpsf
|= INVALID_EXCEPTION
;
1348 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out also G[6]-G[16]
1350 } else { // x is QNaN
1352 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out G[6]-G[16]
1356 } else { // x is not a NaN, so it must be infinity
1357 if ((x
.w
[1] & MASK_SIGN
) == 0x0ull
) { // x is +inf
1359 res
.w
[1] = 0x7800000000000000ull
;
1360 res
.w
[0] = 0x0000000000000000ull
;
1361 } else { // x is -inf
1363 res
.w
[1] = 0xf800000000000000ull
;
1364 res
.w
[0] = 0x0000000000000000ull
;
1370 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
1371 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
1374 // check for non-canonical values (treated as zero)
1375 if ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
) { // G0_G1=11
1377 x_exp
= (x
.w
[1] << 2) & MASK_EXP
; // biased and shifted left 49 bits
1378 C1
.w
[1] = 0; // significand high
1379 C1
.w
[0] = 0; // significand low
1380 } else { // G0_G1 != 11
1381 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bits
1382 if (C1
.w
[1] > 0x0001ed09bead87c0ull
||
1383 (C1
.w
[1] == 0x0001ed09bead87c0ull
1384 && C1
.w
[0] > 0x378d8e63ffffffffull
)) {
1385 // x is non-canonical if coefficient is larger than 10^34 -1
1388 } else { // canonical
1393 // test for input equal to zero
1394 if ((C1
.w
[1] == 0x0ull
) && (C1
.w
[0] == 0x0ull
)) {
1396 // return 0 preserving the sign bit and the preferred exponent
1398 if (x_exp
<= (0x1820ull
<< 49)) {
1399 res
.w
[1] = (x
.w
[1] & 0x8000000000000000ull
) | 0x3040000000000000ull
;
1401 res
.w
[1] = x_sign
| x_exp
;
1403 res
.w
[0] = 0x0000000000000000ull
;
1406 // x is not special and is not zero
1408 // if (exp <= -p) return -0.0 or +1.0
1409 if (x_exp
<= 0x2ffc000000000000ull
) { // 0x2ffc000000000000ull == -34
1411 // if negative, return negative 0, because we know the coefficient
1412 // is non-zero (would have been caught above)
1413 res
.w
[1] = 0xb040000000000000ull
;
1414 res
.w
[0] = 0x0000000000000000ull
;
1416 // if positive, return positive 1, because we know coefficient is
1417 // non-zero (would have been caught above)
1418 res
.w
[1] = 0x3040000000000000ull
;
1419 res
.w
[0] = 0x0000000000000001ull
;
1423 // q = nr. of decimal digits in x
1424 // determine first the nr. of bits in x
1426 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
1427 // split 64-bit value in two 32-bit halves to avoid rounding errors
1428 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
1429 tmp1
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
1431 33 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1432 } else { // x < 2^32
1433 tmp1
.d
= (double) (C1
.w
[0]); // exact conversion
1435 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1437 } else { // if x < 2^53
1438 tmp1
.d
= (double) C1
.w
[0]; // exact conversion
1440 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1442 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1443 tmp1
.d
= (double) C1
.w
[1]; // exact conversion
1445 65 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1448 q
= nr_digits
[x_nr_bits
- 1].digits
;
1450 q
= nr_digits
[x_nr_bits
- 1].digits1
;
1451 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
||
1452 (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
&&
1453 C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
1456 exp
= (x_exp
>> 49) - 6176;
1457 if (exp
>= 0) { // -exp <= 0
1458 // the argument is an integer already
1462 } else if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
1463 // need to shift right -exp digits from the coefficient; exp will be 0
1464 ind
= -exp
; // 1 <= ind <= 34; ind is a synonym for 'x'
1465 // (number of digits to be chopped off)
1466 // chop off ind digits from the lower part of C1
1467 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
1468 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
1469 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
1470 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
1473 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
1475 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
1476 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
1478 // if (C1.w[0] < tmp64) C1.w[1]++;
1479 // if carry-out from C1.w[0], increment C1.w[1]
1480 // calculate C* and f*
1481 // C* is actually floor(C*) in this case
1482 // C* and f* need shifting and masking, as shown by
1483 // shiftright128[] and maskhigh128[]
1485 // kx = 10^(-x) = ten2mk128[ind - 1]
1486 // C* = C1 * 10^(-x)
1487 // the approximation of 10^(-x) was rounded up to 118 bits
1488 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
1489 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1490 res
.w
[1] = P256
.w
[3];
1491 res
.w
[0] = P256
.w
[2];
1492 // if negative, the truncated value is already the correct result
1493 if (!x_sign
) { // if positive
1494 // redundant fstar.w[3] = 0;
1495 // redundant fstar.w[2] = 0;
1496 // redundant fstar.w[1] = P256.w[1];
1497 // redundant fstar.w[0] = P256.w[0];
1498 // fraction f* > 10^(-x) <=> inexact
1499 // f* is in the right position to be compared with
1500 // 10^(-x) from ten2mk128[]
1501 if ((P256
.w
[1] > ten2mk128
[ind
- 1].w
[1])
1502 || (P256
.w
[1] == ten2mk128
[ind
- 1].w
[1]
1503 && (P256
.w
[0] >= ten2mk128
[ind
- 1].w
[0]))) {
1504 if (++res
.w
[0] == 0) {
1509 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1510 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
1511 res
.w
[1] = (P256
.w
[3] >> shift
);
1512 res
.w
[0] = (P256
.w
[3] << (64 - shift
)) | (P256
.w
[2] >> shift
);
1513 // if negative, the truncated value is already the correct result
1514 if (!x_sign
) { // if positive
1515 // redundant fstar.w[3] = 0;
1516 fstar
.w
[2] = P256
.w
[2] & maskhigh128
[ind
- 1];
1517 fstar
.w
[1] = P256
.w
[1];
1518 fstar
.w
[0] = P256
.w
[0];
1519 // fraction f* > 10^(-x) <=> inexact
1520 // f* is in the right position to be compared with
1521 // 10^(-x) from ten2mk128[]
1522 if (fstar
.w
[2] || fstar
.w
[1] > ten2mk128
[ind
- 1].w
[1] ||
1523 (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1] &&
1524 fstar
.w
[0] >= ten2mk128
[ind
- 1].w
[0])) {
1525 if (++res
.w
[0] == 0) {
1530 } else { // 22 <= ind - 1 <= 33
1531 shift
= shiftright128
[ind
- 1] - 64; // 2 <= shift <= 38
1533 res
.w
[0] = P256
.w
[3] >> shift
;
1534 // if negative, the truncated value is already the correct result
1535 if (!x_sign
) { // if positive
1536 fstar
.w
[3] = P256
.w
[3] & maskhigh128
[ind
- 1];
1537 fstar
.w
[2] = P256
.w
[2];
1538 fstar
.w
[1] = P256
.w
[1];
1539 fstar
.w
[0] = P256
.w
[0];
1540 // fraction f* > 10^(-x) <=> inexact
1541 // f* is in the right position to be compared with
1542 // 10^(-x) from ten2mk128[]
1543 if (fstar
.w
[3] || fstar
.w
[2]
1544 || fstar
.w
[1] > ten2mk128
[ind
- 1].w
[1]
1545 || (fstar
.w
[1] == ten2mk128
[ind
- 1].w
[1]
1546 && fstar
.w
[0] >= ten2mk128
[ind
- 1].w
[0])) {
1547 if (++res
.w
[0] == 0) {
1553 res
.w
[1] = x_sign
| 0x3040000000000000ull
| res
.w
[1];
1555 } else { // if exp < 0 and q + exp <= 0
1556 if (x_sign
) { // negative rounds up to -0.0
1557 res
.w
[1] = 0xb040000000000000ull
;
1558 res
.w
[0] = 0x0000000000000000ull
;
1559 } else { // positive rpunds up to +1.0
1560 res
.w
[1] = 0x3040000000000000ull
;
1561 res
.w
[0] = 0x0000000000000001ull
;
1567 /*****************************************************************************
1568 * BID128_round_integral_zero
1569 ****************************************************************************/
1571 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_zero
, x
)
1576 int exp
; // unbiased exponent
1577 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
1579 BID_UI64DOUBLE tmp1
;
1580 unsigned int x_nr_bits
;
1583 // UINT128 res is C* at first - represents up to 34 decimal digits ~
1587 // check for NaN or Infinity
1588 if ((x
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
) {
1590 if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
1591 // if x = NaN, then res = Q (x)
1592 // check first for non-canonical NaN payload
1593 if (((x
.w
[1] & 0x00003fffffffffffull
) > 0x0000314dc6448d93ull
) ||
1594 (((x
.w
[1] & 0x00003fffffffffffull
) == 0x0000314dc6448d93ull
) &&
1595 (x
.w
[0] > 0x38c15b09ffffffffull
))) {
1596 x
.w
[1] = x
.w
[1] & 0xffffc00000000000ull
;
1599 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
1601 *pfpsf
|= INVALID_EXCEPTION
;
1603 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out also G[6]-G[16]
1605 } else { // x is QNaN
1607 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out G[6]-G[16]
1611 } else { // x is not a NaN, so it must be infinity
1612 if ((x
.w
[1] & MASK_SIGN
) == 0x0ull
) { // x is +inf
1614 res
.w
[1] = 0x7800000000000000ull
;
1615 res
.w
[0] = 0x0000000000000000ull
;
1616 } else { // x is -inf
1618 res
.w
[1] = 0xf800000000000000ull
;
1619 res
.w
[0] = 0x0000000000000000ull
;
1625 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
1626 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
1629 // check for non-canonical values (treated as zero)
1630 if ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
) { // G0_G1=11
1632 x_exp
= (x
.w
[1] << 2) & MASK_EXP
; // biased and shifted left 49 bits
1633 C1
.w
[1] = 0; // significand high
1634 C1
.w
[0] = 0; // significand low
1635 } else { // G0_G1 != 11
1636 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bits
1637 if (C1
.w
[1] > 0x0001ed09bead87c0ull
||
1638 (C1
.w
[1] == 0x0001ed09bead87c0ull
1639 && C1
.w
[0] > 0x378d8e63ffffffffull
)) {
1640 // x is non-canonical if coefficient is larger than 10^34 -1
1643 } else { // canonical
1648 // test for input equal to zero
1649 if ((C1
.w
[1] == 0x0ull
) && (C1
.w
[0] == 0x0ull
)) {
1651 // return 0 preserving the sign bit and the preferred exponent
1653 if (x_exp
<= (0x1820ull
<< 49)) {
1654 res
.w
[1] = (x
.w
[1] & 0x8000000000000000ull
) | 0x3040000000000000ull
;
1656 res
.w
[1] = x_sign
| x_exp
;
1658 res
.w
[0] = 0x0000000000000000ull
;
1661 // x is not special and is not zero
1663 // if (exp <= -p) return -0.0 or +0.0
1664 if (x_exp
<= 0x2ffc000000000000ull
) { // 0x2ffc000000000000ull == -34
1665 res
.w
[1] = x_sign
| 0x3040000000000000ull
;
1666 res
.w
[0] = 0x0000000000000000ull
;
1669 // q = nr. of decimal digits in x
1670 // determine first the nr. of bits in x
1672 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
1673 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1674 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
1675 tmp1
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
1677 33 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1678 } else { // x < 2^32
1679 tmp1
.d
= (double) (C1
.w
[0]); // exact conversion
1681 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1683 } else { // if x < 2^53
1684 tmp1
.d
= (double) C1
.w
[0]; // exact conversion
1686 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1688 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1689 tmp1
.d
= (double) C1
.w
[1]; // exact conversion
1691 65 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1694 q
= nr_digits
[x_nr_bits
- 1].digits
;
1696 q
= nr_digits
[x_nr_bits
- 1].digits1
;
1697 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
||
1698 (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
&&
1699 C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
1702 exp
= (x_exp
>> 49) - 6176;
1703 if (exp
>= 0) { // -exp <= 0
1704 // the argument is an integer already
1708 } else if ((q
+ exp
) > 0) { // exp < 0 and 1 <= -exp < q
1709 // need to shift right -exp digits from the coefficient; the exp will be 0
1710 ind
= -exp
; // 1 <= ind <= 34; ind is a synonym for 'x'
1711 // (number of digits to be chopped off)
1712 // chop off ind digits from the lower part of C1
1713 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
1714 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
1715 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
1716 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
1719 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
1721 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
1722 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
1724 // if (C1.w[0] < tmp64) C1.w[1]++;
1725 // if carry-out from C1.w[0], increment C1.w[1]
1726 // calculate C* and f*
1727 // C* is actually floor(C*) in this case
1728 // C* and f* need shifting and masking, as shown by
1729 // shiftright128[] and maskhigh128[]
1731 // kx = 10^(-x) = ten2mk128[ind - 1]
1732 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
1733 // the approximation of 10^(-x) was rounded up to 118 bits
1734 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
1735 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1736 res
.w
[1] = P256
.w
[3];
1737 res
.w
[0] = P256
.w
[2];
1738 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1739 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
1740 res
.w
[1] = (P256
.w
[3] >> shift
);
1741 res
.w
[0] = (P256
.w
[3] << (64 - shift
)) | (P256
.w
[2] >> shift
);
1742 } else { // 22 <= ind - 1 <= 33
1743 shift
= shiftright128
[ind
- 1] - 64; // 2 <= shift <= 38
1745 res
.w
[0] = P256
.w
[3] >> shift
;
1747 res
.w
[1] = x_sign
| 0x3040000000000000ull
| res
.w
[1];
1749 } else { // if exp < 0 and q + exp <= 0 the result is +0 or -0
1750 res
.w
[1] = x_sign
| 0x3040000000000000ull
;
1751 res
.w
[0] = 0x0000000000000000ull
;
1756 /*****************************************************************************
1757 * BID128_round_integral_nearest_away
1758 ****************************************************************************/
1760 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_away
, x
)
1765 int exp
; // unbiased exponent
1766 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
1769 BID_UI64DOUBLE tmp1
;
1770 unsigned int x_nr_bits
;
1773 // UINT128 res is C* at first - represents up to 34 decimal digits ~
1778 // check for NaN or Infinity
1779 if ((x
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
) {
1781 if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
1782 // if x = NaN, then res = Q (x)
1783 // check first for non-canonical NaN payload
1784 if (((x
.w
[1] & 0x00003fffffffffffull
) > 0x0000314dc6448d93ull
) ||
1785 (((x
.w
[1] & 0x00003fffffffffffull
) == 0x0000314dc6448d93ull
) &&
1786 (x
.w
[0] > 0x38c15b09ffffffffull
))) {
1787 x
.w
[1] = x
.w
[1] & 0xffffc00000000000ull
;
1790 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
1792 *pfpsf
|= INVALID_EXCEPTION
;
1794 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out also G[6]-G[16]
1796 } else { // x is QNaN
1798 res
.w
[1] = x
.w
[1] & 0xfc003fffffffffffull
; // clear out G[6]-G[16]
1802 } else { // x is not a NaN, so it must be infinity
1803 if ((x
.w
[1] & MASK_SIGN
) == 0x0ull
) { // x is +inf
1805 res
.w
[1] = 0x7800000000000000ull
;
1806 res
.w
[0] = 0x0000000000000000ull
;
1807 } else { // x is -inf
1809 res
.w
[1] = 0xf800000000000000ull
;
1810 res
.w
[0] = 0x0000000000000000ull
;
1816 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
1817 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
1820 // check for non-canonical values (treated as zero)
1821 if ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
) { // G0_G1=11
1823 x_exp
= (x
.w
[1] << 2) & MASK_EXP
; // biased and shifted left 49 bits
1824 C1
.w
[1] = 0; // significand high
1825 C1
.w
[0] = 0; // significand low
1826 } else { // G0_G1 != 11
1827 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bits
1828 if (C1
.w
[1] > 0x0001ed09bead87c0ull
||
1829 (C1
.w
[1] == 0x0001ed09bead87c0ull
1830 && C1
.w
[0] > 0x378d8e63ffffffffull
)) {
1831 // x is non-canonical if coefficient is larger than 10^34 -1
1834 } else { // canonical
1839 // test for input equal to zero
1840 if ((C1
.w
[1] == 0x0ull
) && (C1
.w
[0] == 0x0ull
)) {
1842 // return 0 preserving the sign bit and the preferred exponent
1844 if (x_exp
<= (0x1820ull
<< 49)) {
1845 res
.w
[1] = (x
.w
[1] & 0x8000000000000000ull
) | 0x3040000000000000ull
;
1847 res
.w
[1] = x_sign
| x_exp
;
1849 res
.w
[0] = 0x0000000000000000ull
;
1852 // x is not special and is not zero
1854 // if (exp <= -(p+1)) return 0.0
1855 if (x_exp
<= 0x2ffa000000000000ull
) { // 0x2ffa000000000000ull == -35
1856 res
.w
[1] = x_sign
| 0x3040000000000000ull
;
1857 res
.w
[0] = 0x0000000000000000ull
;
1860 // q = nr. of decimal digits in x
1861 // determine first the nr. of bits in x
1863 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
1864 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1865 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
1866 tmp1
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
1868 33 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1869 } else { // x < 2^32
1870 tmp1
.d
= (double) (C1
.w
[0]); // exact conversion
1872 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1874 } else { // if x < 2^53
1875 tmp1
.d
= (double) C1
.w
[0]; // exact conversion
1877 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1879 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1880 tmp1
.d
= (double) C1
.w
[1]; // exact conversion
1882 65 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1885 q
= nr_digits
[x_nr_bits
- 1].digits
;
1887 q
= nr_digits
[x_nr_bits
- 1].digits1
;
1888 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
||
1889 (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
&&
1890 C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
1893 exp
= (x_exp
>> 49) - 6176;
1894 if (exp
>= 0) { // -exp <= 0
1895 // the argument is an integer already
1899 } else if ((q
+ exp
) >= 0) { // exp < 0 and 1 <= -exp <= q
1900 // need to shift right -exp digits from the coefficient; the exp will be 0
1901 ind
= -exp
; // 1 <= ind <= 34; ind is a synonym for 'x'
1902 // chop off ind digits from the lower part of C1
1903 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
1906 C1
.w
[0] = C1
.w
[0] + midpoint64
[ind
- 1];
1908 C1
.w
[0] = C1
.w
[0] + midpoint128
[ind
- 20].w
[0];
1909 C1
.w
[1] = C1
.w
[1] + midpoint128
[ind
- 20].w
[1];
1911 if (C1
.w
[0] < tmp64
)
1913 // calculate C* and f*
1914 // C* is actually floor(C*) in this case
1915 // C* and f* need shifting and masking, as shown by
1916 // shiftright128[] and maskhigh128[]
1918 // kx = 10^(-x) = ten2mk128[ind - 1]
1919 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
1920 // the approximation of 10^(-x) was rounded up to 118 bits
1921 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
1922 // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
1923 // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
1924 // if (0 < f* < 10^(-x)) then the result is a midpoint
1925 // if floor(C*) is even then C* = floor(C*) - logical right
1926 // shift; C* has p decimal digits, correct by Prop. 1)
1927 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
1928 // shift; C* has p decimal digits, correct by Pr. 1)
1930 // C* = floor(C*) (logical right shift; C has p decimal digits,
1931 // correct by Property 1)
1932 // n = C* * 10^(e+x)
1934 // shift right C* by Ex-128 = shiftright128[ind]
1935 if (ind
- 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1936 res
.w
[1] = P256
.w
[3];
1937 res
.w
[0] = P256
.w
[2];
1938 } else if (ind
- 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1939 shift
= shiftright128
[ind
- 1]; // 3 <= shift <= 63
1940 res
.w
[0] = (P256
.w
[3] << (64 - shift
)) | (P256
.w
[2] >> shift
);
1941 res
.w
[1] = (P256
.w
[3] >> shift
);
1942 } else { // 22 <= ind - 1 <= 33
1943 shift
= shiftright128
[ind
- 1]; // 2 <= shift <= 38
1945 res
.w
[0] = (P256
.w
[3] >> (shift
- 64)); // 2 <= shift - 64 <= 38
1947 // if the result was a midpoint, it was already rounded away from zero
1948 res
.w
[1] |= x_sign
| 0x3040000000000000ull
;
1950 } else { // if ((q + exp) < 0) <=> q < -exp
1951 // the result is +0 or -0
1952 res
.w
[1] = x_sign
| 0x3040000000000000ull
;
1953 res
.w
[0] = 0x0000000000000000ull
;