1 /* Copyright (C) 2007 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
29 #include "bid_internal.h"
31 /*****************************************************************************
32 * BID128_to_uint32_rnint
33 ****************************************************************************/
35 BID128_FUNCTION_ARG1_NORND_CUSTOMRESTYPE (unsigned int,
36 bid128_to_uint32_rnint
, x
)
41 int exp
; // unbiased exponent
42 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
45 unsigned int x_nr_bits
;
48 UINT128 Cstar
; // C* represents up to 34 decimal digits ~ 113 bits
53 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
54 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bit positions
55 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
58 // check for NaN or Infinity
59 if ((x
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
) {
61 if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
62 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
64 *pfpsf
|= INVALID_EXCEPTION
;
65 // return Integer Indefinite
69 *pfpsf
|= INVALID_EXCEPTION
;
70 // return Integer Indefinite
74 } else { // x is not a NaN, so it must be infinity
75 if (!x_sign
) { // x is +inf
77 *pfpsf
|= INVALID_EXCEPTION
;
78 // return Integer Indefinite
82 *pfpsf
|= INVALID_EXCEPTION
;
83 // return Integer Indefinite
89 // check for non-canonical values (after the check for special values)
90 if ((C1
.w
[1] > 0x0001ed09bead87c0ull
)
91 || (C1
.w
[1] == 0x0001ed09bead87c0ull
92 && (C1
.w
[0] > 0x378d8e63ffffffffull
))
93 || ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
)) {
96 } else if ((C1
.w
[1] == 0x0ull
) && (C1
.w
[0] == 0x0ull
)) {
100 } else { // x is not special and is not zero
102 // q = nr. of decimal digits in x
103 // determine first the nr. of bits in x
105 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
106 // split the 64-bit value in two 32-bit halves to avoid rounding errors
107 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
108 tmp1
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
110 33 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
112 tmp1
.d
= (double) (C1
.w
[0]); // exact conversion
114 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
116 } else { // if x < 2^53
117 tmp1
.d
= (double) C1
.w
[0]; // exact conversion
119 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
121 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
122 tmp1
.d
= (double) C1
.w
[1]; // exact conversion
124 65 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
126 q
= nr_digits
[x_nr_bits
- 1].digits
;
128 q
= nr_digits
[x_nr_bits
- 1].digits1
;
129 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
130 || (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
131 && C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
134 exp
= (x_exp
>> 49) - 6176;
135 if ((q
+ exp
) > 10) { // x >= 10^10 ~= 2^33.2... (cannot fit in 32 bits)
137 *pfpsf
|= INVALID_EXCEPTION
;
138 // return Integer Indefinite
141 } else if ((q
+ exp
) == 10) { // x = c(0)c(1)...c(9).c(10)...c(q-1)
142 // in this case 2^29.89... ~= 10^9 <= x < 10^10 ~= 2^33.2...
143 // so x rounded to an integer may or may not fit in an unsigned 32-bit int
144 // the cases that do not fit are identified here; the ones that fit
145 // fall through and will be handled with other cases further,
146 // under '1 <= q + exp <= 10'
147 if (x_sign
) { // if n < 0 and q + exp = 10
148 // if n < -1/2 then n cannot be converted to uint32 with RN
149 // too large if c(0)c(1)...c(9).c(10)...c(q-1) > 1/2
150 // <=> 0.c(0)c(1)...c(q-1) * 10^11 > 0x05, 1<=q<=34
152 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
153 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
154 if (tmp64
> 0x05ull
) {
156 *pfpsf
|= INVALID_EXCEPTION
;
157 // return Integer Indefinite
161 // else cases that can be rounded to a 32-bit int fall through
162 // to '1 <= q + exp <= 10'
163 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
164 // 0.c(0)c(1)...c(q-1) * 10^11 > 0x05 <=>
165 // C > 0x05 * 10^(q-11) where 1 <= q - 11 <= 23
168 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
169 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
170 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
171 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
173 if (C1
.w
[1] > C
.w
[1] || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] > C
.w
[0])) {
175 *pfpsf
|= INVALID_EXCEPTION
;
176 // return Integer Indefinite
180 // else cases that can be rounded to a 32-bit int fall through
181 // to '1 <= q + exp <= 10'
183 } else { // if n > 0 and q + exp = 10
184 // if n >= 2^32 - 1/2 then n is too large
185 // too large if c(0)c(1)...c(9).c(10)...c(q-1) >= 2^32-1/2
186 // too large if 0.c(0)c(1)...c(q-1) * 10^11 >= 0x9fffffffb, 1<=q<=34
188 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
189 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
190 if (tmp64
>= 0x9fffffffbull
) {
192 *pfpsf
|= INVALID_EXCEPTION
;
193 // return Integer Indefinite
197 // else cases that can be rounded to a 32-bit int fall through
198 // to '1 <= q + exp <= 10'
199 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
200 // 0.c(0)c(1)...c(q-1) * 10^11 >= 0x9fffffffb <=>
201 // C >= 0x9fffffffb * 10^(q-11) where 1 <= q - 11 <= 23
202 // (scale 2^32-1/2 up)
203 tmp64
= 0x9fffffffbull
;
204 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
205 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
206 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
207 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
210 || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] >= C
.w
[0])) {
212 *pfpsf
|= INVALID_EXCEPTION
;
213 // return Integer Indefinite
217 // else cases that can be rounded to a 32-bit int fall through
218 // to '1 <= q + exp <= 10'
222 // n is not too large to be converted to int32: -1/2 <= n < 2^32 - 1/2
223 // Note: some of the cases tested for above fall through to this point
224 if ((q
+ exp
) < 0) { // n = +/-0.0...c(0)c(1)...c(q-1)
228 } else if ((q
+ exp
) == 0) { // n = +/-0.c(0)c(1)...c(q-1)
229 // if 0.c(0)c(1)...c(q-1) <= 0.5 <=> c(0)c(1)...c(q-1) <= 5 * 10^(q-1)
236 if (ind
<= 18) { // 0 <= ind <= 18
237 if ((C1
.w
[1] == 0) && (C1
.w
[0] <= midpoint64
[ind
])) {
238 res
= 0x00000000; // return 0
239 } else if (!x_sign
) { // n > 0
240 res
= 0x00000001; // return +1
243 *pfpsf
|= INVALID_EXCEPTION
;
245 } else { // 19 <= ind <= 33
246 if ((C1
.w
[1] < midpoint128
[ind
- 19].w
[1])
247 || ((C1
.w
[1] == midpoint128
[ind
- 19].w
[1])
248 && (C1
.w
[0] <= midpoint128
[ind
- 19].w
[0]))) {
249 res
= 0x00000000; // return 0
250 } else if (!x_sign
) { // n > 0
251 res
= 0x00000001; // return +1
254 *pfpsf
|= INVALID_EXCEPTION
;
257 } else { // if (1 <= q + exp <= 10, 1 <= q <= 34, -33 <= exp <= 9)
258 if (x_sign
) { // x <= -1
260 *pfpsf
|= INVALID_EXCEPTION
;
261 // return Integer Indefinite
265 // 1 <= x < 2^32-1/2 so x can be rounded
266 // to nearest to a 32-bit unsigned integer
267 if (exp
< 0) { // 2 <= q <= 34, -33 <= exp <= -1, 1 <= q + exp <= 10
268 ind
= -exp
; // 1 <= ind <= 33; ind is a synonym for 'x'
269 // chop off ind digits from the lower part of C1
270 // C1 = C1 + 1/2 * 10^ind where the result C1 fits in 127 bits
273 C1
.w
[0] = C1
.w
[0] + midpoint64
[ind
- 1];
275 C1
.w
[0] = C1
.w
[0] + midpoint128
[ind
- 20].w
[0];
276 C1
.w
[1] = C1
.w
[1] + midpoint128
[ind
- 20].w
[1];
280 // calculate C* and f*
281 // C* is actually floor(C*) in this case
282 // C* and f* need shifting and masking, as shown by
283 // shiftright128[] and maskhigh128[]
285 // kx = 10^(-x) = ten2mk128[ind - 1]
286 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
287 // the approximation of 10^(-x) was rounded up to 118 bits
288 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
289 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
290 Cstar
.w
[1] = P256
.w
[3];
291 Cstar
.w
[0] = P256
.w
[2];
293 fstar
.w
[2] = P256
.w
[2] & maskhigh128
[ind
- 1];
294 fstar
.w
[1] = P256
.w
[1];
295 fstar
.w
[0] = P256
.w
[0];
296 } else { // 22 <= ind - 1 <= 33
298 Cstar
.w
[0] = P256
.w
[3];
299 fstar
.w
[3] = P256
.w
[3] & maskhigh128
[ind
- 1];
300 fstar
.w
[2] = P256
.w
[2];
301 fstar
.w
[1] = P256
.w
[1];
302 fstar
.w
[0] = P256
.w
[0];
304 // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
305 // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
306 // if (0 < f* < 10^(-x)) then the result is a midpoint
307 // if floor(C*) is even then C* = floor(C*) - logical right
308 // shift; C* has p decimal digits, correct by Prop. 1)
309 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
310 // shift; C* has p decimal digits, correct by Pr. 1)
312 // C* = floor(C*) (logical right shift; C has p decimal digits,
313 // correct by Property 1)
316 // shift right C* by Ex-128 = shiftright128[ind]
317 shift
= shiftright128
[ind
- 1]; // 0 <= shift <= 102
318 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
320 (Cstar
.w
[0] >> shift
) | (Cstar
.w
[1] << (64 - shift
));
321 // redundant, it will be 0! Cstar.w[1] = (Cstar.w[1] >> shift);
322 } else { // 22 <= ind - 1 <= 33
323 Cstar
.w
[0] = (Cstar
.w
[0] >> (shift
- 64)); // 2 <= shift - 64 <= 38
325 // if the result was a midpoint it was rounded away from zero, so
326 // it will need a correction
327 // check for midpoints
328 if ((fstar
.w
[3] == 0) && (fstar
.w
[2] == 0)
329 && (fstar
.w
[1] || fstar
.w
[0])
330 && (fstar
.w
[1] < ten2mk128trunc
[ind
- 1].w
[1]
331 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
332 && fstar
.w
[0] <= ten2mk128trunc
[ind
- 1].w
[0]))) {
333 // the result is a midpoint; round to nearest
334 if (Cstar
.w
[0] & 0x01) { // Cstar.w[0] is odd; MP in [EVEN, ODD]
335 // if floor(C*) is odd C = floor(C*) - 1; the result >= 1
336 Cstar
.w
[0]--; // Cstar.w[0] is now even
337 } // else MP in [ODD, EVEN]
339 res
= Cstar
.w
[0]; // the result is positive
340 } else if (exp
== 0) {
344 } else { // if (exp > 0) => 1 <= exp <= 9, 1 <= q < 9, 2 <= q + exp <= 10
345 // res = C * 10^exp (exact)
346 res
= C1
.w
[0] * ten2k64
[exp
];
354 /*****************************************************************************
355 * BID128_to_uint32_xrnint
356 ****************************************************************************/
358 BID128_FUNCTION_ARG1_NORND_CUSTOMRESTYPE (unsigned int,
359 bid128_to_uint32_xrnint
, x
)
364 int exp
; // unbiased exponent
365 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
366 UINT64 tmp64
, tmp64A
;
368 unsigned int x_nr_bits
;
371 UINT128 Cstar
; // C* represents up to 34 decimal digits ~ 113 bits
374 unsigned int tmp_inexact
= 0;
377 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
378 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bit positions
379 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
382 // check for NaN or Infinity
383 if ((x
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
) {
385 if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
386 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
388 *pfpsf
|= INVALID_EXCEPTION
;
389 // return Integer Indefinite
391 } else { // x is QNaN
393 *pfpsf
|= INVALID_EXCEPTION
;
394 // return Integer Indefinite
398 } else { // x is not a NaN, so it must be infinity
399 if (!x_sign
) { // x is +inf
401 *pfpsf
|= INVALID_EXCEPTION
;
402 // return Integer Indefinite
404 } else { // x is -inf
406 *pfpsf
|= INVALID_EXCEPTION
;
407 // return Integer Indefinite
413 // check for non-canonical values (after the check for special values)
414 if ((C1
.w
[1] > 0x0001ed09bead87c0ull
)
415 || (C1
.w
[1] == 0x0001ed09bead87c0ull
416 && (C1
.w
[0] > 0x378d8e63ffffffffull
))
417 || ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
)) {
420 } else if ((C1
.w
[1] == 0x0ull
) && (C1
.w
[0] == 0x0ull
)) {
424 } else { // x is not special and is not zero
426 // q = nr. of decimal digits in x
427 // determine first the nr. of bits in x
429 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
430 // split the 64-bit value in two 32-bit halves to avoid rounding errors
431 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
432 tmp1
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
434 33 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
436 tmp1
.d
= (double) (C1
.w
[0]); // exact conversion
438 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
440 } else { // if x < 2^53
441 tmp1
.d
= (double) C1
.w
[0]; // exact conversion
443 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
445 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
446 tmp1
.d
= (double) C1
.w
[1]; // exact conversion
448 65 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
450 q
= nr_digits
[x_nr_bits
- 1].digits
;
452 q
= nr_digits
[x_nr_bits
- 1].digits1
;
453 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
454 || (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
455 && C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
458 exp
= (x_exp
>> 49) - 6176;
459 if ((q
+ exp
) > 10) { // x >= 10^10 ~= 2^33.2... (cannot fit in 32 bits)
461 *pfpsf
|= INVALID_EXCEPTION
;
462 // return Integer Indefinite
465 } else if ((q
+ exp
) == 10) { // x = c(0)c(1)...c(9).c(10)...c(q-1)
466 // in this case 2^29.89... ~= 10^9 <= x < 10^10 ~= 2^33.2...
467 // so x rounded to an integer may or may not fit in an unsigned 32-bit int
468 // the cases that do not fit are identified here; the ones that fit
469 // fall through and will be handled with other cases further,
470 // under '1 <= q + exp <= 10'
471 if (x_sign
) { // if n < 0 and q + exp = 10
472 // if n < -1/2 then n cannot be converted to uint32 with RN
473 // too large if c(0)c(1)...c(9).c(10)...c(q-1) > 1/2
474 // <=> 0.c(0)c(1)...c(q-1) * 10^11 > 0x05, 1<=q<=34
476 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
477 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
478 if (tmp64
> 0x05ull
) {
480 *pfpsf
|= INVALID_EXCEPTION
;
481 // return Integer Indefinite
485 // else cases that can be rounded to a 32-bit int fall through
486 // to '1 <= q + exp <= 10'
487 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
488 // 0.c(0)c(1)...c(q-1) * 10^11 > 0x05 <=>
489 // C > 0x05 * 10^(q-11) where 1 <= q - 11 <= 23
492 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
493 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
494 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
495 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
497 if (C1
.w
[1] > C
.w
[1] || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] > C
.w
[0])) {
499 *pfpsf
|= INVALID_EXCEPTION
;
500 // return Integer Indefinite
504 // else cases that can be rounded to a 32-bit int fall through
505 // to '1 <= q + exp <= 10'
507 } else { // if n > 0 and q + exp = 10
508 // if n >= 2^32 - 1/2 then n is too large
509 // too large if c(0)c(1)...c(9).c(10)...c(q-1) >= 2^32-1/2
510 // too large if 0.c(0)c(1)...c(q-1) * 10^11 >= 0x9fffffffb, 1<=q<=34
512 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
513 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
514 if (tmp64
>= 0x9fffffffbull
) {
516 *pfpsf
|= INVALID_EXCEPTION
;
517 // return Integer Indefinite
521 // else cases that can be rounded to a 32-bit int fall through
522 // to '1 <= q + exp <= 10'
523 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
524 // 0.c(0)c(1)...c(q-1) * 10^11 >= 0x9fffffffb <=>
525 // C >= 0x9fffffffb * 10^(q-11) where 1 <= q - 11 <= 23
526 // (scale 2^32-1/2 up)
527 tmp64
= 0x9fffffffbull
;
528 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
529 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
530 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
531 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
534 || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] >= C
.w
[0])) {
536 *pfpsf
|= INVALID_EXCEPTION
;
537 // return Integer Indefinite
541 // else cases that can be rounded to a 32-bit int fall through
542 // to '1 <= q + exp <= 10'
546 // n is not too large to be converted to int32: -1/2 <= n < 2^32 - 1/2
547 // Note: some of the cases tested for above fall through to this point
548 if ((q
+ exp
) < 0) { // n = +/-0.0...c(0)c(1)...c(q-1)
550 *pfpsf
|= INEXACT_EXCEPTION
;
554 } else if ((q
+ exp
) == 0) { // n = +/-0.c(0)c(1)...c(q-1)
555 // if 0.c(0)c(1)...c(q-1) <= 0.5 <=> c(0)c(1)...c(q-1) <= 5 * 10^(q-1)
562 if (ind
<= 18) { // 0 <= ind <= 18
563 if ((C1
.w
[1] == 0) && (C1
.w
[0] <= midpoint64
[ind
])) {
564 res
= 0x00000000; // return 0
565 } else if (!x_sign
) { // n > 0
566 res
= 0x00000001; // return +1
569 *pfpsf
|= INVALID_EXCEPTION
;
572 } else { // 19 <= ind <= 33
573 if ((C1
.w
[1] < midpoint128
[ind
- 19].w
[1])
574 || ((C1
.w
[1] == midpoint128
[ind
- 19].w
[1])
575 && (C1
.w
[0] <= midpoint128
[ind
- 19].w
[0]))) {
576 res
= 0x00000000; // return 0
577 } else if (!x_sign
) { // n > 0
578 res
= 0x00000001; // return +1
581 *pfpsf
|= INVALID_EXCEPTION
;
586 *pfpsf
|= INEXACT_EXCEPTION
;
587 } else { // if (1 <= q + exp <= 10, 1 <= q <= 34, -33 <= exp <= 9)
588 if (x_sign
) { // x <= -1
590 *pfpsf
|= INVALID_EXCEPTION
;
591 // return Integer Indefinite
595 // 1 <= x < 2^32-1/2 so x can be rounded
596 // to nearest to a 32-bit unsigned integer
597 if (exp
< 0) { // 2 <= q <= 34, -33 <= exp <= -1, 1 <= q + exp <= 10
598 ind
= -exp
; // 1 <= ind <= 33; ind is a synonym for 'x'
599 // chop off ind digits from the lower part of C1
600 // C1 = C1 + 1/2 * 10^ind where the result C1 fits in 127 bits
603 C1
.w
[0] = C1
.w
[0] + midpoint64
[ind
- 1];
605 C1
.w
[0] = C1
.w
[0] + midpoint128
[ind
- 20].w
[0];
606 C1
.w
[1] = C1
.w
[1] + midpoint128
[ind
- 20].w
[1];
610 // calculate C* and f*
611 // C* is actually floor(C*) in this case
612 // C* and f* need shifting and masking, as shown by
613 // shiftright128[] and maskhigh128[]
615 // kx = 10^(-x) = ten2mk128[ind - 1]
616 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
617 // the approximation of 10^(-x) was rounded up to 118 bits
618 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
619 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
620 Cstar
.w
[1] = P256
.w
[3];
621 Cstar
.w
[0] = P256
.w
[2];
623 fstar
.w
[2] = P256
.w
[2] & maskhigh128
[ind
- 1];
624 fstar
.w
[1] = P256
.w
[1];
625 fstar
.w
[0] = P256
.w
[0];
626 } else { // 22 <= ind - 1 <= 33
628 Cstar
.w
[0] = P256
.w
[3];
629 fstar
.w
[3] = P256
.w
[3] & maskhigh128
[ind
- 1];
630 fstar
.w
[2] = P256
.w
[2];
631 fstar
.w
[1] = P256
.w
[1];
632 fstar
.w
[0] = P256
.w
[0];
634 // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
635 // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
636 // if (0 < f* < 10^(-x)) then the result is a midpoint
637 // if floor(C*) is even then C* = floor(C*) - logical right
638 // shift; C* has p decimal digits, correct by Prop. 1)
639 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
640 // shift; C* has p decimal digits, correct by Pr. 1)
642 // C* = floor(C*) (logical right shift; C has p decimal digits,
643 // correct by Property 1)
646 // shift right C* by Ex-128 = shiftright128[ind]
647 shift
= shiftright128
[ind
- 1]; // 0 <= shift <= 102
648 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
650 (Cstar
.w
[0] >> shift
) | (Cstar
.w
[1] << (64 - shift
));
651 // redundant, it will be 0! Cstar.w[1] = (Cstar.w[1] >> shift);
652 } else { // 22 <= ind - 1 <= 33
653 Cstar
.w
[0] = (Cstar
.w
[0] >> (shift
- 64)); // 2 <= shift - 64 <= 38
655 // determine inexactness of the rounding of C*
656 // if (0 < f* - 1/2 < 10^(-x)) then
657 // the result is exact
658 // else // if (f* - 1/2 > T*) then
659 // the result is inexact
661 if (fstar
.w
[1] > 0x8000000000000000ull
||
662 (fstar
.w
[1] == 0x8000000000000000ull
663 && fstar
.w
[0] > 0x0ull
)) {
664 // f* > 1/2 and the result may be exact
665 tmp64
= fstar
.w
[1] - 0x8000000000000000ull
; // f* - 1/2
666 if (tmp64
> ten2mk128trunc
[ind
- 1].w
[1]
667 || (tmp64
== ten2mk128trunc
[ind
- 1].w
[1]
668 && fstar
.w
[0] >= ten2mk128trunc
[ind
- 1].w
[0])) {
669 // set the inexact flag
670 // *pfpsf |= INEXACT_EXCEPTION;
672 } // else the result is exact
673 } else { // the result is inexact; f2* <= 1/2
674 // set the inexact flag
675 // *pfpsf |= INEXACT_EXCEPTION;
678 } else if (ind
- 1 <= 21) { // if 3 <= ind <= 21
679 if (fstar
.w
[3] > 0x0 ||
680 (fstar
.w
[3] == 0x0 && fstar
.w
[2] > onehalf128
[ind
- 1]) ||
681 (fstar
.w
[3] == 0x0 && fstar
.w
[2] == onehalf128
[ind
- 1] &&
682 (fstar
.w
[1] || fstar
.w
[0]))) {
683 // f2* > 1/2 and the result may be exact
684 // Calculate f2* - 1/2
685 tmp64
= fstar
.w
[2] - onehalf128
[ind
- 1];
687 if (tmp64
> fstar
.w
[2])
690 || fstar
.w
[1] > ten2mk128trunc
[ind
- 1].w
[1]
691 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
692 && fstar
.w
[0] > ten2mk128trunc
[ind
- 1].w
[0])) {
693 // set the inexact flag
694 // *pfpsf |= INEXACT_EXCEPTION;
696 } // else the result is exact
697 } else { // the result is inexact; f2* <= 1/2
698 // set the inexact flag
699 // *pfpsf |= INEXACT_EXCEPTION;
702 } else { // if 22 <= ind <= 33
703 if (fstar
.w
[3] > onehalf128
[ind
- 1] ||
704 (fstar
.w
[3] == onehalf128
[ind
- 1] &&
705 (fstar
.w
[2] || fstar
.w
[1] || fstar
.w
[0]))) {
706 // f2* > 1/2 and the result may be exact
707 // Calculate f2* - 1/2
708 tmp64
= fstar
.w
[3] - onehalf128
[ind
- 1];
709 if (tmp64
|| fstar
.w
[2]
710 || fstar
.w
[1] > ten2mk128trunc
[ind
- 1].w
[1]
711 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
712 && fstar
.w
[0] > ten2mk128trunc
[ind
- 1].w
[0])) {
713 // set the inexact flag
714 // *pfpsf |= INEXACT_EXCEPTION;
716 } // else the result is exact
717 } else { // the result is inexact; f2* <= 1/2
718 // set the inexact flag
719 // *pfpsf |= INEXACT_EXCEPTION;
724 // if the result was a midpoint it was rounded away from zero, so
725 // it will need a correction
726 // check for midpoints
727 if ((fstar
.w
[3] == 0) && (fstar
.w
[2] == 0)
728 && (fstar
.w
[1] || fstar
.w
[0])
729 && (fstar
.w
[1] < ten2mk128trunc
[ind
- 1].w
[1]
730 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
731 && fstar
.w
[0] <= ten2mk128trunc
[ind
- 1].w
[0]))) {
732 // the result is a midpoint; round to nearest
733 if (Cstar
.w
[0] & 0x01) { // Cstar.w[0] is odd; MP in [EVEN, ODD]
734 // if floor(C*) is odd C = floor(C*) - 1; the result >= 1
735 Cstar
.w
[0]--; // Cstar.w[0] is now even
736 } // else MP in [ODD, EVEN]
738 res
= Cstar
.w
[0]; // the result is positive
740 *pfpsf
|= INEXACT_EXCEPTION
;
741 } else if (exp
== 0) {
745 } else { // if (exp > 0) => 1 <= exp <= 9, 1 <= q < 9, 2 <= q + exp <= 10
746 // res = C * 10^exp (exact)
747 res
= C1
.w
[0] * ten2k64
[exp
];
755 /*****************************************************************************
756 * BID128_to_uint32_floor
757 ****************************************************************************/
759 BID128_FUNCTION_ARG1_NORND_CUSTOMRESTYPE (unsigned int,
760 bid128_to_uint32_floor
, x
)
765 int exp
; // unbiased exponent
766 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
767 UINT64 tmp64
, tmp64A
;
769 unsigned int x_nr_bits
;
772 UINT128 Cstar
; // C* represents up to 34 decimal digits ~ 113 bits
775 int is_inexact_gt_midpoint
= 0;
776 int is_midpoint_lt_even
= 0;
779 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
780 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bit positions
781 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
784 // check for NaN or Infinity
785 if ((x
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
) {
787 if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
788 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
790 *pfpsf
|= INVALID_EXCEPTION
;
791 // return Integer Indefinite
793 } else { // x is QNaN
795 *pfpsf
|= INVALID_EXCEPTION
;
796 // return Integer Indefinite
800 } else { // x is not a NaN, so it must be infinity
801 if (!x_sign
) { // x is +inf
803 *pfpsf
|= INVALID_EXCEPTION
;
804 // return Integer Indefinite
806 } else { // x is -inf
808 *pfpsf
|= INVALID_EXCEPTION
;
809 // return Integer Indefinite
815 // check for non-canonical values (after the check for special values)
816 if ((C1
.w
[1] > 0x0001ed09bead87c0ull
)
817 || (C1
.w
[1] == 0x0001ed09bead87c0ull
818 && (C1
.w
[0] > 0x378d8e63ffffffffull
))
819 || ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
)) {
822 } else if ((C1
.w
[1] == 0x0ull
) && (C1
.w
[0] == 0x0ull
)) {
826 } else { // x is not special and is not zero
830 *pfpsf
|= INVALID_EXCEPTION
;
831 // return Integer Indefinite
835 // x > 0 from this point on
836 // q = nr. of decimal digits in x
837 // determine first the nr. of bits in x
839 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
840 // split the 64-bit value in two 32-bit halves to avoid rounding errors
841 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
842 tmp1
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
844 33 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
846 tmp1
.d
= (double) (C1
.w
[0]); // exact conversion
848 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
850 } else { // if x < 2^53
851 tmp1
.d
= (double) C1
.w
[0]; // exact conversion
853 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
855 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
856 tmp1
.d
= (double) C1
.w
[1]; // exact conversion
858 65 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
860 q
= nr_digits
[x_nr_bits
- 1].digits
;
862 q
= nr_digits
[x_nr_bits
- 1].digits1
;
863 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
864 || (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
865 && C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
868 exp
= (x_exp
>> 49) - 6176;
870 if ((q
+ exp
) > 10) { // x >= 10^10 ~= 2^33.2... (cannot fit in 32 bits)
872 *pfpsf
|= INVALID_EXCEPTION
;
873 // return Integer Indefinite
876 } else if ((q
+ exp
) == 10) { // x = c(0)c(1)...c(9).c(10)...c(q-1)
877 // in this case 2^29.89... ~= 10^9 <= x < 10^10 ~= 2^33.2...
878 // so x rounded to an integer may or may not fit in a signed 32-bit int
879 // the cases that do not fit are identified here; the ones that fit
880 // fall through and will be handled with other cases further,
881 // under '1 <= q + exp <= 10'
882 // n > 0 and q + exp = 10
883 // if n >= 2^32 then n is too large
884 // too large if c(0)c(1)...c(9).c(10)...c(q-1) >= 2^32
885 // too large if 0.c(0)c(1)...c(q-1) * 10^11 >= 0xa00000000, 1<=q<=34
887 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
888 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
889 if (tmp64
>= 0xa00000000ull
) {
891 *pfpsf
|= INVALID_EXCEPTION
;
892 // return Integer Indefinite
896 // else cases that can be rounded to a 32-bit int fall through
897 // to '1 <= q + exp <= 10'
898 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
899 // 0.c(0)c(1)...c(q-1) * 10^11 >= 0xa00000000 <=>
900 // C >= 0xa00000000 * 10^(q-11) where 1 <= q - 11 <= 23
902 tmp64
= 0xa00000000ull
;
903 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
904 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
905 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
906 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
908 if (C1
.w
[1] > C
.w
[1] || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] >= C
.w
[0])) {
910 *pfpsf
|= INVALID_EXCEPTION
;
911 // return Integer Indefinite
915 // else cases that can be rounded to a 32-bit int fall through
916 // to '1 <= q + exp <= 10'
919 // n is not too large to be converted to int32: 0 <= n < 2^31
920 // Note: some of the cases tested for above fall through to this point
921 if ((q
+ exp
) <= 0) {
922 // n = +0.0...c(0)c(1)...c(q-1) or n = +0.c(0)c(1)...c(q-1)
926 } else { // if (1 <= q + exp <= 10, 1 <= q <= 34, -33 <= exp <= 9)
927 // 1 <= x < 2^32 so x can be rounded down to a 32-bit unsigned integer
928 if (exp
< 0) { // 2 <= q <= 34, -33 <= exp <= -1, 1 <= q + exp <= 10
929 ind
= -exp
; // 1 <= ind <= 33; ind is a synonym for 'x'
930 // chop off ind digits from the lower part of C1
931 // C1 = C1 + 1/2 * 10^ind where the result C1 fits in 127 bits
934 C1
.w
[0] = C1
.w
[0] + midpoint64
[ind
- 1];
936 C1
.w
[0] = C1
.w
[0] + midpoint128
[ind
- 20].w
[0];
937 C1
.w
[1] = C1
.w
[1] + midpoint128
[ind
- 20].w
[1];
941 // calculate C* and f*
942 // C* is actually floor(C*) in this case
943 // C* and f* need shifting and masking, as shown by
944 // shiftright128[] and maskhigh128[]
946 // kx = 10^(-x) = ten2mk128[ind - 1]
947 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
948 // the approximation of 10^(-x) was rounded up to 118 bits
949 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
950 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
951 Cstar
.w
[1] = P256
.w
[3];
952 Cstar
.w
[0] = P256
.w
[2];
954 fstar
.w
[2] = P256
.w
[2] & maskhigh128
[ind
- 1];
955 fstar
.w
[1] = P256
.w
[1];
956 fstar
.w
[0] = P256
.w
[0];
957 } else { // 22 <= ind - 1 <= 33
959 Cstar
.w
[0] = P256
.w
[3];
960 fstar
.w
[3] = P256
.w
[3] & maskhigh128
[ind
- 1];
961 fstar
.w
[2] = P256
.w
[2];
962 fstar
.w
[1] = P256
.w
[1];
963 fstar
.w
[0] = P256
.w
[0];
965 // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
966 // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
967 // if (0 < f* < 10^(-x)) then the result is a midpoint
968 // if floor(C*) is even then C* = floor(C*) - logical right
969 // shift; C* has p decimal digits, correct by Prop. 1)
970 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
971 // shift; C* has p decimal digits, correct by Pr. 1)
973 // C* = floor(C*) (logical right shift; C has p decimal digits,
974 // correct by Property 1)
977 // shift right C* by Ex-128 = shiftright128[ind]
978 shift
= shiftright128
[ind
- 1]; // 0 <= shift <= 102
979 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
981 (Cstar
.w
[0] >> shift
) | (Cstar
.w
[1] << (64 - shift
));
982 // redundant, it will be 0! Cstar.w[1] = (Cstar.w[1] >> shift);
983 } else { // 22 <= ind - 1 <= 33
984 Cstar
.w
[0] = (Cstar
.w
[0] >> (shift
- 64)); // 2 <= shift - 64 <= 38
986 // determine inexactness of the rounding of C*
987 // if (0 < f* - 1/2 < 10^(-x)) then
988 // the result is exact
989 // else // if (f* - 1/2 > T*) then
990 // the result is inexact
992 if (fstar
.w
[1] > 0x8000000000000000ull
||
993 (fstar
.w
[1] == 0x8000000000000000ull
994 && fstar
.w
[0] > 0x0ull
)) {
995 // f* > 1/2 and the result may be exact
996 tmp64
= fstar
.w
[1] - 0x8000000000000000ull
; // f* - 1/2
997 if (tmp64
> ten2mk128trunc
[ind
- 1].w
[1]
998 || (tmp64
== ten2mk128trunc
[ind
- 1].w
[1]
999 && fstar
.w
[0] >= ten2mk128trunc
[ind
- 1].w
[0])) {
1000 } // else the result is exact
1001 } else { // the result is inexact; f2* <= 1/2
1002 is_inexact_gt_midpoint
= 1;
1004 } else if (ind
- 1 <= 21) { // if 3 <= ind <= 21
1005 if (fstar
.w
[3] > 0x0 ||
1006 (fstar
.w
[3] == 0x0 && fstar
.w
[2] > onehalf128
[ind
- 1]) ||
1007 (fstar
.w
[3] == 0x0 && fstar
.w
[2] == onehalf128
[ind
- 1] &&
1008 (fstar
.w
[1] || fstar
.w
[0]))) {
1009 // f2* > 1/2 and the result may be exact
1010 // Calculate f2* - 1/2
1011 tmp64
= fstar
.w
[2] - onehalf128
[ind
- 1];
1012 tmp64A
= fstar
.w
[3];
1013 if (tmp64
> fstar
.w
[2])
1016 || fstar
.w
[1] > ten2mk128trunc
[ind
- 1].w
[1]
1017 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
1018 && fstar
.w
[0] > ten2mk128trunc
[ind
- 1].w
[0])) {
1019 } // else the result is exact
1020 } else { // the result is inexact; f2* <= 1/2
1021 is_inexact_gt_midpoint
= 1;
1023 } else { // if 22 <= ind <= 33
1024 if (fstar
.w
[3] > onehalf128
[ind
- 1] ||
1025 (fstar
.w
[3] == onehalf128
[ind
- 1] &&
1026 (fstar
.w
[2] || fstar
.w
[1] || fstar
.w
[0]))) {
1027 // f2* > 1/2 and the result may be exact
1028 // Calculate f2* - 1/2
1029 tmp64
= fstar
.w
[3] - onehalf128
[ind
- 1];
1030 if (tmp64
|| fstar
.w
[2]
1031 || fstar
.w
[1] > ten2mk128trunc
[ind
- 1].w
[1]
1032 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
1033 && fstar
.w
[0] > ten2mk128trunc
[ind
- 1].w
[0])) {
1034 } // else the result is exact
1035 } else { // the result is inexact; f2* <= 1/2
1036 is_inexact_gt_midpoint
= 1;
1040 // if the result was a midpoint it was rounded away from zero, so
1041 // it will need a correction
1042 // check for midpoints
1043 if ((fstar
.w
[3] == 0) && (fstar
.w
[2] == 0)
1044 && (fstar
.w
[1] || fstar
.w
[0])
1045 && (fstar
.w
[1] < ten2mk128trunc
[ind
- 1].w
[1]
1046 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
1047 && fstar
.w
[0] <= ten2mk128trunc
[ind
- 1].w
[0]))) {
1048 // the result is a midpoint; round to nearest
1049 if (Cstar
.w
[0] & 0x01) { // Cstar.w[0] is odd; MP in [EVEN, ODD]
1050 // if floor(C*) is odd C = floor(C*) - 1; the result >= 1
1051 Cstar
.w
[0]--; // Cstar.w[0] is now even
1052 is_inexact_gt_midpoint
= 0;
1053 } else { // else MP in [ODD, EVEN]
1054 is_midpoint_lt_even
= 1;
1055 is_inexact_gt_midpoint
= 0;
1058 // general correction for RM
1059 if (is_midpoint_lt_even
|| is_inexact_gt_midpoint
) {
1060 Cstar
.w
[0] = Cstar
.w
[0] - 1;
1062 ; // the result is already correct
1065 } else if (exp
== 0) {
1069 } else { // if (exp > 0) => 1 <= exp <= 9, 1 <= q < 9, 2 <= q + exp <= 10
1070 // res = +C * 10^exp (exact)
1071 res
= C1
.w
[0] * ten2k64
[exp
];
1079 /*****************************************************************************
1080 * BID128_to_uint32_xfloor
1081 ****************************************************************************/
1083 BID128_FUNCTION_ARG1_NORND_CUSTOMRESTYPE (unsigned int,
1084 bid128_to_uint32_xfloor
, x
)
1089 int exp
; // unbiased exponent
1090 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
1091 UINT64 tmp64
, tmp64A
;
1092 BID_UI64DOUBLE tmp1
;
1093 unsigned int x_nr_bits
;
1096 UINT128 Cstar
; // C* represents up to 34 decimal digits ~ 113 bits
1099 int is_inexact_gt_midpoint
= 0;
1100 int is_midpoint_lt_even
= 0;
1103 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
1104 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bit positions
1105 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
1108 // check for NaN or Infinity
1109 if ((x
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
) {
1111 if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
1112 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
1114 *pfpsf
|= INVALID_EXCEPTION
;
1115 // return Integer Indefinite
1117 } else { // x is QNaN
1119 *pfpsf
|= INVALID_EXCEPTION
;
1120 // return Integer Indefinite
1124 } else { // x is not a NaN, so it must be infinity
1125 if (!x_sign
) { // x is +inf
1127 *pfpsf
|= INVALID_EXCEPTION
;
1128 // return Integer Indefinite
1130 } else { // x is -inf
1132 *pfpsf
|= INVALID_EXCEPTION
;
1133 // return Integer Indefinite
1139 // check for non-canonical values (after the check for special values)
1140 if ((C1
.w
[1] > 0x0001ed09bead87c0ull
)
1141 || (C1
.w
[1] == 0x0001ed09bead87c0ull
1142 && (C1
.w
[0] > 0x378d8e63ffffffffull
))
1143 || ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
)) {
1146 } else if ((C1
.w
[1] == 0x0ull
) && (C1
.w
[0] == 0x0ull
)) {
1150 } else { // x is not special and is not zero
1154 *pfpsf
|= INVALID_EXCEPTION
;
1155 // return Integer Indefinite
1159 // x > 0 from this point on
1160 // q = nr. of decimal digits in x
1161 // determine first the nr. of bits in x
1163 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
1164 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1165 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
1166 tmp1
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
1168 33 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1169 } else { // x < 2^32
1170 tmp1
.d
= (double) (C1
.w
[0]); // exact conversion
1172 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1174 } else { // if x < 2^53
1175 tmp1
.d
= (double) C1
.w
[0]; // exact conversion
1177 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1179 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1180 tmp1
.d
= (double) C1
.w
[1]; // exact conversion
1182 65 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1184 q
= nr_digits
[x_nr_bits
- 1].digits
;
1186 q
= nr_digits
[x_nr_bits
- 1].digits1
;
1187 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
1188 || (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
1189 && C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
1192 exp
= (x_exp
>> 49) - 6176;
1193 if ((q
+ exp
) > 10) { // x >= 10^10 ~= 2^33.2... (cannot fit in 32 bits)
1195 *pfpsf
|= INVALID_EXCEPTION
;
1196 // return Integer Indefinite
1199 } else if ((q
+ exp
) == 10) { // x = c(0)c(1)...c(9).c(10)...c(q-1)
1200 // in this case 2^29.89... ~= 10^9 <= x < 10^10 ~= 2^33.2...
1201 // so x rounded to an integer may or may not fit in a signed 32-bit int
1202 // the cases that do not fit are identified here; the ones that fit
1203 // fall through and will be handled with other cases further,
1204 // under '1 <= q + exp <= 10'
1205 // n > 0 and q + exp = 10
1206 // if n >= 2^32 then n is too large
1207 // too large if c(0)c(1)...c(9).c(10)...c(q-1) >= 2^32
1208 // too large if 0.c(0)c(1)...c(q-1) * 10^11 >= 0xa00000000, 1<=q<=34
1210 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
1211 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
1212 if (tmp64
>= 0xa00000000ull
) {
1214 *pfpsf
|= INVALID_EXCEPTION
;
1215 // return Integer Indefinite
1219 // else cases that can be rounded to a 32-bit int fall through
1220 // to '1 <= q + exp <= 10'
1221 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
1222 // 0.c(0)c(1)...c(q-1) * 10^11 >= 0xa00000000 <=>
1223 // C >= 0xa00000000 * 10^(q-11) where 1 <= q - 11 <= 23
1225 tmp64
= 0xa00000000ull
;
1226 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
1227 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
1228 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
1229 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
1231 if (C1
.w
[1] > C
.w
[1] || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] >= C
.w
[0])) {
1233 *pfpsf
|= INVALID_EXCEPTION
;
1234 // return Integer Indefinite
1238 // else cases that can be rounded to a 32-bit int fall through
1239 // to '1 <= q + exp <= 10'
1242 // n is not too large to be converted to int32: 0 <= n < 2^31
1243 // Note: some of the cases tested for above fall through to this point
1244 if ((q
+ exp
) <= 0) {
1245 // n = +0.0...c(0)c(1)...c(q-1) or n = +0.c(0)c(1)...c(q-1)
1247 *pfpsf
|= INEXACT_EXCEPTION
;
1251 } else { // if (1 <= q + exp <= 10, 1 <= q <= 34, -33 <= exp <= 9)
1252 // 1 <= x < 2^32 so x can be rounded down to a 32-bit unsigned integer
1253 if (exp
< 0) { // 2 <= q <= 34, -33 <= exp <= -1, 1 <= q + exp <= 10
1254 ind
= -exp
; // 1 <= ind <= 33; ind is a synonym for 'x'
1255 // chop off ind digits from the lower part of C1
1256 // C1 = C1 + 1/2 * 10^ind where the result C1 fits in 127 bits
1259 C1
.w
[0] = C1
.w
[0] + midpoint64
[ind
- 1];
1261 C1
.w
[0] = C1
.w
[0] + midpoint128
[ind
- 20].w
[0];
1262 C1
.w
[1] = C1
.w
[1] + midpoint128
[ind
- 20].w
[1];
1264 if (C1
.w
[0] < tmp64
)
1266 // calculate C* and f*
1267 // C* is actually floor(C*) in this case
1268 // C* and f* need shifting and masking, as shown by
1269 // shiftright128[] and maskhigh128[]
1271 // kx = 10^(-x) = ten2mk128[ind - 1]
1272 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
1273 // the approximation of 10^(-x) was rounded up to 118 bits
1274 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
1275 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
1276 Cstar
.w
[1] = P256
.w
[3];
1277 Cstar
.w
[0] = P256
.w
[2];
1279 fstar
.w
[2] = P256
.w
[2] & maskhigh128
[ind
- 1];
1280 fstar
.w
[1] = P256
.w
[1];
1281 fstar
.w
[0] = P256
.w
[0];
1282 } else { // 22 <= ind - 1 <= 33
1284 Cstar
.w
[0] = P256
.w
[3];
1285 fstar
.w
[3] = P256
.w
[3] & maskhigh128
[ind
- 1];
1286 fstar
.w
[2] = P256
.w
[2];
1287 fstar
.w
[1] = P256
.w
[1];
1288 fstar
.w
[0] = P256
.w
[0];
1290 // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
1291 // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
1292 // if (0 < f* < 10^(-x)) then the result is a midpoint
1293 // if floor(C*) is even then C* = floor(C*) - logical right
1294 // shift; C* has p decimal digits, correct by Prop. 1)
1295 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
1296 // shift; C* has p decimal digits, correct by Pr. 1)
1298 // C* = floor(C*) (logical right shift; C has p decimal digits,
1299 // correct by Property 1)
1300 // n = C* * 10^(e+x)
1302 // shift right C* by Ex-128 = shiftright128[ind]
1303 shift
= shiftright128
[ind
- 1]; // 0 <= shift <= 102
1304 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
1306 (Cstar
.w
[0] >> shift
) | (Cstar
.w
[1] << (64 - shift
));
1307 // redundant, it will be 0! Cstar.w[1] = (Cstar.w[1] >> shift);
1308 } else { // 22 <= ind - 1 <= 33
1309 Cstar
.w
[0] = (Cstar
.w
[0] >> (shift
- 64)); // 2 <= shift - 64 <= 38
1311 // determine inexactness of the rounding of C*
1312 // if (0 < f* - 1/2 < 10^(-x)) then
1313 // the result is exact
1314 // else // if (f* - 1/2 > T*) then
1315 // the result is inexact
1317 if (fstar
.w
[1] > 0x8000000000000000ull
||
1318 (fstar
.w
[1] == 0x8000000000000000ull
1319 && fstar
.w
[0] > 0x0ull
)) {
1320 // f* > 1/2 and the result may be exact
1321 tmp64
= fstar
.w
[1] - 0x8000000000000000ull
; // f* - 1/2
1322 if (tmp64
> ten2mk128trunc
[ind
- 1].w
[1]
1323 || (tmp64
== ten2mk128trunc
[ind
- 1].w
[1]
1324 && fstar
.w
[0] >= ten2mk128trunc
[ind
- 1].w
[0])) {
1325 // set the inexact flag
1326 *pfpsf
|= INEXACT_EXCEPTION
;
1327 } // else the result is exact
1328 } else { // the result is inexact; f2* <= 1/2
1329 // set the inexact flag
1330 *pfpsf
|= INEXACT_EXCEPTION
;
1331 is_inexact_gt_midpoint
= 1;
1333 } else if (ind
- 1 <= 21) { // if 3 <= ind <= 21
1334 if (fstar
.w
[3] > 0x0 ||
1335 (fstar
.w
[3] == 0x0 && fstar
.w
[2] > onehalf128
[ind
- 1]) ||
1336 (fstar
.w
[3] == 0x0 && fstar
.w
[2] == onehalf128
[ind
- 1] &&
1337 (fstar
.w
[1] || fstar
.w
[0]))) {
1338 // f2* > 1/2 and the result may be exact
1339 // Calculate f2* - 1/2
1340 tmp64
= fstar
.w
[2] - onehalf128
[ind
- 1];
1341 tmp64A
= fstar
.w
[3];
1342 if (tmp64
> fstar
.w
[2])
1345 || fstar
.w
[1] > ten2mk128trunc
[ind
- 1].w
[1]
1346 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
1347 && fstar
.w
[0] > ten2mk128trunc
[ind
- 1].w
[0])) {
1348 // set the inexact flag
1349 *pfpsf
|= INEXACT_EXCEPTION
;
1350 } // else the result is exact
1351 } else { // the result is inexact; f2* <= 1/2
1352 // set the inexact flag
1353 *pfpsf
|= INEXACT_EXCEPTION
;
1354 is_inexact_gt_midpoint
= 1;
1356 } else { // if 22 <= ind <= 33
1357 if (fstar
.w
[3] > onehalf128
[ind
- 1] ||
1358 (fstar
.w
[3] == onehalf128
[ind
- 1] &&
1359 (fstar
.w
[2] || fstar
.w
[1] || fstar
.w
[0]))) {
1360 // f2* > 1/2 and the result may be exact
1361 // Calculate f2* - 1/2
1362 tmp64
= fstar
.w
[3] - onehalf128
[ind
- 1];
1363 if (tmp64
|| fstar
.w
[2]
1364 || fstar
.w
[1] > ten2mk128trunc
[ind
- 1].w
[1]
1365 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
1366 && fstar
.w
[0] > ten2mk128trunc
[ind
- 1].w
[0])) {
1367 // set the inexact flag
1368 *pfpsf
|= INEXACT_EXCEPTION
;
1369 } // else the result is exact
1370 } else { // the result is inexact; f2* <= 1/2
1371 // set the inexact flag
1372 *pfpsf
|= INEXACT_EXCEPTION
;
1373 is_inexact_gt_midpoint
= 1;
1377 // if the result was a midpoint it was rounded away from zero, so
1378 // it will need a correction
1379 // check for midpoints
1380 if ((fstar
.w
[3] == 0) && (fstar
.w
[2] == 0)
1381 && (fstar
.w
[1] || fstar
.w
[0])
1382 && (fstar
.w
[1] < ten2mk128trunc
[ind
- 1].w
[1]
1383 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
1384 && fstar
.w
[0] <= ten2mk128trunc
[ind
- 1].w
[0]))) {
1385 // the result is a midpoint; round to nearest
1386 if (Cstar
.w
[0] & 0x01) { // Cstar.w[0] is odd; MP in [EVEN, ODD]
1387 // if floor(C*) is odd C = floor(C*) - 1; the result >= 1
1388 Cstar
.w
[0]--; // Cstar.w[0] is now even
1389 is_inexact_gt_midpoint
= 0;
1390 } else { // else MP in [ODD, EVEN]
1391 is_midpoint_lt_even
= 1;
1392 is_inexact_gt_midpoint
= 0;
1395 // general correction for RM
1396 if (is_midpoint_lt_even
|| is_inexact_gt_midpoint
) {
1397 Cstar
.w
[0] = Cstar
.w
[0] - 1;
1399 ; // the result is already correct
1402 } else if (exp
== 0) {
1406 } else { // if (exp > 0) => 1 <= exp <= 9, 1 <= q < 9, 2 <= q + exp <= 10
1407 // res = +C * 10^exp (exact)
1408 res
= C1
.w
[0] * ten2k64
[exp
];
1416 /*****************************************************************************
1417 * BID128_to_uint32_ceil
1418 ****************************************************************************/
1420 BID128_FUNCTION_ARG1_NORND_CUSTOMRESTYPE (unsigned int,
1421 bid128_to_uint32_ceil
, x
)
1426 int exp
; // unbiased exponent
1427 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
1428 UINT64 tmp64
, tmp64A
;
1429 BID_UI64DOUBLE tmp1
;
1430 unsigned int x_nr_bits
;
1433 UINT128 Cstar
; // C* represents up to 34 decimal digits ~ 113 bits
1436 int is_inexact_lt_midpoint
= 0;
1437 int is_midpoint_gt_even
= 0;
1440 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
1441 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bit positions
1442 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
1445 // check for NaN or Infinity
1446 if ((x
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
) {
1448 if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
1449 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
1451 *pfpsf
|= INVALID_EXCEPTION
;
1452 // return Integer Indefinite
1454 } else { // x is QNaN
1456 *pfpsf
|= INVALID_EXCEPTION
;
1457 // return Integer Indefinite
1461 } else { // x is not a NaN, so it must be infinity
1462 if (!x_sign
) { // x is +inf
1464 *pfpsf
|= INVALID_EXCEPTION
;
1465 // return Integer Indefinite
1467 } else { // x is -inf
1469 *pfpsf
|= INVALID_EXCEPTION
;
1470 // return Integer Indefinite
1476 // check for non-canonical values (after the check for special values)
1477 if ((C1
.w
[1] > 0x0001ed09bead87c0ull
)
1478 || (C1
.w
[1] == 0x0001ed09bead87c0ull
1479 && (C1
.w
[0] > 0x378d8e63ffffffffull
))
1480 || ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
)) {
1483 } else if ((C1
.w
[1] == 0x0ull
) && (C1
.w
[0] == 0x0ull
)) {
1487 } else { // x is not special and is not zero
1489 // q = nr. of decimal digits in x
1490 // determine first the nr. of bits in x
1492 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
1493 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1494 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
1495 tmp1
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
1497 33 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1498 } else { // x < 2^32
1499 tmp1
.d
= (double) (C1
.w
[0]); // exact conversion
1501 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1503 } else { // if x < 2^53
1504 tmp1
.d
= (double) C1
.w
[0]; // exact conversion
1506 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1508 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1509 tmp1
.d
= (double) C1
.w
[1]; // exact conversion
1511 65 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1513 q
= nr_digits
[x_nr_bits
- 1].digits
;
1515 q
= nr_digits
[x_nr_bits
- 1].digits1
;
1516 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
1517 || (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
1518 && C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
1521 exp
= (x_exp
>> 49) - 6176;
1523 if ((q
+ exp
) > 10) { // x >= 10^10 ~= 2^33.2... (cannot fit in 32 bits)
1525 *pfpsf
|= INVALID_EXCEPTION
;
1526 // return Integer Indefinite
1529 } else if ((q
+ exp
) == 10) { // x = c(0)c(1)...c(9).c(10)...c(q-1)
1530 // in this case 2^29.89... ~= 10^9 <= x < 10^10 ~= 2^33.2...
1531 // so x rounded to an integer may or may not fit in a signed 32-bit int
1532 // the cases that do not fit are identified here; the ones that fit
1533 // fall through and will be handled with other cases further,
1534 // under '1 <= q + exp <= 10'
1535 if (x_sign
) { // if n < 0 and q + exp = 10
1536 // if n <= -1 then n is too large
1537 // too large if c(0)c(1)...c(9).c(10)...c(q-1) >= 1
1538 // <=> 0.c(0)c(1)...c(q-1) * 10^11 >= 0x50000000a, 1<=q<=34
1540 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
1541 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
1542 if (tmp64
>= 0x0aull
) {
1544 *pfpsf
|= INVALID_EXCEPTION
;
1545 // return Integer Indefinite
1549 // else cases that can be rounded to a 32-bit int fall through
1550 // to '1 <= q + exp <= 10'
1551 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
1552 // 0.c(0)c(1)...c(q-1) * 10^11 >= 0x0a <=>
1553 // C >= 0x0a * 10^(q-11) where 1 <= q - 11 <= 23
1556 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
1557 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
1558 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
1559 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
1561 if (C1
.w
[1] > C
.w
[1]
1562 || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] >= C
.w
[0])) {
1564 *pfpsf
|= INVALID_EXCEPTION
;
1565 // return Integer Indefinite
1569 // else cases that can be rounded to a 32-bit int fall through
1570 // to '1 <= q + exp <= 10'
1572 } else { // if n > 0 and q + exp = 10
1573 // if n > 2^32 - 1 then n is too large
1574 // too large if c(0)c(1)...c(9).c(10)...c(q-1) > 2^32 - 1
1575 // too large if 0.c(0)c(1)...c(q-1) * 10^11 > 0x9fffffff6, 1<=q<=34
1577 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
1578 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
1579 if (tmp64
> 0x9fffffff6ull
) {
1581 *pfpsf
|= INVALID_EXCEPTION
;
1582 // return Integer Indefinite
1586 // else cases that can be rounded to a 32-bit int fall through
1587 // to '1 <= q + exp <= 10'
1588 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
1589 // 0.c(0)c(1)...c(q-1) * 10^11 > 0x9fffffff6 <=>
1590 // C > 0x9fffffff6 * 10^(q-11) where 1 <= q - 11 <= 23
1592 tmp64
= 0x9fffffff6ull
;
1593 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
1594 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
1595 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
1596 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
1598 if (C1
.w
[1] > C
.w
[1] || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] > C
.w
[0])) {
1600 *pfpsf
|= INVALID_EXCEPTION
;
1601 // return Integer Indefinite
1605 // else cases that can be rounded to a 32-bit int fall through
1606 // to '1 <= q + exp <= 10'
1610 // n is not too large to be converted to int32: -2^32-1 < n <= 2^32-1
1611 // Note: some of the cases tested for above fall through to this point
1612 if ((q
+ exp
) <= 0) {
1613 // n = +/-0.0...c(0)c(1)...c(q-1) or n = +/-0.c(0)c(1)...c(q-1)
1620 } else { // if (1 <= q + exp <= 10, 1 <= q <= 34, -33 <= exp <= 9)
1621 // -2^32-1 < x <= -1 or 1 <= x <= 2^32-1 so x can be rounded
1622 // toward positive infinity to a 32-bit signed integer
1623 if (x_sign
) { // x <= -1 is invalid
1625 *pfpsf
|= INVALID_EXCEPTION
;
1626 // return Integer Indefinite
1630 // x > 0 from this point on
1631 if (exp
< 0) { // 2 <= q <= 34, -33 <= exp <= -1, 1 <= q + exp <= 10
1632 ind
= -exp
; // 1 <= ind <= 33; ind is a synonym for 'x'
1633 // chop off ind digits from the lower part of C1
1634 // C1 = C1 + 1/2 * 10^ind where the result C1 fits in 127 bits
1637 C1
.w
[0] = C1
.w
[0] + midpoint64
[ind
- 1];
1639 C1
.w
[0] = C1
.w
[0] + midpoint128
[ind
- 20].w
[0];
1640 C1
.w
[1] = C1
.w
[1] + midpoint128
[ind
- 20].w
[1];
1642 if (C1
.w
[0] < tmp64
)
1644 // calculate C* and f*
1645 // C* is actually floor(C*) in this case
1646 // C* and f* need shifting and masking, as shown by
1647 // shiftright128[] and maskhigh128[]
1649 // kx = 10^(-x) = ten2mk128[ind - 1]
1650 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
1651 // the approximation of 10^(-x) was rounded up to 118 bits
1652 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
1653 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
1654 Cstar
.w
[1] = P256
.w
[3];
1655 Cstar
.w
[0] = P256
.w
[2];
1657 fstar
.w
[2] = P256
.w
[2] & maskhigh128
[ind
- 1];
1658 fstar
.w
[1] = P256
.w
[1];
1659 fstar
.w
[0] = P256
.w
[0];
1660 } else { // 22 <= ind - 1 <= 33
1662 Cstar
.w
[0] = P256
.w
[3];
1663 fstar
.w
[3] = P256
.w
[3] & maskhigh128
[ind
- 1];
1664 fstar
.w
[2] = P256
.w
[2];
1665 fstar
.w
[1] = P256
.w
[1];
1666 fstar
.w
[0] = P256
.w
[0];
1668 // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
1669 // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
1670 // if (0 < f* < 10^(-x)) then the result is a midpoint
1671 // if floor(C*) is even then C* = floor(C*) - logical right
1672 // shift; C* has p decimal digits, correct by Prop. 1)
1673 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
1674 // shift; C* has p decimal digits, correct by Pr. 1)
1676 // C* = floor(C*) (logical right shift; C has p decimal digits,
1677 // correct by Property 1)
1678 // n = C* * 10^(e+x)
1680 // shift right C* by Ex-128 = shiftright128[ind]
1681 shift
= shiftright128
[ind
- 1]; // 0 <= shift <= 102
1682 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
1684 (Cstar
.w
[0] >> shift
) | (Cstar
.w
[1] << (64 - shift
));
1685 // redundant, it will be 0! Cstar.w[1] = (Cstar.w[1] >> shift);
1686 } else { // 22 <= ind - 1 <= 33
1687 Cstar
.w
[0] = (Cstar
.w
[0] >> (shift
- 64)); // 2 <= shift - 64 <= 38
1689 // determine inexactness of the rounding of C*
1690 // if (0 < f* - 1/2 < 10^(-x)) then
1691 // the result is exact
1692 // else // if (f* - 1/2 > T*) then
1693 // the result is inexact
1695 if (fstar
.w
[1] > 0x8000000000000000ull
||
1696 (fstar
.w
[1] == 0x8000000000000000ull
1697 && fstar
.w
[0] > 0x0ull
)) {
1698 // f* > 1/2 and the result may be exact
1699 tmp64
= fstar
.w
[1] - 0x8000000000000000ull
; // f* - 1/2
1700 if (tmp64
> ten2mk128trunc
[ind
- 1].w
[1]
1701 || (tmp64
== ten2mk128trunc
[ind
- 1].w
[1]
1702 && fstar
.w
[0] >= ten2mk128trunc
[ind
- 1].w
[0])) {
1703 is_inexact_lt_midpoint
= 1;
1704 } // else the result is exact
1705 } else { // the result is inexact; f2* <= 1/2
1708 } else if (ind
- 1 <= 21) { // if 3 <= ind <= 21
1709 if (fstar
.w
[3] > 0x0 ||
1710 (fstar
.w
[3] == 0x0 && fstar
.w
[2] > onehalf128
[ind
- 1]) ||
1711 (fstar
.w
[3] == 0x0 && fstar
.w
[2] == onehalf128
[ind
- 1] &&
1712 (fstar
.w
[1] || fstar
.w
[0]))) {
1713 // f2* > 1/2 and the result may be exact
1714 // Calculate f2* - 1/2
1715 tmp64
= fstar
.w
[2] - onehalf128
[ind
- 1];
1716 tmp64A
= fstar
.w
[3];
1717 if (tmp64
> fstar
.w
[2])
1720 || fstar
.w
[1] > ten2mk128trunc
[ind
- 1].w
[1]
1721 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
1722 && fstar
.w
[0] > ten2mk128trunc
[ind
- 1].w
[0])) {
1723 is_inexact_lt_midpoint
= 1;
1724 } // else the result is exact
1725 } else { // the result is inexact; f2* <= 1/2
1727 } else { // if 22 <= ind <= 33
1728 if (fstar
.w
[3] > onehalf128
[ind
- 1] ||
1729 (fstar
.w
[3] == onehalf128
[ind
- 1] &&
1730 (fstar
.w
[2] || fstar
.w
[1] || fstar
.w
[0]))) {
1731 // f2* > 1/2 and the result may be exact
1732 // Calculate f2* - 1/2
1733 tmp64
= fstar
.w
[3] - onehalf128
[ind
- 1];
1734 if (tmp64
|| fstar
.w
[2]
1735 || fstar
.w
[1] > ten2mk128trunc
[ind
- 1].w
[1]
1736 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
1737 && fstar
.w
[0] > ten2mk128trunc
[ind
- 1].w
[0])) {
1738 is_inexact_lt_midpoint
= 1;
1739 } // else the result is exact
1740 } else { // the result is inexact; f2* <= 1/2
1745 // if the result was a midpoint it was rounded away from zero, so
1746 // it will need a correction
1747 // check for midpoints
1748 if ((fstar
.w
[3] == 0) && (fstar
.w
[2] == 0)
1749 && (fstar
.w
[1] || fstar
.w
[0])
1750 && (fstar
.w
[1] < ten2mk128trunc
[ind
- 1].w
[1]
1751 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
1752 && fstar
.w
[0] <= ten2mk128trunc
[ind
- 1].w
[0]))) {
1753 // the result is a midpoint; round to nearest
1754 if (Cstar
.w
[0] & 0x01) { // Cstar.w[0] is odd; MP in [EVEN, ODD]
1755 // if floor(C*) is odd C = floor(C*) - 1; the result >= 1
1756 Cstar
.w
[0]--; // Cstar.w[0] is now even
1757 is_midpoint_gt_even
= 1;
1758 is_inexact_lt_midpoint
= 0;
1759 } else { // else MP in [ODD, EVEN]
1760 is_inexact_lt_midpoint
= 0;
1763 // general correction for RM
1764 if (is_midpoint_gt_even
|| is_inexact_lt_midpoint
) {
1765 Cstar
.w
[0] = Cstar
.w
[0] + 1;
1767 ; // the result is already correct
1770 } else if (exp
== 0) {
1774 } else { // if (exp > 0) => 1 <= exp <= 9, 1 <= q < 9, 2 <= q + exp <= 10
1775 // res = +C * 10^exp (exact)
1776 res
= C1
.w
[0] * ten2k64
[exp
];
1784 /*****************************************************************************
1785 * BID128_to_uint32_xceil
1786 ****************************************************************************/
1788 BID128_FUNCTION_ARG1_NORND_CUSTOMRESTYPE (unsigned int,
1789 bid128_to_uint32_xceil
, x
)
1794 int exp
; // unbiased exponent
1795 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
1796 UINT64 tmp64
, tmp64A
;
1797 BID_UI64DOUBLE tmp1
;
1798 unsigned int x_nr_bits
;
1801 UINT128 Cstar
; // C* represents up to 34 decimal digits ~ 113 bits
1804 int is_inexact_lt_midpoint
= 0;
1805 int is_midpoint_gt_even
= 0;
1808 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
1809 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bit positions
1810 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
1813 // check for NaN or Infinity
1814 if ((x
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
) {
1816 if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
1817 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
1819 *pfpsf
|= INVALID_EXCEPTION
;
1820 // return Integer Indefinite
1822 } else { // x is QNaN
1824 *pfpsf
|= INVALID_EXCEPTION
;
1825 // return Integer Indefinite
1829 } else { // x is not a NaN, so it must be infinity
1830 if (!x_sign
) { // x is +inf
1832 *pfpsf
|= INVALID_EXCEPTION
;
1833 // return Integer Indefinite
1835 } else { // x is -inf
1837 *pfpsf
|= INVALID_EXCEPTION
;
1838 // return Integer Indefinite
1844 // check for non-canonical values (after the check for special values)
1845 if ((C1
.w
[1] > 0x0001ed09bead87c0ull
)
1846 || (C1
.w
[1] == 0x0001ed09bead87c0ull
1847 && (C1
.w
[0] > 0x378d8e63ffffffffull
))
1848 || ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
)) {
1851 } else if ((C1
.w
[1] == 0x0ull
) && (C1
.w
[0] == 0x0ull
)) {
1855 } else { // x is not special and is not zero
1857 // q = nr. of decimal digits in x
1858 // determine first the nr. of bits in x
1860 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
1861 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1862 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
1863 tmp1
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
1865 33 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1866 } else { // x < 2^32
1867 tmp1
.d
= (double) (C1
.w
[0]); // exact conversion
1869 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1871 } else { // if x < 2^53
1872 tmp1
.d
= (double) C1
.w
[0]; // exact conversion
1874 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1876 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1877 tmp1
.d
= (double) C1
.w
[1]; // exact conversion
1879 65 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
1881 q
= nr_digits
[x_nr_bits
- 1].digits
;
1883 q
= nr_digits
[x_nr_bits
- 1].digits1
;
1884 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
1885 || (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
1886 && C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
1889 exp
= (x_exp
>> 49) - 6176;
1890 if ((q
+ exp
) > 10) { // x >= 10^10 ~= 2^33.2... (cannot fit in 32 bits)
1892 *pfpsf
|= INVALID_EXCEPTION
;
1893 // return Integer Indefinite
1896 } else if ((q
+ exp
) == 10) { // x = c(0)c(1)...c(9).c(10)...c(q-1)
1897 // in this case 2^29.89... ~= 10^9 <= x < 10^10 ~= 2^33.2...
1898 // so x rounded to an integer may or may not fit in a signed 32-bit int
1899 // the cases that do not fit are identified here; the ones that fit
1900 // fall through and will be handled with other cases further,
1901 // under '1 <= q + exp <= 10'
1902 if (x_sign
) { // if n < 0 and q + exp = 10
1903 // if n <= -1 then n is too large
1904 // too large if c(0)c(1)...c(9).c(10)...c(q-1) >= 1
1905 // <=> 0.c(0)c(1)...c(q-1) * 10^11 >= 0x50000000a, 1<=q<=34
1907 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
1908 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
1909 if (tmp64
>= 0x0aull
) {
1911 *pfpsf
|= INVALID_EXCEPTION
;
1912 // return Integer Indefinite
1916 // else cases that can be rounded to a 32-bit int fall through
1917 // to '1 <= q + exp <= 10'
1918 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
1919 // 0.c(0)c(1)...c(q-1) * 10^11 >= 0x0a <=>
1920 // C >= 0x0a * 10^(q-11) where 1 <= q - 11 <= 23
1923 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
1924 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
1925 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
1926 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
1928 if (C1
.w
[1] > C
.w
[1]
1929 || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] >= C
.w
[0])) {
1931 *pfpsf
|= INVALID_EXCEPTION
;
1932 // return Integer Indefinite
1936 // else cases that can be rounded to a 32-bit int fall through
1937 // to '1 <= q + exp <= 10'
1939 } else { // if n > 0 and q + exp = 10
1940 // if n > 2^32 - 1 then n is too large
1941 // too large if c(0)c(1)...c(9).c(10)...c(q-1) > 2^32 - 1
1942 // too large if 0.c(0)c(1)...c(q-1) * 10^11 > 0x9fffffff6, 1<=q<=34
1944 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
1945 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
1946 if (tmp64
> 0x9fffffff6ull
) {
1948 *pfpsf
|= INVALID_EXCEPTION
;
1949 // return Integer Indefinite
1953 // else cases that can be rounded to a 32-bit int fall through
1954 // to '1 <= q + exp <= 10'
1955 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
1956 // 0.c(0)c(1)...c(q-1) * 10^11 > 0x9fffffff6 <=>
1957 // C > 0x9fffffff6 * 10^(q-11) where 1 <= q - 11 <= 23
1959 tmp64
= 0x9fffffff6ull
;
1960 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
1961 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
1962 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
1963 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
1965 if (C1
.w
[1] > C
.w
[1] || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] > C
.w
[0])) {
1967 *pfpsf
|= INVALID_EXCEPTION
;
1968 // return Integer Indefinite
1972 // else cases that can be rounded to a 32-bit int fall through
1973 // to '1 <= q + exp <= 10'
1977 // n is not too large to be converted to int32: -2^32-1 < n <= 2^32-1
1978 // Note: some of the cases tested for above fall through to this point
1979 if ((q
+ exp
) <= 0) {
1980 // n = +/-0.0...c(0)c(1)...c(q-1) or n = +/-0.c(0)c(1)...c(q-1)
1982 *pfpsf
|= INEXACT_EXCEPTION
;
1989 } else { // if (1 <= q + exp <= 10, 1 <= q <= 34, -33 <= exp <= 9)
1990 // -2^32-1 < x <= -1 or 1 <= x <= 2^32-1 so x can be rounded
1991 // toward positive infinity to a 32-bit signed integer
1992 if (x_sign
) { // x <= -1 is invalid
1994 *pfpsf
|= INVALID_EXCEPTION
;
1995 // return Integer Indefinite
1999 // x > 0 from this point on
2000 if (exp
< 0) { // 2 <= q <= 34, -33 <= exp <= -1, 1 <= q + exp <= 10
2001 ind
= -exp
; // 1 <= ind <= 33; ind is a synonym for 'x'
2002 // chop off ind digits from the lower part of C1
2003 // C1 = C1 + 1/2 * 10^ind where the result C1 fits in 127 bits
2006 C1
.w
[0] = C1
.w
[0] + midpoint64
[ind
- 1];
2008 C1
.w
[0] = C1
.w
[0] + midpoint128
[ind
- 20].w
[0];
2009 C1
.w
[1] = C1
.w
[1] + midpoint128
[ind
- 20].w
[1];
2011 if (C1
.w
[0] < tmp64
)
2013 // calculate C* and f*
2014 // C* is actually floor(C*) in this case
2015 // C* and f* need shifting and masking, as shown by
2016 // shiftright128[] and maskhigh128[]
2018 // kx = 10^(-x) = ten2mk128[ind - 1]
2019 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
2020 // the approximation of 10^(-x) was rounded up to 118 bits
2021 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
2022 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
2023 Cstar
.w
[1] = P256
.w
[3];
2024 Cstar
.w
[0] = P256
.w
[2];
2026 fstar
.w
[2] = P256
.w
[2] & maskhigh128
[ind
- 1];
2027 fstar
.w
[1] = P256
.w
[1];
2028 fstar
.w
[0] = P256
.w
[0];
2029 } else { // 22 <= ind - 1 <= 33
2031 Cstar
.w
[0] = P256
.w
[3];
2032 fstar
.w
[3] = P256
.w
[3] & maskhigh128
[ind
- 1];
2033 fstar
.w
[2] = P256
.w
[2];
2034 fstar
.w
[1] = P256
.w
[1];
2035 fstar
.w
[0] = P256
.w
[0];
2037 // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
2038 // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
2039 // if (0 < f* < 10^(-x)) then the result is a midpoint
2040 // if floor(C*) is even then C* = floor(C*) - logical right
2041 // shift; C* has p decimal digits, correct by Prop. 1)
2042 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
2043 // shift; C* has p decimal digits, correct by Pr. 1)
2045 // C* = floor(C*) (logical right shift; C has p decimal digits,
2046 // correct by Property 1)
2047 // n = C* * 10^(e+x)
2049 // shift right C* by Ex-128 = shiftright128[ind]
2050 shift
= shiftright128
[ind
- 1]; // 0 <= shift <= 102
2051 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
2053 (Cstar
.w
[0] >> shift
) | (Cstar
.w
[1] << (64 - shift
));
2054 // redundant, it will be 0! Cstar.w[1] = (Cstar.w[1] >> shift);
2055 } else { // 22 <= ind - 1 <= 33
2056 Cstar
.w
[0] = (Cstar
.w
[0] >> (shift
- 64)); // 2 <= shift - 64 <= 38
2058 // determine inexactness of the rounding of C*
2059 // if (0 < f* - 1/2 < 10^(-x)) then
2060 // the result is exact
2061 // else // if (f* - 1/2 > T*) then
2062 // the result is inexact
2064 if (fstar
.w
[1] > 0x8000000000000000ull
||
2065 (fstar
.w
[1] == 0x8000000000000000ull
2066 && fstar
.w
[0] > 0x0ull
)) {
2067 // f* > 1/2 and the result may be exact
2068 tmp64
= fstar
.w
[1] - 0x8000000000000000ull
; // f* - 1/2
2069 if (tmp64
> ten2mk128trunc
[ind
- 1].w
[1]
2070 || (tmp64
== ten2mk128trunc
[ind
- 1].w
[1]
2071 && fstar
.w
[0] >= ten2mk128trunc
[ind
- 1].w
[0])) {
2072 // set the inexact flag
2073 *pfpsf
|= INEXACT_EXCEPTION
;
2074 is_inexact_lt_midpoint
= 1;
2075 } // else the result is exact
2076 } else { // the result is inexact; f2* <= 1/2
2077 // set the inexact flag
2078 *pfpsf
|= INEXACT_EXCEPTION
;
2080 } else if (ind
- 1 <= 21) { // if 3 <= ind <= 21
2081 if (fstar
.w
[3] > 0x0 ||
2082 (fstar
.w
[3] == 0x0 && fstar
.w
[2] > onehalf128
[ind
- 1]) ||
2083 (fstar
.w
[3] == 0x0 && fstar
.w
[2] == onehalf128
[ind
- 1] &&
2084 (fstar
.w
[1] || fstar
.w
[0]))) {
2085 // f2* > 1/2 and the result may be exact
2086 // Calculate f2* - 1/2
2087 tmp64
= fstar
.w
[2] - onehalf128
[ind
- 1];
2088 tmp64A
= fstar
.w
[3];
2089 if (tmp64
> fstar
.w
[2])
2092 || fstar
.w
[1] > ten2mk128trunc
[ind
- 1].w
[1]
2093 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
2094 && fstar
.w
[0] > ten2mk128trunc
[ind
- 1].w
[0])) {
2095 // set the inexact flag
2096 *pfpsf
|= INEXACT_EXCEPTION
;
2097 is_inexact_lt_midpoint
= 1;
2098 } // else the result is exact
2099 } else { // the result is inexact; f2* <= 1/2
2100 // set the inexact flag
2101 *pfpsf
|= INEXACT_EXCEPTION
;
2103 } else { // if 22 <= ind <= 33
2104 if (fstar
.w
[3] > onehalf128
[ind
- 1] ||
2105 (fstar
.w
[3] == onehalf128
[ind
- 1] &&
2106 (fstar
.w
[2] || fstar
.w
[1] || fstar
.w
[0]))) {
2107 // f2* > 1/2 and the result may be exact
2108 // Calculate f2* - 1/2
2109 tmp64
= fstar
.w
[3] - onehalf128
[ind
- 1];
2110 if (tmp64
|| fstar
.w
[2]
2111 || fstar
.w
[1] > ten2mk128trunc
[ind
- 1].w
[1]
2112 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
2113 && fstar
.w
[0] > ten2mk128trunc
[ind
- 1].w
[0])) {
2114 // set the inexact flag
2115 *pfpsf
|= INEXACT_EXCEPTION
;
2116 is_inexact_lt_midpoint
= 1;
2117 } // else the result is exact
2118 } else { // the result is inexact; f2* <= 1/2
2119 // set the inexact flag
2120 *pfpsf
|= INEXACT_EXCEPTION
;
2124 // if the result was a midpoint it was rounded away from zero, so
2125 // it will need a correction
2126 // check for midpoints
2127 if ((fstar
.w
[3] == 0) && (fstar
.w
[2] == 0)
2128 && (fstar
.w
[1] || fstar
.w
[0])
2129 && (fstar
.w
[1] < ten2mk128trunc
[ind
- 1].w
[1]
2130 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
2131 && fstar
.w
[0] <= ten2mk128trunc
[ind
- 1].w
[0]))) {
2132 // the result is a midpoint; round to nearest
2133 if (Cstar
.w
[0] & 0x01) { // Cstar.w[0] is odd; MP in [EVEN, ODD]
2134 // if floor(C*) is odd C = floor(C*) - 1; the result >= 1
2135 Cstar
.w
[0]--; // Cstar.w[0] is now even
2136 is_midpoint_gt_even
= 1;
2137 is_inexact_lt_midpoint
= 0;
2138 } else { // else MP in [ODD, EVEN]
2139 is_inexact_lt_midpoint
= 0;
2142 // general correction for RM
2143 if (is_midpoint_gt_even
|| is_inexact_lt_midpoint
) {
2144 Cstar
.w
[0] = Cstar
.w
[0] + 1;
2146 ; // the result is already correct
2149 } else if (exp
== 0) {
2153 } else { // if (exp > 0) => 1 <= exp <= 9, 1 <= q < 9, 2 <= q + exp <= 10
2154 // res = +C * 10^exp (exact)
2155 res
= C1
.w
[0] * ten2k64
[exp
];
2163 /*****************************************************************************
2164 * BID128_to_uint32_int
2165 ****************************************************************************/
2167 BID128_FUNCTION_ARG1_NORND_CUSTOMRESTYPE (unsigned int,
2168 bid128_to_uint32_int
, x
)
2173 int exp
; // unbiased exponent
2174 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
2175 UINT64 tmp64
, tmp64A
;
2176 BID_UI64DOUBLE tmp1
;
2177 unsigned int x_nr_bits
;
2180 UINT128 Cstar
; // C* represents up to 34 decimal digits ~ 113 bits
2183 int is_inexact_gt_midpoint
= 0;
2184 int is_midpoint_lt_even
= 0;
2187 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
2188 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bit positions
2189 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
2192 // check for NaN or Infinity
2193 if ((x
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
) {
2195 if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
2196 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
2198 *pfpsf
|= INVALID_EXCEPTION
;
2199 // return Integer Indefinite
2201 } else { // x is QNaN
2203 *pfpsf
|= INVALID_EXCEPTION
;
2204 // return Integer Indefinite
2208 } else { // x is not a NaN, so it must be infinity
2209 if (!x_sign
) { // x is +inf
2211 *pfpsf
|= INVALID_EXCEPTION
;
2212 // return Integer Indefinite
2214 } else { // x is -inf
2216 *pfpsf
|= INVALID_EXCEPTION
;
2217 // return Integer Indefinite
2223 // check for non-canonical values (after the check for special values)
2224 if ((C1
.w
[1] > 0x0001ed09bead87c0ull
)
2225 || (C1
.w
[1] == 0x0001ed09bead87c0ull
2226 && (C1
.w
[0] > 0x378d8e63ffffffffull
))
2227 || ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
)) {
2230 } else if ((C1
.w
[1] == 0x0ull
) && (C1
.w
[0] == 0x0ull
)) {
2234 } else { // x is not special and is not zero
2236 // q = nr. of decimal digits in x
2237 // determine first the nr. of bits in x
2239 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
2240 // split the 64-bit value in two 32-bit halves to avoid rounding errors
2241 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
2242 tmp1
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
2244 33 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
2245 } else { // x < 2^32
2246 tmp1
.d
= (double) (C1
.w
[0]); // exact conversion
2248 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
2250 } else { // if x < 2^53
2251 tmp1
.d
= (double) C1
.w
[0]; // exact conversion
2253 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
2255 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
2256 tmp1
.d
= (double) C1
.w
[1]; // exact conversion
2258 65 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
2260 q
= nr_digits
[x_nr_bits
- 1].digits
;
2262 q
= nr_digits
[x_nr_bits
- 1].digits1
;
2263 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
2264 || (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
2265 && C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
2268 exp
= (x_exp
>> 49) - 6176;
2269 if ((q
+ exp
) > 10) { // x >= 10^10 ~= 2^33.2... (cannot fit in 32 bits)
2271 *pfpsf
|= INVALID_EXCEPTION
;
2272 // return Integer Indefinite
2275 } else if ((q
+ exp
) == 10) { // x = c(0)c(1)...c(9).c(10)...c(q-1)
2276 // in this case 2^29.89... ~= 10^9 <= x < 10^10 ~= 2^33.2...
2277 // so x rounded to an integer may or may not fit in a signed 32-bit int
2278 // the cases that do not fit are identified here; the ones that fit
2279 // fall through and will be handled with other cases further,
2280 // under '1 <= q + exp <= 10'
2281 if (x_sign
) { // if n < 0 and q + exp = 10
2282 // if n <= -1 then n is too large
2283 // too large if c(0)c(1)...c(9).c(10)...c(q-1) >= 1
2284 // <=> 0.c(0)c(1)...c(q-1) * 10^11 >= 0x0a, 1<=q<=34
2286 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
2287 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
2288 if (tmp64
>= 0x0aull
) {
2290 *pfpsf
|= INVALID_EXCEPTION
;
2291 // return Integer Indefinite
2295 // else cases that can be rounded to a 32-bit uint fall through
2296 // to '1 <= q + exp <= 10'
2297 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
2298 // 0.c(0)c(1)...c(q-1) * 10^11 >= 0x0a <=>
2299 // C >= 0x0a * 10^(q-11) where 1 <= q - 11 <= 23
2302 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
2303 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
2304 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
2305 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
2307 if (C1
.w
[1] > C
.w
[1]
2308 || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] >= C
.w
[0])) {
2310 *pfpsf
|= INVALID_EXCEPTION
;
2311 // return Integer Indefinite
2315 // else cases that can be rounded to a 32-bit int fall through
2316 // to '1 <= q + exp <= 10'
2318 } else { // if n > 0 and q + exp = 10
2319 // if n >= 2^32 then n is too large
2320 // too large if c(0)c(1)...c(9).c(10)...c(q-1) >= 2^32
2321 // too large if 0.c(0)c(1)...c(q-1) * 10^11 >= 0xa00000000, 1<=q<=34
2323 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
2324 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
2325 if (tmp64
>= 0xa00000000ull
) {
2327 *pfpsf
|= INVALID_EXCEPTION
;
2328 // return Integer Indefinite
2332 // else cases that can be rounded to a 32-bit uint fall through
2333 // to '1 <= q + exp <= 10'
2334 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
2335 // 0.c(0)c(1)...c(q-1) * 10^11 >= 0xa00000000 <=>
2336 // C >= 0xa00000000 * 10^(q-11) where 1 <= q - 11 <= 23
2338 tmp64
= 0xa00000000ull
;
2339 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
2340 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
2341 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
2342 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
2344 if (C1
.w
[1] > C
.w
[1]
2345 || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] >= C
.w
[0])) {
2347 *pfpsf
|= INVALID_EXCEPTION
;
2348 // return Integer Indefinite
2352 // else cases that can be rounded to a 32-bit int fall through
2353 // to '1 <= q + exp <= 10'
2357 // n is not too large to be converted to uint32: -2^32 < n < 2^32
2358 // Note: some of the cases tested for above fall through to this point
2359 if ((q
+ exp
) <= 0) {
2360 // n = +/-0.0...c(0)c(1)...c(q-1) or n = +/-0.c(0)c(1)...c(q-1)
2364 } else { // if (1 <= q + exp <= 10, 1 <= q <= 34, -33 <= exp <= 9)
2365 // x = d(0)...d(k).d(k+1)..., k >= 0, d(0) != 0
2366 if (x_sign
) { // x <= -1
2368 *pfpsf
|= INVALID_EXCEPTION
;
2369 // return Integer Indefinite
2373 // x > 0 from this point on
2374 // 1 <= x < 2^32 so x can be rounded to zero to a 32-bit unsigned integer
2375 if (exp
< 0) { // 2 <= q <= 34, -33 <= exp <= -1, 1 <= q + exp <= 10
2376 ind
= -exp
; // 1 <= ind <= 33; ind is a synonym for 'x'
2377 // chop off ind digits from the lower part of C1
2378 // C1 = C1 + 1/2 * 10^ind where the result C1 fits in 127 bits
2381 C1
.w
[0] = C1
.w
[0] + midpoint64
[ind
- 1];
2383 C1
.w
[0] = C1
.w
[0] + midpoint128
[ind
- 20].w
[0];
2384 C1
.w
[1] = C1
.w
[1] + midpoint128
[ind
- 20].w
[1];
2386 if (C1
.w
[0] < tmp64
)
2388 // calculate C* and f*
2389 // C* is actually floor(C*) in this case
2390 // C* and f* need shifting and masking, as shown by
2391 // shiftright128[] and maskhigh128[]
2393 // kx = 10^(-x) = ten2mk128[ind - 1]
2394 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
2395 // the approximation of 10^(-x) was rounded up to 118 bits
2396 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
2397 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
2398 Cstar
.w
[1] = P256
.w
[3];
2399 Cstar
.w
[0] = P256
.w
[2];
2401 fstar
.w
[2] = P256
.w
[2] & maskhigh128
[ind
- 1];
2402 fstar
.w
[1] = P256
.w
[1];
2403 fstar
.w
[0] = P256
.w
[0];
2404 } else { // 22 <= ind - 1 <= 33
2406 Cstar
.w
[0] = P256
.w
[3];
2407 fstar
.w
[3] = P256
.w
[3] & maskhigh128
[ind
- 1];
2408 fstar
.w
[2] = P256
.w
[2];
2409 fstar
.w
[1] = P256
.w
[1];
2410 fstar
.w
[0] = P256
.w
[0];
2412 // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
2413 // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
2414 // if (0 < f* < 10^(-x)) then the result is a midpoint
2415 // if floor(C*) is even then C* = floor(C*) - logical right
2416 // shift; C* has p decimal digits, correct by Prop. 1)
2417 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
2418 // shift; C* has p decimal digits, correct by Pr. 1)
2420 // C* = floor(C*) (logical right shift; C has p decimal digits,
2421 // correct by Property 1)
2422 // n = C* * 10^(e+x)
2424 // shift right C* by Ex-128 = shiftright128[ind]
2425 shift
= shiftright128
[ind
- 1]; // 0 <= shift <= 102
2426 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
2428 (Cstar
.w
[0] >> shift
) | (Cstar
.w
[1] << (64 - shift
));
2429 // redundant, it will be 0! Cstar.w[1] = (Cstar.w[1] >> shift);
2430 } else { // 22 <= ind - 1 <= 33
2431 Cstar
.w
[0] = (Cstar
.w
[0] >> (shift
- 64)); // 2 <= shift - 64 <= 38
2433 // determine inexactness of the rounding of C*
2434 // if (0 < f* - 1/2 < 10^(-x)) then
2435 // the result is exact
2436 // else // if (f* - 1/2 > T*) then
2437 // the result is inexact
2439 if (fstar
.w
[1] > 0x8000000000000000ull
||
2440 (fstar
.w
[1] == 0x8000000000000000ull
2441 && fstar
.w
[0] > 0x0ull
)) {
2442 // f* > 1/2 and the result may be exact
2443 tmp64
= fstar
.w
[1] - 0x8000000000000000ull
; // f* - 1/2
2444 if (tmp64
> ten2mk128trunc
[ind
- 1].w
[1]
2445 || (tmp64
== ten2mk128trunc
[ind
- 1].w
[1]
2446 && fstar
.w
[0] >= ten2mk128trunc
[ind
- 1].w
[0])) {
2447 } // else the result is exact
2448 } else { // the result is inexact; f2* <= 1/2
2449 is_inexact_gt_midpoint
= 1;
2451 } else if (ind
- 1 <= 21) { // if 3 <= ind <= 21
2452 if (fstar
.w
[3] > 0x0 ||
2453 (fstar
.w
[3] == 0x0 && fstar
.w
[2] > onehalf128
[ind
- 1]) ||
2454 (fstar
.w
[3] == 0x0 && fstar
.w
[2] == onehalf128
[ind
- 1] &&
2455 (fstar
.w
[1] || fstar
.w
[0]))) {
2456 // f2* > 1/2 and the result may be exact
2457 // Calculate f2* - 1/2
2458 tmp64
= fstar
.w
[2] - onehalf128
[ind
- 1];
2459 tmp64A
= fstar
.w
[3];
2460 if (tmp64
> fstar
.w
[2])
2463 || fstar
.w
[1] > ten2mk128trunc
[ind
- 1].w
[1]
2464 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
2465 && fstar
.w
[0] > ten2mk128trunc
[ind
- 1].w
[0])) {
2466 } // else the result is exact
2467 } else { // the result is inexact; f2* <= 1/2
2468 is_inexact_gt_midpoint
= 1;
2470 } else { // if 22 <= ind <= 33
2471 if (fstar
.w
[3] > onehalf128
[ind
- 1] ||
2472 (fstar
.w
[3] == onehalf128
[ind
- 1] &&
2473 (fstar
.w
[2] || fstar
.w
[1] || fstar
.w
[0]))) {
2474 // f2* > 1/2 and the result may be exact
2475 // Calculate f2* - 1/2
2476 tmp64
= fstar
.w
[3] - onehalf128
[ind
- 1];
2477 if (tmp64
|| fstar
.w
[2]
2478 || fstar
.w
[1] > ten2mk128trunc
[ind
- 1].w
[1]
2479 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
2480 && fstar
.w
[0] > ten2mk128trunc
[ind
- 1].w
[0])) {
2481 } // else the result is exact
2482 } else { // the result is inexact; f2* <= 1/2
2483 is_inexact_gt_midpoint
= 1;
2487 // if the result was a midpoint it was rounded away from zero, so
2488 // it will need a correction
2489 // check for midpoints
2490 if ((fstar
.w
[3] == 0) && (fstar
.w
[2] == 0)
2491 && (fstar
.w
[1] || fstar
.w
[0])
2492 && (fstar
.w
[1] < ten2mk128trunc
[ind
- 1].w
[1]
2493 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
2494 && fstar
.w
[0] <= ten2mk128trunc
[ind
- 1].w
[0]))) {
2495 // the result is a midpoint; round to nearest
2496 if (Cstar
.w
[0] & 0x01) { // Cstar.w[0] is odd; MP in [EVEN, ODD]
2497 // if floor(C*) is odd C = floor(C*) - 1; the result >= 1
2498 Cstar
.w
[0]--; // Cstar.w[0] is now even
2499 is_inexact_gt_midpoint
= 0;
2500 } else { // else MP in [ODD, EVEN]
2501 is_midpoint_lt_even
= 1;
2502 is_inexact_gt_midpoint
= 0;
2505 // general correction for RZ
2506 if (is_midpoint_lt_even
|| is_inexact_gt_midpoint
) {
2507 Cstar
.w
[0] = Cstar
.w
[0] - 1;
2509 ; // exact, the result is already correct
2512 } else if (exp
== 0) {
2516 } else { // if (exp > 0) => 1 <= exp <= 9, 1 <= q < 9, 2 <= q + exp <= 10
2517 // res = +C * 10^exp (exact)
2518 res
= C1
.w
[0] * ten2k64
[exp
];
2526 /*****************************************************************************
2527 * BID128_to_uint32_xint
2528 ****************************************************************************/
2530 BID128_FUNCTION_ARG1_NORND_CUSTOMRESTYPE (unsigned int,
2531 bid128_to_uint32_xint
, x
)
2536 int exp
; // unbiased exponent
2537 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
2538 UINT64 tmp64
, tmp64A
;
2539 BID_UI64DOUBLE tmp1
;
2540 unsigned int x_nr_bits
;
2543 UINT128 Cstar
; // C* represents up to 34 decimal digits ~ 113 bits
2546 int is_inexact_gt_midpoint
= 0;
2547 int is_midpoint_lt_even
= 0;
2550 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
2551 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bit positions
2552 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
2555 // check for NaN or Infinity
2556 if ((x
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
) {
2558 if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
2559 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
2561 *pfpsf
|= INVALID_EXCEPTION
;
2562 // return Integer Indefinite
2564 } else { // x is QNaN
2566 *pfpsf
|= INVALID_EXCEPTION
;
2567 // return Integer Indefinite
2571 } else { // x is not a NaN, so it must be infinity
2572 if (!x_sign
) { // x is +inf
2574 *pfpsf
|= INVALID_EXCEPTION
;
2575 // return Integer Indefinite
2577 } else { // x is -inf
2579 *pfpsf
|= INVALID_EXCEPTION
;
2580 // return Integer Indefinite
2586 // check for non-canonical values (after the check for special values)
2587 if ((C1
.w
[1] > 0x0001ed09bead87c0ull
)
2588 || (C1
.w
[1] == 0x0001ed09bead87c0ull
2589 && (C1
.w
[0] > 0x378d8e63ffffffffull
))
2590 || ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
)) {
2593 } else if ((C1
.w
[1] == 0x0ull
) && (C1
.w
[0] == 0x0ull
)) {
2597 } else { // x is not special and is not zero
2599 // q = nr. of decimal digits in x
2600 // determine first the nr. of bits in x
2602 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
2603 // split the 64-bit value in two 32-bit halves to avoid rounding errors
2604 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
2605 tmp1
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
2607 33 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
2608 } else { // x < 2^32
2609 tmp1
.d
= (double) (C1
.w
[0]); // exact conversion
2611 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
2613 } else { // if x < 2^53
2614 tmp1
.d
= (double) C1
.w
[0]; // exact conversion
2616 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
2618 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
2619 tmp1
.d
= (double) C1
.w
[1]; // exact conversion
2621 65 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
2623 q
= nr_digits
[x_nr_bits
- 1].digits
;
2625 q
= nr_digits
[x_nr_bits
- 1].digits1
;
2626 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
2627 || (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
2628 && C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
2631 exp
= (x_exp
>> 49) - 6176;
2632 if ((q
+ exp
) > 10) { // x >= 10^10 ~= 2^33.2... (cannot fit in 32 bits)
2634 *pfpsf
|= INVALID_EXCEPTION
;
2635 // return Integer Indefinite
2638 } else if ((q
+ exp
) == 10) { // x = c(0)c(1)...c(9).c(10)...c(q-1)
2639 // in this case 2^29.89... ~= 10^9 <= x < 10^10 ~= 2^33.2...
2640 // so x rounded to an integer may or may not fit in a signed 32-bit int
2641 // the cases that do not fit are identified here; the ones that fit
2642 // fall through and will be handled with other cases further,
2643 // under '1 <= q + exp <= 10'
2644 if (x_sign
) { // if n < 0 and q + exp = 10
2645 // if n <= -1 then n is too large
2646 // too large if c(0)c(1)...c(9).c(10)...c(q-1) >= 1
2647 // <=> 0.c(0)c(1)...c(q-1) * 10^11 >= 0x0a, 1<=q<=34
2649 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
2650 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
2651 if (tmp64
>= 0x0aull
) {
2653 *pfpsf
|= INVALID_EXCEPTION
;
2654 // return Integer Indefinite
2658 // else cases that can be rounded to a 32-bit uint fall through
2659 // to '1 <= q + exp <= 10'
2660 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
2661 // 0.c(0)c(1)...c(q-1) * 10^11 >= 0x0a <=>
2662 // C >= 0x0a * 10^(q-11) where 1 <= q - 11 <= 23
2665 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
2666 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
2667 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
2668 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
2670 if (C1
.w
[1] > C
.w
[1]
2671 || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] >= C
.w
[0])) {
2673 *pfpsf
|= INVALID_EXCEPTION
;
2674 // return Integer Indefinite
2678 // else cases that can be rounded to a 32-bit int fall through
2679 // to '1 <= q + exp <= 10'
2681 } else { // if n > 0 and q + exp = 10
2682 // if n >= 2^32 then n is too large
2683 // too large if c(0)c(1)...c(9).c(10)...c(q-1) >= 2^32
2684 // too large if 0.c(0)c(1)...c(q-1) * 10^11 >= 0xa00000000, 1<=q<=34
2686 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
2687 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
2688 if (tmp64
>= 0xa00000000ull
) {
2690 *pfpsf
|= INVALID_EXCEPTION
;
2691 // return Integer Indefinite
2695 // else cases that can be rounded to a 32-bit uint fall through
2696 // to '1 <= q + exp <= 10'
2697 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
2698 // 0.c(0)c(1)...c(q-1) * 10^11 >= 0xa00000000 <=>
2699 // C >= 0xa00000000 * 10^(q-11) where 1 <= q - 11 <= 23
2701 tmp64
= 0xa00000000ull
;
2702 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
2703 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
2704 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
2705 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
2707 if (C1
.w
[1] > C
.w
[1]
2708 || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] >= C
.w
[0])) {
2710 *pfpsf
|= INVALID_EXCEPTION
;
2711 // return Integer Indefinite
2715 // else cases that can be rounded to a 32-bit int fall through
2716 // to '1 <= q + exp <= 10'
2720 // n is not too large to be converted to uint32: -2^32 < n < 2^32
2721 // Note: some of the cases tested for above fall through to this point
2722 if ((q
+ exp
) <= 0) {
2723 // n = +/-0.0...c(0)c(1)...c(q-1) or n = +/-0.c(0)c(1)...c(q-1)
2725 *pfpsf
|= INEXACT_EXCEPTION
;
2729 } else { // if (1 <= q + exp <= 10, 1 <= q <= 34, -33 <= exp <= 9)
2730 // x = d(0)...d(k).d(k+1)..., k >= 0, d(0) != 0
2731 if (x_sign
) { // x <= -1
2733 *pfpsf
|= INVALID_EXCEPTION
;
2734 // return Integer Indefinite
2738 // x > 0 from this point on
2739 // 1 <= x < 2^32 so x can be rounded to zero to a 32-bit unsigned integer
2740 if (exp
< 0) { // 2 <= q <= 34, -33 <= exp <= -1, 1 <= q + exp <= 10
2741 ind
= -exp
; // 1 <= ind <= 33; ind is a synonym for 'x'
2742 // chop off ind digits from the lower part of C1
2743 // C1 = C1 + 1/2 * 10^ind where the result C1 fits in 127 bits
2746 C1
.w
[0] = C1
.w
[0] + midpoint64
[ind
- 1];
2748 C1
.w
[0] = C1
.w
[0] + midpoint128
[ind
- 20].w
[0];
2749 C1
.w
[1] = C1
.w
[1] + midpoint128
[ind
- 20].w
[1];
2751 if (C1
.w
[0] < tmp64
)
2753 // calculate C* and f*
2754 // C* is actually floor(C*) in this case
2755 // C* and f* need shifting and masking, as shown by
2756 // shiftright128[] and maskhigh128[]
2758 // kx = 10^(-x) = ten2mk128[ind - 1]
2759 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
2760 // the approximation of 10^(-x) was rounded up to 118 bits
2761 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
2762 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
2763 Cstar
.w
[1] = P256
.w
[3];
2764 Cstar
.w
[0] = P256
.w
[2];
2766 fstar
.w
[2] = P256
.w
[2] & maskhigh128
[ind
- 1];
2767 fstar
.w
[1] = P256
.w
[1];
2768 fstar
.w
[0] = P256
.w
[0];
2769 } else { // 22 <= ind - 1 <= 33
2771 Cstar
.w
[0] = P256
.w
[3];
2772 fstar
.w
[3] = P256
.w
[3] & maskhigh128
[ind
- 1];
2773 fstar
.w
[2] = P256
.w
[2];
2774 fstar
.w
[1] = P256
.w
[1];
2775 fstar
.w
[0] = P256
.w
[0];
2777 // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
2778 // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
2779 // if (0 < f* < 10^(-x)) then the result is a midpoint
2780 // if floor(C*) is even then C* = floor(C*) - logical right
2781 // shift; C* has p decimal digits, correct by Prop. 1)
2782 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
2783 // shift; C* has p decimal digits, correct by Pr. 1)
2785 // C* = floor(C*) (logical right shift; C has p decimal digits,
2786 // correct by Property 1)
2787 // n = C* * 10^(e+x)
2789 // shift right C* by Ex-128 = shiftright128[ind]
2790 shift
= shiftright128
[ind
- 1]; // 0 <= shift <= 102
2791 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
2793 (Cstar
.w
[0] >> shift
) | (Cstar
.w
[1] << (64 - shift
));
2794 // redundant, it will be 0! Cstar.w[1] = (Cstar.w[1] >> shift);
2795 } else { // 22 <= ind - 1 <= 33
2796 Cstar
.w
[0] = (Cstar
.w
[0] >> (shift
- 64)); // 2 <= shift - 64 <= 38
2798 // determine inexactness of the rounding of C*
2799 // if (0 < f* - 1/2 < 10^(-x)) then
2800 // the result is exact
2801 // else // if (f* - 1/2 > T*) then
2802 // the result is inexact
2804 if (fstar
.w
[1] > 0x8000000000000000ull
||
2805 (fstar
.w
[1] == 0x8000000000000000ull
2806 && fstar
.w
[0] > 0x0ull
)) {
2807 // f* > 1/2 and the result may be exact
2808 tmp64
= fstar
.w
[1] - 0x8000000000000000ull
; // f* - 1/2
2809 if (tmp64
> ten2mk128trunc
[ind
- 1].w
[1]
2810 || (tmp64
== ten2mk128trunc
[ind
- 1].w
[1]
2811 && fstar
.w
[0] >= ten2mk128trunc
[ind
- 1].w
[0])) {
2812 // set the inexact flag
2813 *pfpsf
|= INEXACT_EXCEPTION
;
2814 } // else the result is exact
2815 } else { // the result is inexact; f2* <= 1/2
2816 // set the inexact flag
2817 *pfpsf
|= INEXACT_EXCEPTION
;
2818 is_inexact_gt_midpoint
= 1;
2820 } else if (ind
- 1 <= 21) { // if 3 <= ind <= 21
2821 if (fstar
.w
[3] > 0x0 ||
2822 (fstar
.w
[3] == 0x0 && fstar
.w
[2] > onehalf128
[ind
- 1]) ||
2823 (fstar
.w
[3] == 0x0 && fstar
.w
[2] == onehalf128
[ind
- 1] &&
2824 (fstar
.w
[1] || fstar
.w
[0]))) {
2825 // f2* > 1/2 and the result may be exact
2826 // Calculate f2* - 1/2
2827 tmp64
= fstar
.w
[2] - onehalf128
[ind
- 1];
2828 tmp64A
= fstar
.w
[3];
2829 if (tmp64
> fstar
.w
[2])
2832 || fstar
.w
[1] > ten2mk128trunc
[ind
- 1].w
[1]
2833 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
2834 && fstar
.w
[0] > ten2mk128trunc
[ind
- 1].w
[0])) {
2835 // set the inexact flag
2836 *pfpsf
|= INEXACT_EXCEPTION
;
2837 } // else the result is exact
2838 } else { // the result is inexact; f2* <= 1/2
2839 // set the inexact flag
2840 *pfpsf
|= INEXACT_EXCEPTION
;
2841 is_inexact_gt_midpoint
= 1;
2843 } else { // if 22 <= ind <= 33
2844 if (fstar
.w
[3] > onehalf128
[ind
- 1] ||
2845 (fstar
.w
[3] == onehalf128
[ind
- 1] &&
2846 (fstar
.w
[2] || fstar
.w
[1] || fstar
.w
[0]))) {
2847 // f2* > 1/2 and the result may be exact
2848 // Calculate f2* - 1/2
2849 tmp64
= fstar
.w
[3] - onehalf128
[ind
- 1];
2850 if (tmp64
|| fstar
.w
[2]
2851 || fstar
.w
[1] > ten2mk128trunc
[ind
- 1].w
[1]
2852 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
2853 && fstar
.w
[0] > ten2mk128trunc
[ind
- 1].w
[0])) {
2854 // set the inexact flag
2855 *pfpsf
|= INEXACT_EXCEPTION
;
2856 } // else the result is exact
2857 } else { // the result is inexact; f2* <= 1/2
2858 // set the inexact flag
2859 *pfpsf
|= INEXACT_EXCEPTION
;
2860 is_inexact_gt_midpoint
= 1;
2864 // if the result was a midpoint it was rounded away from zero, so
2865 // it will need a correction
2866 // check for midpoints
2867 if ((fstar
.w
[3] == 0) && (fstar
.w
[2] == 0)
2868 && (fstar
.w
[1] || fstar
.w
[0])
2869 && (fstar
.w
[1] < ten2mk128trunc
[ind
- 1].w
[1]
2870 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
2871 && fstar
.w
[0] <= ten2mk128trunc
[ind
- 1].w
[0]))) {
2872 // the result is a midpoint; round to nearest
2873 if (Cstar
.w
[0] & 0x01) { // Cstar.w[0] is odd; MP in [EVEN, ODD]
2874 // if floor(C*) is odd C = floor(C*) - 1; the result >= 1
2875 Cstar
.w
[0]--; // Cstar.w[0] is now even
2876 is_inexact_gt_midpoint
= 0;
2877 } else { // else MP in [ODD, EVEN]
2878 is_midpoint_lt_even
= 1;
2879 is_inexact_gt_midpoint
= 0;
2882 // general correction for RZ
2883 if (is_midpoint_lt_even
|| is_inexact_gt_midpoint
) {
2884 Cstar
.w
[0] = Cstar
.w
[0] - 1;
2886 ; // exact, the result is already correct
2889 } else if (exp
== 0) {
2893 } else { // if (exp > 0) => 1 <= exp <= 9, 1 <= q < 9, 2 <= q + exp <= 10
2894 // res = +C * 10^exp (exact)
2895 res
= C1
.w
[0] * ten2k64
[exp
];
2903 /*****************************************************************************
2904 * BID128_to_uint32_rninta
2905 ****************************************************************************/
2907 BID128_FUNCTION_ARG1_NORND_CUSTOMRESTYPE (unsigned int,
2908 bid128_to_uint32_rninta
, x
)
2913 int exp
; // unbiased exponent
2914 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
2916 BID_UI64DOUBLE tmp1
;
2917 unsigned int x_nr_bits
;
2920 UINT128 Cstar
; // C* represents up to 34 decimal digits ~ 113 bits
2924 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
2925 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bit positions
2926 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
2929 // check for NaN or Infinity
2930 if ((x
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
) {
2932 if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
2933 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
2935 *pfpsf
|= INVALID_EXCEPTION
;
2936 // return Integer Indefinite
2938 } else { // x is QNaN
2940 *pfpsf
|= INVALID_EXCEPTION
;
2941 // return Integer Indefinite
2945 } else { // x is not a NaN, so it must be infinity
2946 if (!x_sign
) { // x is +inf
2948 *pfpsf
|= INVALID_EXCEPTION
;
2949 // return Integer Indefinite
2951 } else { // x is -inf
2953 *pfpsf
|= INVALID_EXCEPTION
;
2954 // return Integer Indefinite
2960 // check for non-canonical values (after the check for special values)
2961 if ((C1
.w
[1] > 0x0001ed09bead87c0ull
)
2962 || (C1
.w
[1] == 0x0001ed09bead87c0ull
2963 && (C1
.w
[0] > 0x378d8e63ffffffffull
))
2964 || ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
)) {
2967 } else if ((C1
.w
[1] == 0x0ull
) && (C1
.w
[0] == 0x0ull
)) {
2971 } else { // x is not special and is not zero
2973 // q = nr. of decimal digits in x
2974 // determine first the nr. of bits in x
2976 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
2977 // split the 64-bit value in two 32-bit halves to avoid rounding errors
2978 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
2979 tmp1
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
2981 33 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
2982 } else { // x < 2^32
2983 tmp1
.d
= (double) (C1
.w
[0]); // exact conversion
2985 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
2987 } else { // if x < 2^53
2988 tmp1
.d
= (double) C1
.w
[0]; // exact conversion
2990 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
2992 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
2993 tmp1
.d
= (double) C1
.w
[1]; // exact conversion
2995 65 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
2997 q
= nr_digits
[x_nr_bits
- 1].digits
;
2999 q
= nr_digits
[x_nr_bits
- 1].digits1
;
3000 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
3001 || (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
3002 && C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
3005 exp
= (x_exp
>> 49) - 6176;
3006 if ((q
+ exp
) > 10) { // x >= 10^10 ~= 2^33.2... (cannot fit in 32 bits)
3008 *pfpsf
|= INVALID_EXCEPTION
;
3009 // return Integer Indefinite
3012 } else if ((q
+ exp
) == 10) { // x = c(0)c(1)...c(9).c(10)...c(q-1)
3013 // in this case 2^29.89... ~= 10^9 <= x < 10^10 ~= 2^33.2...
3014 // so x rounded to an integer may or may not fit in a signed 32-bit int
3015 // the cases that do not fit are identified here; the ones that fit
3016 // fall through and will be handled with other cases further,
3017 // under '1 <= q + exp <= 10'
3018 if (x_sign
) { // if n < 0 and q + exp = 10
3019 // if n <= -1/2 then n is too large
3020 // too large if c(0)c(1)...c(9).c(10)...c(q-1) >= 1/2
3021 // <=> 0.c(0)c(1)...c(q-1) * 10^11 >= 0x05, 1<=q<=34
3023 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
3024 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
3025 if (tmp64
>= 0x05ull
) {
3027 *pfpsf
|= INVALID_EXCEPTION
;
3028 // return Integer Indefinite
3032 // else cases that can be rounded to a 32-bit int fall through
3033 // to '1 <= q + exp <= 10'
3034 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
3035 // 0.c(0)c(1)...c(q-1) * 10^11 >= 0x05 <=>
3036 // C >= 0x05 * 10^(q-11) where 1 <= q - 11 <= 23
3039 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
3040 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
3041 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
3042 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
3044 if (C1
.w
[1] > C
.w
[1]
3045 || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] >= C
.w
[0])) {
3047 *pfpsf
|= INVALID_EXCEPTION
;
3048 // return Integer Indefinite
3052 // else cases that can be rounded to a 32-bit int fall through
3053 // to '1 <= q + exp <= 10'
3055 } else { // if n > 0 and q + exp = 10
3056 // if n >= 2^32 - 1/2 then n is too large
3057 // too large if c(0)c(1)...c(9).c(10)...c(q-1) >= 2^32-1/2
3058 // too large if 0.c(0)c(1)...c(q-1) * 10^11 >= 0x9fffffffb, 1<=q<=34
3060 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
3061 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
3062 if (tmp64
>= 0x9fffffffbull
) {
3064 *pfpsf
|= INVALID_EXCEPTION
;
3065 // return Integer Indefinite
3069 // else cases that can be rounded to a 32-bit int fall through
3070 // to '1 <= q + exp <= 10'
3071 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
3072 // 0.c(0)c(1)...c(q-1) * 10^11 >= 0x9fffffffb <=>
3073 // C >= 0x9fffffffb * 10^(q-11) where 1 <= q - 11 <= 23
3074 // (scale 2^32-1/2 up)
3075 tmp64
= 0x9fffffffbull
;
3076 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
3077 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
3078 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
3079 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
3081 if (C1
.w
[1] > C
.w
[1]
3082 || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] >= C
.w
[0])) {
3084 *pfpsf
|= INVALID_EXCEPTION
;
3085 // return Integer Indefinite
3089 // else cases that can be rounded to a 32-bit int fall through
3090 // to '1 <= q + exp <= 10'
3094 // n is not too large to be converted to int32: -1/2 < n < 2^32 - 1/2
3095 // Note: some of the cases tested for above fall through to this point
3096 if ((q
+ exp
) < 0) { // n = +/-0.0...c(0)c(1)...c(q-1)
3100 } else if ((q
+ exp
) == 0) { // n = +/-0.c(0)c(1)...c(q-1)
3101 // if 0.c(0)c(1)...c(q-1) < 0.5 <=> c(0)c(1)...c(q-1) < 5 * 10^(q-1)
3108 if (ind
<= 18) { // 0 <= ind <= 18
3109 if ((C1
.w
[1] == 0) && (C1
.w
[0] < midpoint64
[ind
])) {
3110 res
= 0x00000000; // return 0
3111 } else if (!x_sign
) { // n > 0
3112 res
= 0x00000001; // return +1
3115 *pfpsf
|= INVALID_EXCEPTION
;
3117 } else { // 19 <= ind <= 33
3118 if ((C1
.w
[1] < midpoint128
[ind
- 19].w
[1])
3119 || ((C1
.w
[1] == midpoint128
[ind
- 19].w
[1])
3120 && (C1
.w
[0] < midpoint128
[ind
- 19].w
[0]))) {
3121 res
= 0x00000000; // return 0
3122 } else if (!x_sign
) { // n > 0
3123 res
= 0x00000001; // return +1
3126 *pfpsf
|= INVALID_EXCEPTION
;
3129 } else { // if (1 <= q + exp <= 10, 1 <= q <= 34, -33 <= exp <= 9)
3130 if (x_sign
) { // x <= -1
3132 *pfpsf
|= INVALID_EXCEPTION
;
3133 // return Integer Indefinite
3137 // 1 <= x < 2^31-1/2 so x can be rounded
3138 // to nearest-away to a 32-bit signed integer
3139 if (exp
< 0) { // 2 <= q <= 34, -33 <= exp <= -1, 1 <= q + exp <= 10
3140 ind
= -exp
; // 1 <= ind <= 33; ind is a synonym for 'x'
3141 // chop off ind digits from the lower part of C1
3142 // C1 = C1 + 1/2 * 10^ind where the result C1 fits in 127 bits
3145 C1
.w
[0] = C1
.w
[0] + midpoint64
[ind
- 1];
3147 C1
.w
[0] = C1
.w
[0] + midpoint128
[ind
- 20].w
[0];
3148 C1
.w
[1] = C1
.w
[1] + midpoint128
[ind
- 20].w
[1];
3150 if (C1
.w
[0] < tmp64
)
3152 // calculate C* and f*
3153 // C* is actually floor(C*) in this case
3154 // C* and f* need shifting and masking, as shown by
3155 // shiftright128[] and maskhigh128[]
3157 // kx = 10^(-x) = ten2mk128[ind - 1]
3158 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
3159 // the approximation of 10^(-x) was rounded up to 118 bits
3160 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
3161 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
3162 Cstar
.w
[1] = P256
.w
[3];
3163 Cstar
.w
[0] = P256
.w
[2];
3164 } else { // 22 <= ind - 1 <= 33
3166 Cstar
.w
[0] = P256
.w
[3];
3168 // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
3169 // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
3170 // if (0 < f* < 10^(-x)) then the result is a midpoint
3171 // if floor(C*) is even then C* = floor(C*) - logical right
3172 // shift; C* has p decimal digits, correct by Prop. 1)
3173 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
3174 // shift; C* has p decimal digits, correct by Pr. 1)
3176 // C* = floor(C*) (logical right shift; C has p decimal digits,
3177 // correct by Property 1)
3178 // n = C* * 10^(e+x)
3180 // shift right C* by Ex-128 = shiftright128[ind]
3181 shift
= shiftright128
[ind
- 1]; // 0 <= shift <= 102
3182 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
3184 (Cstar
.w
[0] >> shift
) | (Cstar
.w
[1] << (64 - shift
));
3185 // redundant, it will be 0! Cstar.w[1] = (Cstar.w[1] >> shift);
3186 } else { // 22 <= ind - 1 <= 33
3187 Cstar
.w
[0] = (Cstar
.w
[0] >> (shift
- 64)); // 2 <= shift - 64 <= 38
3189 // if the result was a midpoint, it was already rounded away from zero
3190 res
= Cstar
.w
[0]; // always positive
3191 // no need to check for midpoints - already rounded away from zero!
3192 } else if (exp
== 0) {
3196 } else { // if (exp > 0) => 1 <= exp <= 9, 1 <= q < 9, 2 <= q + exp <= 10
3197 // res = +C * 10^exp (exact)
3198 res
= C1
.w
[0] * ten2k64
[exp
];
3206 /*****************************************************************************
3207 * BID128_to_uint32_xrninta
3208 ****************************************************************************/
3210 BID128_FUNCTION_ARG1_NORND_CUSTOMRESTYPE (unsigned int,
3211 bid128_to_uint32_xrninta
, x
)
3216 int exp
; // unbiased exponent
3217 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
3218 UINT64 tmp64
, tmp64A
;
3219 BID_UI64DOUBLE tmp1
;
3220 unsigned int x_nr_bits
;
3223 UINT128 Cstar
; // C* represents up to 34 decimal digits ~ 113 bits
3226 unsigned int tmp_inexact
= 0;
3229 x_sign
= x
.w
[1] & MASK_SIGN
; // 0 for positive, MASK_SIGN for negative
3230 x_exp
= x
.w
[1] & MASK_EXP
; // biased and shifted left 49 bit positions
3231 C1
.w
[1] = x
.w
[1] & MASK_COEFF
;
3234 // check for NaN or Infinity
3235 if ((x
.w
[1] & MASK_SPECIAL
) == MASK_SPECIAL
) {
3237 if ((x
.w
[1] & MASK_NAN
) == MASK_NAN
) { // x is NAN
3238 if ((x
.w
[1] & MASK_SNAN
) == MASK_SNAN
) { // x is SNAN
3240 *pfpsf
|= INVALID_EXCEPTION
;
3241 // return Integer Indefinite
3243 } else { // x is QNaN
3245 *pfpsf
|= INVALID_EXCEPTION
;
3246 // return Integer Indefinite
3250 } else { // x is not a NaN, so it must be infinity
3251 if (!x_sign
) { // x is +inf
3253 *pfpsf
|= INVALID_EXCEPTION
;
3254 // return Integer Indefinite
3256 } else { // x is -inf
3258 *pfpsf
|= INVALID_EXCEPTION
;
3259 // return Integer Indefinite
3265 // check for non-canonical values (after the check for special values)
3266 if ((C1
.w
[1] > 0x0001ed09bead87c0ull
)
3267 || (C1
.w
[1] == 0x0001ed09bead87c0ull
3268 && (C1
.w
[0] > 0x378d8e63ffffffffull
))
3269 || ((x
.w
[1] & 0x6000000000000000ull
) == 0x6000000000000000ull
)) {
3272 } else if ((C1
.w
[1] == 0x0ull
) && (C1
.w
[0] == 0x0ull
)) {
3276 } else { // x is not special and is not zero
3278 // q = nr. of decimal digits in x
3279 // determine first the nr. of bits in x
3281 if (C1
.w
[0] >= 0x0020000000000000ull
) { // x >= 2^53
3282 // split the 64-bit value in two 32-bit halves to avoid rounding errors
3283 if (C1
.w
[0] >= 0x0000000100000000ull
) { // x >= 2^32
3284 tmp1
.d
= (double) (C1
.w
[0] >> 32); // exact conversion
3286 33 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
3287 } else { // x < 2^32
3288 tmp1
.d
= (double) (C1
.w
[0]); // exact conversion
3290 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
3292 } else { // if x < 2^53
3293 tmp1
.d
= (double) C1
.w
[0]; // exact conversion
3295 1 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
3297 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
3298 tmp1
.d
= (double) C1
.w
[1]; // exact conversion
3300 65 + ((((unsigned int) (tmp1
.ui64
>> 52)) & 0x7ff) - 0x3ff);
3302 q
= nr_digits
[x_nr_bits
- 1].digits
;
3304 q
= nr_digits
[x_nr_bits
- 1].digits1
;
3305 if (C1
.w
[1] > nr_digits
[x_nr_bits
- 1].threshold_hi
3306 || (C1
.w
[1] == nr_digits
[x_nr_bits
- 1].threshold_hi
3307 && C1
.w
[0] >= nr_digits
[x_nr_bits
- 1].threshold_lo
))
3310 exp
= (x_exp
>> 49) - 6176;
3311 if ((q
+ exp
) > 10) { // x >= 10^10 ~= 2^33.2... (cannot fit in 32 bits)
3313 *pfpsf
|= INVALID_EXCEPTION
;
3314 // return Integer Indefinite
3317 } else if ((q
+ exp
) == 10) { // x = c(0)c(1)...c(9).c(10)...c(q-1)
3318 // in this case 2^29.89... ~= 10^9 <= x < 10^10 ~= 2^33.2...
3319 // so x rounded to an integer may or may not fit in a signed 32-bit int
3320 // the cases that do not fit are identified here; the ones that fit
3321 // fall through and will be handled with other cases further,
3322 // under '1 <= q + exp <= 10'
3323 if (x_sign
) { // if n < 0 and q + exp = 10
3324 // if n <= -1/2 then n is too large
3325 // too large if c(0)c(1)...c(9).c(10)...c(q-1) >= 1/2
3326 // <=> 0.c(0)c(1)...c(q-1) * 10^11 >= 0x05, 1<=q<=34
3328 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
3329 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
3330 if (tmp64
>= 0x05ull
) {
3332 *pfpsf
|= INVALID_EXCEPTION
;
3333 // return Integer Indefinite
3337 // else cases that can be rounded to a 32-bit int fall through
3338 // to '1 <= q + exp <= 10'
3339 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
3340 // 0.c(0)c(1)...c(q-1) * 10^11 >= 0x05 <=>
3341 // C >= 0x05 * 10^(q-11) where 1 <= q - 11 <= 23
3344 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
3345 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
3346 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
3347 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
3349 if (C1
.w
[1] > C
.w
[1]
3350 || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] >= C
.w
[0])) {
3352 *pfpsf
|= INVALID_EXCEPTION
;
3353 // return Integer Indefinite
3357 // else cases that can be rounded to a 32-bit int fall through
3358 // to '1 <= q + exp <= 10'
3360 } else { // if n > 0 and q + exp = 10
3361 // if n >= 2^32 - 1/2 then n is too large
3362 // too large if c(0)c(1)...c(9).c(10)...c(q-1) >= 2^32-1/2
3363 // too large if 0.c(0)c(1)...c(q-1) * 10^11 >= 0x9fffffffb, 1<=q<=34
3365 tmp64
= C1
.w
[0] * ten2k64
[11 - q
]; // C scaled up to 11-digit int
3366 // c(0)c(1)...c(9)c(10) or c(0)c(1)...c(q-1)0...0 (11 digits)
3367 if (tmp64
>= 0x9fffffffbull
) {
3369 *pfpsf
|= INVALID_EXCEPTION
;
3370 // return Integer Indefinite
3374 // else cases that can be rounded to a 32-bit int fall through
3375 // to '1 <= q + exp <= 10'
3376 } else { // if (q > 11), i.e. 12 <= q <= 34 and so -24 <= exp <= -2
3377 // 0.c(0)c(1)...c(q-1) * 10^11 >= 0x9fffffffb <=>
3378 // C >= 0x9fffffffb * 10^(q-11) where 1 <= q - 11 <= 23
3379 // (scale 2^32-1/2 up)
3380 tmp64
= 0x9fffffffbull
;
3381 if (q
- 11 <= 19) { // 1 <= q - 11 <= 19; 10^(q-11) requires 64 bits
3382 __mul_64x64_to_128MACH (C
, tmp64
, ten2k64
[q
- 11]);
3383 } else { // 20 <= q - 11 <= 23, and 10^(q-11) requires 128 bits
3384 __mul_128x64_to_128 (C
, tmp64
, ten2k128
[q
- 31]);
3386 if (C1
.w
[1] > C
.w
[1]
3387 || (C1
.w
[1] == C
.w
[1] && C1
.w
[0] >= C
.w
[0])) {
3389 *pfpsf
|= INVALID_EXCEPTION
;
3390 // return Integer Indefinite
3394 // else cases that can be rounded to a 32-bit int fall through
3395 // to '1 <= q + exp <= 10'
3399 // n is not too large to be converted to int32: -1/2 < n < 2^32 - 1/2
3400 // Note: some of the cases tested for above fall through to this point
3401 if ((q
+ exp
) < 0) { // n = +/-0.0...c(0)c(1)...c(q-1)
3403 *pfpsf
|= INEXACT_EXCEPTION
;
3407 } else if ((q
+ exp
) == 0) { // n = +/-0.c(0)c(1)...c(q-1)
3408 // if 0.c(0)c(1)...c(q-1) < 0.5 <=> c(0)c(1)...c(q-1) < 5 * 10^(q-1)
3415 if (ind
<= 18) { // 0 <= ind <= 18
3416 if ((C1
.w
[1] == 0) && (C1
.w
[0] < midpoint64
[ind
])) {
3417 res
= 0x00000000; // return 0
3418 } else if (!x_sign
) { // n > 0
3419 res
= 0x00000001; // return +1
3422 *pfpsf
|= INVALID_EXCEPTION
;
3425 } else { // 19 <= ind <= 33
3426 if ((C1
.w
[1] < midpoint128
[ind
- 19].w
[1])
3427 || ((C1
.w
[1] == midpoint128
[ind
- 19].w
[1])
3428 && (C1
.w
[0] < midpoint128
[ind
- 19].w
[0]))) {
3429 res
= 0x00000000; // return 0
3430 } else if (!x_sign
) { // n > 0
3431 res
= 0x00000001; // return +1
3434 *pfpsf
|= INVALID_EXCEPTION
;
3439 *pfpsf
|= INEXACT_EXCEPTION
;
3440 } else { // if (1 <= q + exp <= 10, 1 <= q <= 34, -33 <= exp <= 9)
3441 if (x_sign
) { // x <= -1
3443 *pfpsf
|= INVALID_EXCEPTION
;
3444 // return Integer Indefinite
3448 // 1 <= x < 2^31-1/2 so x can be rounded
3449 // to nearest-away to a 32-bit signed integer
3450 if (exp
< 0) { // 2 <= q <= 34, -33 <= exp <= -1, 1 <= q + exp <= 10
3451 ind
= -exp
; // 1 <= ind <= 33; ind is a synonym for 'x'
3452 // chop off ind digits from the lower part of C1
3453 // C1 = C1 + 1/2 * 10^ind where the result C1 fits in 127 bits
3456 C1
.w
[0] = C1
.w
[0] + midpoint64
[ind
- 1];
3458 C1
.w
[0] = C1
.w
[0] + midpoint128
[ind
- 20].w
[0];
3459 C1
.w
[1] = C1
.w
[1] + midpoint128
[ind
- 20].w
[1];
3461 if (C1
.w
[0] < tmp64
)
3463 // calculate C* and f*
3464 // C* is actually floor(C*) in this case
3465 // C* and f* need shifting and masking, as shown by
3466 // shiftright128[] and maskhigh128[]
3468 // kx = 10^(-x) = ten2mk128[ind - 1]
3469 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
3470 // the approximation of 10^(-x) was rounded up to 118 bits
3471 __mul_128x128_to_256 (P256
, C1
, ten2mk128
[ind
- 1]);
3472 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
3473 Cstar
.w
[1] = P256
.w
[3];
3474 Cstar
.w
[0] = P256
.w
[2];
3476 fstar
.w
[2] = P256
.w
[2] & maskhigh128
[ind
- 1];
3477 fstar
.w
[1] = P256
.w
[1];
3478 fstar
.w
[0] = P256
.w
[0];
3479 } else { // 22 <= ind - 1 <= 33
3481 Cstar
.w
[0] = P256
.w
[3];
3482 fstar
.w
[3] = P256
.w
[3] & maskhigh128
[ind
- 1];
3483 fstar
.w
[2] = P256
.w
[2];
3484 fstar
.w
[1] = P256
.w
[1];
3485 fstar
.w
[0] = P256
.w
[0];
3487 // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
3488 // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
3489 // if (0 < f* < 10^(-x)) then the result is a midpoint
3490 // if floor(C*) is even then C* = floor(C*) - logical right
3491 // shift; C* has p decimal digits, correct by Prop. 1)
3492 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
3493 // shift; C* has p decimal digits, correct by Pr. 1)
3495 // C* = floor(C*) (logical right shift; C has p decimal digits,
3496 // correct by Property 1)
3497 // n = C* * 10^(e+x)
3499 // shift right C* by Ex-128 = shiftright128[ind]
3500 shift
= shiftright128
[ind
- 1]; // 0 <= shift <= 102
3501 if (ind
- 1 <= 21) { // 0 <= ind - 1 <= 21
3503 (Cstar
.w
[0] >> shift
) | (Cstar
.w
[1] << (64 - shift
));
3504 // redundant, it will be 0! Cstar.w[1] = (Cstar.w[1] >> shift);
3505 } else { // 22 <= ind - 1 <= 33
3506 Cstar
.w
[0] = (Cstar
.w
[0] >> (shift
- 64)); // 2 <= shift - 64 <= 38
3508 // if the result was a midpoint, it was already rounded away from zero
3509 // determine inexactness of the rounding of C*
3510 // if (0 < f* - 1/2 < 10^(-x)) then
3511 // the result is exact
3512 // else // if (f* - 1/2 > T*) then
3513 // the result is inexact
3515 if (fstar
.w
[1] > 0x8000000000000000ull
||
3516 (fstar
.w
[1] == 0x8000000000000000ull
3517 && fstar
.w
[0] > 0x0ull
)) {
3518 // f* > 1/2 and the result may be exact
3519 tmp64
= fstar
.w
[1] - 0x8000000000000000ull
; // f* - 1/2
3520 if (tmp64
> ten2mk128trunc
[ind
- 1].w
[1]
3521 || (tmp64
== ten2mk128trunc
[ind
- 1].w
[1]
3522 && fstar
.w
[0] >= ten2mk128trunc
[ind
- 1].w
[0])) {
3523 // set the inexact flag
3524 // *pfpsf |= INEXACT_EXCEPTION;
3526 } // else the result is exact
3527 } else { // the result is inexact; f2* <= 1/2
3528 // set the inexact flag
3529 // *pfpsf |= INEXACT_EXCEPTION;
3532 } else if (ind
- 1 <= 21) { // if 3 <= ind <= 21
3533 if (fstar
.w
[3] > 0x0 ||
3534 (fstar
.w
[3] == 0x0 && fstar
.w
[2] > onehalf128
[ind
- 1]) ||
3535 (fstar
.w
[3] == 0x0 && fstar
.w
[2] == onehalf128
[ind
- 1] &&
3536 (fstar
.w
[1] || fstar
.w
[0]))) {
3537 // f2* > 1/2 and the result may be exact
3538 // Calculate f2* - 1/2
3539 tmp64
= fstar
.w
[2] - onehalf128
[ind
- 1];
3540 tmp64A
= fstar
.w
[3];
3541 if (tmp64
> fstar
.w
[2])
3544 || fstar
.w
[1] > ten2mk128trunc
[ind
- 1].w
[1]
3545 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
3546 && fstar
.w
[0] > ten2mk128trunc
[ind
- 1].w
[0])) {
3547 // set the inexact flag
3548 // *pfpsf |= INEXACT_EXCEPTION;
3550 } // else the result is exact
3551 } else { // the result is inexact; f2* <= 1/2
3552 // set the inexact flag
3553 // *pfpsf |= INEXACT_EXCEPTION;
3556 } else { // if 22 <= ind <= 33
3557 if (fstar
.w
[3] > onehalf128
[ind
- 1] ||
3558 (fstar
.w
[3] == onehalf128
[ind
- 1] &&
3559 (fstar
.w
[2] || fstar
.w
[1] || fstar
.w
[0]))) {
3560 // f2* > 1/2 and the result may be exact
3561 // Calculate f2* - 1/2
3562 tmp64
= fstar
.w
[3] - onehalf128
[ind
- 1];
3563 if (tmp64
|| fstar
.w
[2]
3564 || fstar
.w
[1] > ten2mk128trunc
[ind
- 1].w
[1]
3565 || (fstar
.w
[1] == ten2mk128trunc
[ind
- 1].w
[1]
3566 && fstar
.w
[0] > ten2mk128trunc
[ind
- 1].w
[0])) {
3567 // set the inexact flag
3568 // *pfpsf |= INEXACT_EXCEPTION;
3570 } // else the result is exact
3571 } else { // the result is inexact; f2* <= 1/2
3572 // set the inexact flag
3573 // *pfpsf |= INEXACT_EXCEPTION;
3577 // no need to check for midpoints - already rounded away from zero!
3578 res
= Cstar
.w
[0]; // the result is positive
3580 *pfpsf
|= INEXACT_EXCEPTION
;
3581 } else if (exp
== 0) {
3585 } else { // if (exp > 0) => 1 <= exp <= 9, 1 <= q < 9, 2 <= q + exp <= 10
3586 // res = +C * 10^exp (exact)
3587 res
= C1
.w
[0] * ten2k64
[exp
];