Merge -r 127928:132243 from trunk
[official-gcc.git] / libgcc / config / libbid / bid128_round_integral.c
blobd517095a77042c4eabe3e54cd1403df759352983
1 /* Copyright (C) 2007 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
8 version.
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
17 executable.)
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
22 for more details.
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA. */
29 #define BID_128RES
31 #include "bid_internal.h"
33 /*****************************************************************************
34 * BID128_round_integral_exact
35 ****************************************************************************/
37 BID128_FUNCTION_ARG1 (bid128_round_integral_exact, x)
39 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
41 UINT64 x_sign;
42 UINT64 x_exp;
43 int exp; // unbiased exponent
44 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
45 UINT64 tmp64;
46 BID_UI64DOUBLE tmp1;
47 unsigned int x_nr_bits;
48 int q, ind, shift;
49 UINT128 C1;
50 UINT256 fstar;
51 UINT256 P256;
53 // check for NaN or Infinity
54 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
55 // x is special
56 if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
57 // if x = NaN, then res = Q (x)
58 // check first for non-canonical NaN payload
59 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
60 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
61 (x.w[0] > 0x38c15b09ffffffffull))) {
62 x.w[1] = x.w[1] & 0xffffc00000000000ull;
63 x.w[0] = 0x0ull;
65 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
66 // set invalid flag
67 *pfpsf |= INVALID_EXCEPTION;
68 // return quiet (x)
69 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
70 res.w[0] = x.w[0];
71 } else { // x is QNaN
72 // return x
73 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
74 res.w[0] = x.w[0];
76 BID_RETURN (res)
77 } else { // x is not a NaN, so it must be infinity
78 if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
79 // return +inf
80 res.w[1] = 0x7800000000000000ull;
81 res.w[0] = 0x0000000000000000ull;
82 } else { // x is -inf
83 // return -inf
84 res.w[1] = 0xf800000000000000ull;
85 res.w[0] = 0x0000000000000000ull;
87 BID_RETURN (res);
90 // unpack x
91 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
92 C1.w[1] = x.w[1] & MASK_COEFF;
93 C1.w[0] = x.w[0];
95 // check for non-canonical values (treated as zero)
96 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
97 // non-canonical
98 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
99 C1.w[1] = 0; // significand high
100 C1.w[0] = 0; // significand low
101 } else { // G0_G1 != 11
102 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
103 if (C1.w[1] > 0x0001ed09bead87c0ull ||
104 (C1.w[1] == 0x0001ed09bead87c0ull
105 && C1.w[0] > 0x378d8e63ffffffffull)) {
106 // x is non-canonical if coefficient is larger than 10^34 -1
107 C1.w[1] = 0;
108 C1.w[0] = 0;
109 } else { // canonical
114 // test for input equal to zero
115 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
116 // x is 0
117 // return 0 preserving the sign bit and the preferred exponent
118 // of MAX(Q(x), 0)
119 if (x_exp <= (0x1820ull << 49)) {
120 res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
121 } else {
122 res.w[1] = x_sign | x_exp;
124 res.w[0] = 0x0000000000000000ull;
125 BID_RETURN (res);
127 // x is not special and is not zero
129 switch (rnd_mode) {
130 case ROUNDING_TO_NEAREST:
131 case ROUNDING_TIES_AWAY:
132 // if (exp <= -(p+1)) return 0.0
133 if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35
134 res.w[1] = x_sign | 0x3040000000000000ull;
135 res.w[0] = 0x0000000000000000ull;
136 *pfpsf |= INEXACT_EXCEPTION;
137 BID_RETURN (res);
139 break;
140 case ROUNDING_DOWN:
141 // if (exp <= -p) return -1.0 or +0.0
142 if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffa000000000000ull == -34
143 if (x_sign) {
144 // if negative, return negative 1, because we know coefficient
145 // is non-zero (would have been caught above)
146 res.w[1] = 0xb040000000000000ull;
147 res.w[0] = 0x0000000000000001ull;
148 } else {
149 // if positive, return positive 0, because we know coefficient is
150 // non-zero (would have been caught above)
151 res.w[1] = 0x3040000000000000ull;
152 res.w[0] = 0x0000000000000000ull;
154 *pfpsf |= INEXACT_EXCEPTION;
155 BID_RETURN (res);
157 break;
158 case ROUNDING_UP:
159 // if (exp <= -p) return -0.0 or +1.0
160 if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
161 if (x_sign) {
162 // if negative, return negative 0, because we know the coefficient
163 // is non-zero (would have been caught above)
164 res.w[1] = 0xb040000000000000ull;
165 res.w[0] = 0x0000000000000000ull;
166 } else {
167 // if positive, return positive 1, because we know coefficient is
168 // non-zero (would have been caught above)
169 res.w[1] = 0x3040000000000000ull;
170 res.w[0] = 0x0000000000000001ull;
172 *pfpsf |= INEXACT_EXCEPTION;
173 BID_RETURN (res);
175 break;
176 case ROUNDING_TO_ZERO:
177 // if (exp <= -p) return -0.0 or +0.0
178 if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
179 res.w[1] = x_sign | 0x3040000000000000ull;
180 res.w[0] = 0x0000000000000000ull;
181 *pfpsf |= INEXACT_EXCEPTION;
182 BID_RETURN (res);
184 break;
187 // q = nr. of decimal digits in x
188 // determine first the nr. of bits in x
189 if (C1.w[1] == 0) {
190 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
191 // split the 64-bit value in two 32-bit halves to avoid rounding errors
192 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
193 tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
194 x_nr_bits =
195 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
196 } else { // x < 2^32
197 tmp1.d = (double) (C1.w[0]); // exact conversion
198 x_nr_bits =
199 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
201 } else { // if x < 2^53
202 tmp1.d = (double) C1.w[0]; // exact conversion
203 x_nr_bits =
204 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
206 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
207 tmp1.d = (double) C1.w[1]; // exact conversion
208 x_nr_bits =
209 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
212 q = nr_digits[x_nr_bits - 1].digits;
213 if (q == 0) {
214 q = nr_digits[x_nr_bits - 1].digits1;
215 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
216 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
217 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
218 q++;
220 exp = (x_exp >> 49) - 6176;
221 if (exp >= 0) { // -exp <= 0
222 // the argument is an integer already
223 res.w[1] = x.w[1];
224 res.w[0] = x.w[0];
225 BID_RETURN (res);
227 // exp < 0
228 switch (rnd_mode) {
229 case ROUNDING_TO_NEAREST:
230 if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
231 // need to shift right -exp digits from the coefficient; exp will be 0
232 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
233 // chop off ind digits from the lower part of C1
234 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
235 tmp64 = C1.w[0];
236 if (ind <= 19) {
237 C1.w[0] = C1.w[0] + midpoint64[ind - 1];
238 } else {
239 C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
240 C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
242 if (C1.w[0] < tmp64)
243 C1.w[1]++;
244 // calculate C* and f*
245 // C* is actually floor(C*) in this case
246 // C* and f* need shifting and masking, as shown by
247 // shiftright128[] and maskhigh128[]
248 // 1 <= x <= 34
249 // kx = 10^(-x) = ten2mk128[ind - 1]
250 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
251 // the approximation of 10^(-x) was rounded up to 118 bits
252 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
253 // determine the value of res and fstar
255 // determine inexactness of the rounding of C*
256 // if (0 < f* - 1/2 < 10^(-x)) then
257 // the result is exact
258 // else // if (f* - 1/2 > T*) then
259 // the result is inexact
260 // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[]
262 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
263 // redundant shift = shiftright128[ind - 1]; // shift = 0
264 res.w[1] = P256.w[3];
265 res.w[0] = P256.w[2];
266 // redundant fstar.w[3] = 0;
267 // redundant fstar.w[2] = 0;
268 fstar.w[1] = P256.w[1];
269 fstar.w[0] = P256.w[0];
270 // fraction f* < 10^(-x) <=> midpoint
271 // f* is in the right position to be compared with
272 // 10^(-x) from ten2mk128[]
273 // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even)
274 if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
275 ((fstar.w[1] < (ten2mk128[ind - 1].w[1]))
276 || ((fstar.w[1] == ten2mk128[ind - 1].w[1])
277 && (fstar.w[0] < ten2mk128[ind - 1].w[0])))) {
278 // subract 1 to make even
279 if (res.w[0]-- == 0) {
280 res.w[1]--;
283 if (fstar.w[1] > 0x8000000000000000ull ||
284 (fstar.w[1] == 0x8000000000000000ull
285 && fstar.w[0] > 0x0ull)) {
286 // f* > 1/2 and the result may be exact
287 tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2
288 if (tmp64 > ten2mk128[ind - 1].w[1] ||
289 (tmp64 == ten2mk128[ind - 1].w[1] &&
290 fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
291 // set the inexact flag
292 *pfpsf |= INEXACT_EXCEPTION;
293 } // else the result is exact
294 } else { // the result is inexact; f2* <= 1/2
295 // set the inexact flag
296 *pfpsf |= INEXACT_EXCEPTION;
298 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
299 shift = shiftright128[ind - 1]; // 3 <= shift <= 63
300 res.w[1] = (P256.w[3] >> shift);
301 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
302 // redundant fstar.w[3] = 0;
303 fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
304 fstar.w[1] = P256.w[1];
305 fstar.w[0] = P256.w[0];
306 // fraction f* < 10^(-x) <=> midpoint
307 // f* is in the right position to be compared with
308 // 10^(-x) from ten2mk128[]
309 if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
310 fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] ||
311 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
312 fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
313 // subract 1 to make even
314 if (res.w[0]-- == 0) {
315 res.w[1]--;
318 if (fstar.w[2] > onehalf128[ind - 1] ||
319 (fstar.w[2] == onehalf128[ind - 1]
320 && (fstar.w[1] || fstar.w[0]))) {
321 // f2* > 1/2 and the result may be exact
322 // Calculate f2* - 1/2
323 tmp64 = fstar.w[2] - onehalf128[ind - 1];
324 if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
325 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
326 fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
327 // set the inexact flag
328 *pfpsf |= INEXACT_EXCEPTION;
329 } // else the result is exact
330 } else { // the result is inexact; f2* <= 1/2
331 // set the inexact flag
332 *pfpsf |= INEXACT_EXCEPTION;
334 } else { // 22 <= ind - 1 <= 33
335 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
336 res.w[1] = 0;
337 res.w[0] = P256.w[3] >> shift;
338 fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
339 fstar.w[2] = P256.w[2];
340 fstar.w[1] = P256.w[1];
341 fstar.w[0] = P256.w[0];
342 // fraction f* < 10^(-x) <=> midpoint
343 // f* is in the right position to be compared with
344 // 10^(-x) from ten2mk128[]
345 if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
346 fstar.w[3] == 0 && fstar.w[2] == 0 &&
347 (fstar.w[1] < ten2mk128[ind - 1].w[1] ||
348 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
349 fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
350 // subract 1 to make even
351 if (res.w[0]-- == 0) {
352 res.w[1]--;
355 if (fstar.w[3] > onehalf128[ind - 1] ||
356 (fstar.w[3] == onehalf128[ind - 1] &&
357 (fstar.w[2] || fstar.w[1] || fstar.w[0]))) {
358 // f2* > 1/2 and the result may be exact
359 // Calculate f2* - 1/2
360 tmp64 = fstar.w[3] - onehalf128[ind - 1];
361 if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1]
362 || (fstar.w[1] == ten2mk128[ind - 1].w[1]
363 && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
364 // set the inexact flag
365 *pfpsf |= INEXACT_EXCEPTION;
366 } // else the result is exact
367 } else { // the result is inexact; f2* <= 1/2
368 // set the inexact flag
369 *pfpsf |= INEXACT_EXCEPTION;
372 res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
373 BID_RETURN (res);
374 } else { // if ((q + exp) < 0) <=> q < -exp
375 // the result is +0 or -0
376 res.w[1] = x_sign | 0x3040000000000000ull;
377 res.w[0] = 0x0000000000000000ull;
378 *pfpsf |= INEXACT_EXCEPTION;
379 BID_RETURN (res);
381 break;
382 case ROUNDING_TIES_AWAY:
383 if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
384 // need to shift right -exp digits from the coefficient; exp will be 0
385 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
386 // chop off ind digits from the lower part of C1
387 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
388 tmp64 = C1.w[0];
389 if (ind <= 19) {
390 C1.w[0] = C1.w[0] + midpoint64[ind - 1];
391 } else {
392 C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
393 C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
395 if (C1.w[0] < tmp64)
396 C1.w[1]++;
397 // calculate C* and f*
398 // C* is actually floor(C*) in this case
399 // C* and f* need shifting and masking, as shown by
400 // shiftright128[] and maskhigh128[]
401 // 1 <= x <= 34
402 // kx = 10^(-x) = ten2mk128[ind - 1]
403 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
404 // the approximation of 10^(-x) was rounded up to 118 bits
405 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
406 // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
407 // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
408 // if (0 < f* < 10^(-x)) then the result is a midpoint
409 // if floor(C*) is even then C* = floor(C*) - logical right
410 // shift; C* has p decimal digits, correct by Prop. 1)
411 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
412 // shift; C* has p decimal digits, correct by Pr. 1)
413 // else
414 // C* = floor(C*) (logical right shift; C has p decimal digits,
415 // correct by Property 1)
416 // n = C* * 10^(e+x)
418 // determine also the inexactness of the rounding of C*
419 // if (0 < f* - 1/2 < 10^(-x)) then
420 // the result is exact
421 // else // if (f* - 1/2 > T*) then
422 // the result is inexact
423 // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[]
424 // shift right C* by Ex-128 = shiftright128[ind]
425 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
426 // redundant shift = shiftright128[ind - 1]; // shift = 0
427 res.w[1] = P256.w[3];
428 res.w[0] = P256.w[2];
429 // redundant fstar.w[3] = 0;
430 // redundant fstar.w[2] = 0;
431 fstar.w[1] = P256.w[1];
432 fstar.w[0] = P256.w[0];
433 if (fstar.w[1] > 0x8000000000000000ull ||
434 (fstar.w[1] == 0x8000000000000000ull
435 && fstar.w[0] > 0x0ull)) {
436 // f* > 1/2 and the result may be exact
437 tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2
438 if ((tmp64 > ten2mk128[ind - 1].w[1] ||
439 (tmp64 == ten2mk128[ind - 1].w[1] &&
440 fstar.w[0] >= ten2mk128[ind - 1].w[0]))) {
441 // set the inexact flag
442 *pfpsf |= INEXACT_EXCEPTION;
443 } // else the result is exact
444 } else { // the result is inexact; f2* <= 1/2
445 // set the inexact flag
446 *pfpsf |= INEXACT_EXCEPTION;
448 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
449 shift = shiftright128[ind - 1]; // 3 <= shift <= 63
450 res.w[1] = (P256.w[3] >> shift);
451 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
452 // redundant fstar.w[3] = 0;
453 fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
454 fstar.w[1] = P256.w[1];
455 fstar.w[0] = P256.w[0];
456 if (fstar.w[2] > onehalf128[ind - 1] ||
457 (fstar.w[2] == onehalf128[ind - 1]
458 && (fstar.w[1] || fstar.w[0]))) {
459 // f2* > 1/2 and the result may be exact
460 // Calculate f2* - 1/2
461 tmp64 = fstar.w[2] - onehalf128[ind - 1];
462 if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
463 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
464 fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
465 // set the inexact flag
466 *pfpsf |= INEXACT_EXCEPTION;
467 } // else the result is exact
468 } else { // the result is inexact; f2* <= 1/2
469 // set the inexact flag
470 *pfpsf |= INEXACT_EXCEPTION;
472 } else { // 22 <= ind - 1 <= 33
473 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
474 res.w[1] = 0;
475 res.w[0] = P256.w[3] >> shift;
476 fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
477 fstar.w[2] = P256.w[2];
478 fstar.w[1] = P256.w[1];
479 fstar.w[0] = P256.w[0];
480 if (fstar.w[3] > onehalf128[ind - 1] ||
481 (fstar.w[3] == onehalf128[ind - 1] &&
482 (fstar.w[2] || fstar.w[1] || fstar.w[0]))) {
483 // f2* > 1/2 and the result may be exact
484 // Calculate f2* - 1/2
485 tmp64 = fstar.w[3] - onehalf128[ind - 1];
486 if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1]
487 || (fstar.w[1] == ten2mk128[ind - 1].w[1]
488 && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
489 // set the inexact flag
490 *pfpsf |= INEXACT_EXCEPTION;
491 } // else the result is exact
492 } else { // the result is inexact; f2* <= 1/2
493 // set the inexact flag
494 *pfpsf |= INEXACT_EXCEPTION;
497 // if the result was a midpoint, it was already rounded away from zero
498 res.w[1] |= x_sign | 0x3040000000000000ull;
499 BID_RETURN (res);
500 } else { // if ((q + exp) < 0) <=> q < -exp
501 // the result is +0 or -0
502 res.w[1] = x_sign | 0x3040000000000000ull;
503 res.w[0] = 0x0000000000000000ull;
504 *pfpsf |= INEXACT_EXCEPTION;
505 BID_RETURN (res);
507 break;
508 case ROUNDING_DOWN:
509 if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
510 // need to shift right -exp digits from the coefficient; exp will be 0
511 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
512 // (number of digits to be chopped off)
513 // chop off ind digits from the lower part of C1
514 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
515 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
516 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
517 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
518 // tmp64 = C1.w[0];
519 // if (ind <= 19) {
520 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
521 // } else {
522 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
523 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
524 // }
525 // if (C1.w[0] < tmp64) C1.w[1]++;
526 // if carry-out from C1.w[0], increment C1.w[1]
527 // calculate C* and f*
528 // C* is actually floor(C*) in this case
529 // C* and f* need shifting and masking, as shown by
530 // shiftright128[] and maskhigh128[]
531 // 1 <= x <= 34
532 // kx = 10^(-x) = ten2mk128[ind - 1]
533 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
534 // the approximation of 10^(-x) was rounded up to 118 bits
535 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
536 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
537 res.w[1] = P256.w[3];
538 res.w[0] = P256.w[2];
539 // redundant fstar.w[3] = 0;
540 // redundant fstar.w[2] = 0;
541 // redundant fstar.w[1] = P256.w[1];
542 // redundant fstar.w[0] = P256.w[0];
543 // fraction f* > 10^(-x) <=> inexact
544 // f* is in the right position to be compared with
545 // 10^(-x) from ten2mk128[]
546 if ((P256.w[1] > ten2mk128[ind - 1].w[1])
547 || (P256.w[1] == ten2mk128[ind - 1].w[1]
548 && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
549 *pfpsf |= INEXACT_EXCEPTION;
550 // if positive, the truncated value is already the correct result
551 if (x_sign) { // if negative
552 if (++res.w[0] == 0) {
553 res.w[1]++;
557 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
558 shift = shiftright128[ind - 1]; // 0 <= shift <= 102
559 res.w[1] = (P256.w[3] >> shift);
560 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
561 // redundant fstar.w[3] = 0;
562 fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
563 fstar.w[1] = P256.w[1];
564 fstar.w[0] = P256.w[0];
565 // fraction f* > 10^(-x) <=> inexact
566 // f* is in the right position to be compared with
567 // 10^(-x) from ten2mk128[]
568 if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
569 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
570 fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
571 *pfpsf |= INEXACT_EXCEPTION;
572 // if positive, the truncated value is already the correct result
573 if (x_sign) { // if negative
574 if (++res.w[0] == 0) {
575 res.w[1]++;
579 } else { // 22 <= ind - 1 <= 33
580 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
581 res.w[1] = 0;
582 res.w[0] = P256.w[3] >> shift;
583 fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
584 fstar.w[2] = P256.w[2];
585 fstar.w[1] = P256.w[1];
586 fstar.w[0] = P256.w[0];
587 // fraction f* > 10^(-x) <=> inexact
588 // f* is in the right position to be compared with
589 // 10^(-x) from ten2mk128[]
590 if (fstar.w[3] || fstar.w[2]
591 || fstar.w[1] > ten2mk128[ind - 1].w[1]
592 || (fstar.w[1] == ten2mk128[ind - 1].w[1]
593 && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
594 *pfpsf |= INEXACT_EXCEPTION;
595 // if positive, the truncated value is already the correct result
596 if (x_sign) { // if negative
597 if (++res.w[0] == 0) {
598 res.w[1]++;
603 res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
604 BID_RETURN (res);
605 } else { // if exp < 0 and q + exp <= 0
606 if (x_sign) { // negative rounds down to -1.0
607 res.w[1] = 0xb040000000000000ull;
608 res.w[0] = 0x0000000000000001ull;
609 } else { // positive rpunds down to +0.0
610 res.w[1] = 0x3040000000000000ull;
611 res.w[0] = 0x0000000000000000ull;
613 *pfpsf |= INEXACT_EXCEPTION;
614 BID_RETURN (res);
616 break;
617 case ROUNDING_UP:
618 if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
619 // need to shift right -exp digits from the coefficient; exp will be 0
620 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
621 // (number of digits to be chopped off)
622 // chop off ind digits from the lower part of C1
623 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
624 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
625 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
626 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
627 // tmp64 = C1.w[0];
628 // if (ind <= 19) {
629 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
630 // } else {
631 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
632 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
633 // }
634 // if (C1.w[0] < tmp64) C1.w[1]++;
635 // if carry-out from C1.w[0], increment C1.w[1]
636 // calculate C* and f*
637 // C* is actually floor(C*) in this case
638 // C* and f* need shifting and masking, as shown by
639 // shiftright128[] and maskhigh128[]
640 // 1 <= x <= 34
641 // kx = 10^(-x) = ten2mk128[ind - 1]
642 // C* = C1 * 10^(-x)
643 // the approximation of 10^(-x) was rounded up to 118 bits
644 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
645 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
646 res.w[1] = P256.w[3];
647 res.w[0] = P256.w[2];
648 // redundant fstar.w[3] = 0;
649 // redundant fstar.w[2] = 0;
650 // redundant fstar.w[1] = P256.w[1];
651 // redundant fstar.w[0] = P256.w[0];
652 // fraction f* > 10^(-x) <=> inexact
653 // f* is in the right position to be compared with
654 // 10^(-x) from ten2mk128[]
655 if ((P256.w[1] > ten2mk128[ind - 1].w[1])
656 || (P256.w[1] == ten2mk128[ind - 1].w[1]
657 && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
658 *pfpsf |= INEXACT_EXCEPTION;
659 // if negative, the truncated value is already the correct result
660 if (!x_sign) { // if positive
661 if (++res.w[0] == 0) {
662 res.w[1]++;
666 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
667 shift = shiftright128[ind - 1]; // 3 <= shift <= 63
668 res.w[1] = (P256.w[3] >> shift);
669 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
670 // redundant fstar.w[3] = 0;
671 fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
672 fstar.w[1] = P256.w[1];
673 fstar.w[0] = P256.w[0];
674 // fraction f* > 10^(-x) <=> inexact
675 // f* is in the right position to be compared with
676 // 10^(-x) from ten2mk128[]
677 if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
678 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
679 fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
680 *pfpsf |= INEXACT_EXCEPTION;
681 // if negative, the truncated value is already the correct result
682 if (!x_sign) { // if positive
683 if (++res.w[0] == 0) {
684 res.w[1]++;
688 } else { // 22 <= ind - 1 <= 33
689 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
690 res.w[1] = 0;
691 res.w[0] = P256.w[3] >> shift;
692 fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
693 fstar.w[2] = P256.w[2];
694 fstar.w[1] = P256.w[1];
695 fstar.w[0] = P256.w[0];
696 // fraction f* > 10^(-x) <=> inexact
697 // f* is in the right position to be compared with
698 // 10^(-x) from ten2mk128[]
699 if (fstar.w[3] || fstar.w[2]
700 || fstar.w[1] > ten2mk128[ind - 1].w[1]
701 || (fstar.w[1] == ten2mk128[ind - 1].w[1]
702 && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
703 *pfpsf |= INEXACT_EXCEPTION;
704 // if negative, the truncated value is already the correct result
705 if (!x_sign) { // if positive
706 if (++res.w[0] == 0) {
707 res.w[1]++;
712 res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
713 BID_RETURN (res);
714 } else { // if exp < 0 and q + exp <= 0
715 if (x_sign) { // negative rounds up to -0.0
716 res.w[1] = 0xb040000000000000ull;
717 res.w[0] = 0x0000000000000000ull;
718 } else { // positive rpunds up to +1.0
719 res.w[1] = 0x3040000000000000ull;
720 res.w[0] = 0x0000000000000001ull;
722 *pfpsf |= INEXACT_EXCEPTION;
723 BID_RETURN (res);
725 break;
726 case ROUNDING_TO_ZERO:
727 if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
728 // need to shift right -exp digits from the coefficient; exp will be 0
729 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
730 // (number of digits to be chopped off)
731 // chop off ind digits from the lower part of C1
732 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
733 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
734 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
735 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
736 //tmp64 = C1.w[0];
737 // if (ind <= 19) {
738 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
739 // } else {
740 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
741 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
742 // }
743 // if (C1.w[0] < tmp64) C1.w[1]++;
744 // if carry-out from C1.w[0], increment C1.w[1]
745 // calculate C* and f*
746 // C* is actually floor(C*) in this case
747 // C* and f* need shifting and masking, as shown by
748 // shiftright128[] and maskhigh128[]
749 // 1 <= x <= 34
750 // kx = 10^(-x) = ten2mk128[ind - 1]
751 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
752 // the approximation of 10^(-x) was rounded up to 118 bits
753 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
754 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
755 res.w[1] = P256.w[3];
756 res.w[0] = P256.w[2];
757 // redundant fstar.w[3] = 0;
758 // redundant fstar.w[2] = 0;
759 // redundant fstar.w[1] = P256.w[1];
760 // redundant fstar.w[0] = P256.w[0];
761 // fraction f* > 10^(-x) <=> inexact
762 // f* is in the right position to be compared with
763 // 10^(-x) from ten2mk128[]
764 if ((P256.w[1] > ten2mk128[ind - 1].w[1])
765 || (P256.w[1] == ten2mk128[ind - 1].w[1]
766 && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
767 *pfpsf |= INEXACT_EXCEPTION;
769 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
770 shift = shiftright128[ind - 1]; // 3 <= shift <= 63
771 res.w[1] = (P256.w[3] >> shift);
772 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
773 // redundant fstar.w[3] = 0;
774 fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
775 fstar.w[1] = P256.w[1];
776 fstar.w[0] = P256.w[0];
777 // fraction f* > 10^(-x) <=> inexact
778 // f* is in the right position to be compared with
779 // 10^(-x) from ten2mk128[]
780 if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
781 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
782 fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
783 *pfpsf |= INEXACT_EXCEPTION;
785 } else { // 22 <= ind - 1 <= 33
786 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
787 res.w[1] = 0;
788 res.w[0] = P256.w[3] >> shift;
789 fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
790 fstar.w[2] = P256.w[2];
791 fstar.w[1] = P256.w[1];
792 fstar.w[0] = P256.w[0];
793 // fraction f* > 10^(-x) <=> inexact
794 // f* is in the right position to be compared with
795 // 10^(-x) from ten2mk128[]
796 if (fstar.w[3] || fstar.w[2]
797 || fstar.w[1] > ten2mk128[ind - 1].w[1]
798 || (fstar.w[1] == ten2mk128[ind - 1].w[1]
799 && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
800 *pfpsf |= INEXACT_EXCEPTION;
803 res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
804 BID_RETURN (res);
805 } else { // if exp < 0 and q + exp <= 0 the result is +0 or -0
806 res.w[1] = x_sign | 0x3040000000000000ull;
807 res.w[0] = 0x0000000000000000ull;
808 *pfpsf |= INEXACT_EXCEPTION;
809 BID_RETURN (res);
811 break;
814 BID_RETURN (res);
817 /*****************************************************************************
818 * BID128_round_integral_nearest_even
819 ****************************************************************************/
821 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_even, x)
823 UINT128 res;
824 UINT64 x_sign;
825 UINT64 x_exp;
826 int exp; // unbiased exponent
827 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
828 UINT64 tmp64;
829 BID_UI64DOUBLE tmp1;
830 unsigned int x_nr_bits;
831 int q, ind, shift;
832 UINT128 C1;
833 // UINT128 res is C* at first - represents up to 34 decimal digits ~ 113 bits
834 UINT256 fstar;
835 UINT256 P256;
837 // check for NaN or Infinity
838 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
839 // x is special
840 if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
841 // if x = NaN, then res = Q (x)
842 // check first for non-canonical NaN payload
843 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
844 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
845 (x.w[0] > 0x38c15b09ffffffffull))) {
846 x.w[1] = x.w[1] & 0xffffc00000000000ull;
847 x.w[0] = 0x0ull;
849 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
850 // set invalid flag
851 *pfpsf |= INVALID_EXCEPTION;
852 // return quiet (x)
853 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
854 res.w[0] = x.w[0];
855 } else { // x is QNaN
856 // return x
857 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
858 res.w[0] = x.w[0];
860 BID_RETURN (res)
861 } else { // x is not a NaN, so it must be infinity
862 if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
863 // return +inf
864 res.w[1] = 0x7800000000000000ull;
865 res.w[0] = 0x0000000000000000ull;
866 } else { // x is -inf
867 // return -inf
868 res.w[1] = 0xf800000000000000ull;
869 res.w[0] = 0x0000000000000000ull;
871 BID_RETURN (res);
874 // unpack x
875 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
876 C1.w[1] = x.w[1] & MASK_COEFF;
877 C1.w[0] = x.w[0];
879 // check for non-canonical values (treated as zero)
880 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
881 // non-canonical
882 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
883 C1.w[1] = 0; // significand high
884 C1.w[0] = 0; // significand low
885 } else { // G0_G1 != 11
886 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
887 if (C1.w[1] > 0x0001ed09bead87c0ull ||
888 (C1.w[1] == 0x0001ed09bead87c0ull
889 && C1.w[0] > 0x378d8e63ffffffffull)) {
890 // x is non-canonical if coefficient is larger than 10^34 -1
891 C1.w[1] = 0;
892 C1.w[0] = 0;
893 } else { // canonical
898 // test for input equal to zero
899 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
900 // x is 0
901 // return 0 preserving the sign bit and the preferred exponent
902 // of MAX(Q(x), 0)
903 if (x_exp <= (0x1820ull << 49)) {
904 res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
905 } else {
906 res.w[1] = x_sign | x_exp;
908 res.w[0] = 0x0000000000000000ull;
909 BID_RETURN (res);
911 // x is not special and is not zero
913 // if (exp <= -(p+1)) return 0
914 if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35
915 res.w[1] = x_sign | 0x3040000000000000ull;
916 res.w[0] = 0x0000000000000000ull;
917 BID_RETURN (res);
919 // q = nr. of decimal digits in x
920 // determine first the nr. of bits in x
921 if (C1.w[1] == 0) {
922 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
923 // split the 64-bit value in two 32-bit halves to avoid rounding errors
924 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
925 tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
926 x_nr_bits =
927 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
928 } else { // x < 2^32
929 tmp1.d = (double) (C1.w[0]); // exact conversion
930 x_nr_bits =
931 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
933 } else { // if x < 2^53
934 tmp1.d = (double) C1.w[0]; // exact conversion
935 x_nr_bits =
936 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
938 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
939 tmp1.d = (double) C1.w[1]; // exact conversion
940 x_nr_bits =
941 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
944 q = nr_digits[x_nr_bits - 1].digits;
945 if (q == 0) {
946 q = nr_digits[x_nr_bits - 1].digits1;
947 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi
948 || (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
949 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
950 q++;
952 exp = (x_exp >> 49) - 6176;
953 if (exp >= 0) { // -exp <= 0
954 // the argument is an integer already
955 res.w[1] = x.w[1];
956 res.w[0] = x.w[0];
957 BID_RETURN (res);
958 } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
959 // need to shift right -exp digits from the coefficient; the exp will be 0
960 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
961 // chop off ind digits from the lower part of C1
962 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
963 tmp64 = C1.w[0];
964 if (ind <= 19) {
965 C1.w[0] = C1.w[0] + midpoint64[ind - 1];
966 } else {
967 C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
968 C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
970 if (C1.w[0] < tmp64)
971 C1.w[1]++;
972 // calculate C* and f*
973 // C* is actually floor(C*) in this case
974 // C* and f* need shifting and masking, as shown by
975 // shiftright128[] and maskhigh128[]
976 // 1 <= x <= 34
977 // kx = 10^(-x) = ten2mk128[ind - 1]
978 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
979 // the approximation of 10^(-x) was rounded up to 118 bits
980 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
981 // determine the value of res and fstar
982 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
983 // redundant shift = shiftright128[ind - 1]; // shift = 0
984 res.w[1] = P256.w[3];
985 res.w[0] = P256.w[2];
986 // redundant fstar.w[3] = 0;
987 // redundant fstar.w[2] = 0;
988 // redundant fstar.w[1] = P256.w[1];
989 // redundant fstar.w[0] = P256.w[0];
990 // fraction f* < 10^(-x) <=> midpoint
991 // f* is in the right position to be compared with
992 // 10^(-x) from ten2mk128[]
993 // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even)
994 if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
995 ((P256.w[1] < (ten2mk128[ind - 1].w[1]))
996 || ((P256.w[1] == ten2mk128[ind - 1].w[1])
997 && (P256.w[0] < ten2mk128[ind - 1].w[0])))) {
998 // subract 1 to make even
999 if (res.w[0]-- == 0) {
1000 res.w[1]--;
1003 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1004 shift = shiftright128[ind - 1]; // 3 <= shift <= 63
1005 res.w[1] = (P256.w[3] >> shift);
1006 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
1007 // redundant fstar.w[3] = 0;
1008 fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
1009 fstar.w[1] = P256.w[1];
1010 fstar.w[0] = P256.w[0];
1011 // fraction f* < 10^(-x) <=> midpoint
1012 // f* is in the right position to be compared with
1013 // 10^(-x) from ten2mk128[]
1014 if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
1015 fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] ||
1016 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
1017 fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
1018 // subract 1 to make even
1019 if (res.w[0]-- == 0) {
1020 res.w[1]--;
1023 } else { // 22 <= ind - 1 <= 33
1024 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
1025 res.w[1] = 0;
1026 res.w[0] = P256.w[3] >> shift;
1027 fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
1028 fstar.w[2] = P256.w[2];
1029 fstar.w[1] = P256.w[1];
1030 fstar.w[0] = P256.w[0];
1031 // fraction f* < 10^(-x) <=> midpoint
1032 // f* is in the right position to be compared with
1033 // 10^(-x) from ten2mk128[]
1034 if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
1035 fstar.w[3] == 0 && fstar.w[2] == 0
1036 && (fstar.w[1] < ten2mk128[ind - 1].w[1]
1037 || (fstar.w[1] == ten2mk128[ind - 1].w[1]
1038 && fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
1039 // subract 1 to make even
1040 if (res.w[0]-- == 0) {
1041 res.w[1]--;
1045 res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
1046 BID_RETURN (res);
1047 } else { // if ((q + exp) < 0) <=> q < -exp
1048 // the result is +0 or -0
1049 res.w[1] = x_sign | 0x3040000000000000ull;
1050 res.w[0] = 0x0000000000000000ull;
1051 BID_RETURN (res);
1055 /*****************************************************************************
1056 * BID128_round_integral_negative
1057 ****************************************************************************/
1059 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_negative, x)
1061 UINT128 res;
1062 UINT64 x_sign;
1063 UINT64 x_exp;
1064 int exp; // unbiased exponent
1065 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
1066 // (all are UINT64)
1067 BID_UI64DOUBLE tmp1;
1068 unsigned int x_nr_bits;
1069 int q, ind, shift;
1070 UINT128 C1;
1071 // UINT128 res is C* at first - represents up to 34 decimal digits ~
1072 // 113 bits
1073 UINT256 fstar;
1074 UINT256 P256;
1076 // check for NaN or Infinity
1077 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
1078 // x is special
1079 if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
1080 // if x = NaN, then res = Q (x)
1081 // check first for non-canonical NaN payload
1082 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
1083 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
1084 (x.w[0] > 0x38c15b09ffffffffull))) {
1085 x.w[1] = x.w[1] & 0xffffc00000000000ull;
1086 x.w[0] = 0x0ull;
1088 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
1089 // set invalid flag
1090 *pfpsf |= INVALID_EXCEPTION;
1091 // return quiet (x)
1092 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
1093 res.w[0] = x.w[0];
1094 } else { // x is QNaN
1095 // return x
1096 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
1097 res.w[0] = x.w[0];
1099 BID_RETURN (res)
1100 } else { // x is not a NaN, so it must be infinity
1101 if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
1102 // return +inf
1103 res.w[1] = 0x7800000000000000ull;
1104 res.w[0] = 0x0000000000000000ull;
1105 } else { // x is -inf
1106 // return -inf
1107 res.w[1] = 0xf800000000000000ull;
1108 res.w[0] = 0x0000000000000000ull;
1110 BID_RETURN (res);
1113 // unpack x
1114 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
1115 C1.w[1] = x.w[1] & MASK_COEFF;
1116 C1.w[0] = x.w[0];
1118 // check for non-canonical values (treated as zero)
1119 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
1120 // non-canonical
1121 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
1122 C1.w[1] = 0; // significand high
1123 C1.w[0] = 0; // significand low
1124 } else { // G0_G1 != 11
1125 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
1126 if (C1.w[1] > 0x0001ed09bead87c0ull ||
1127 (C1.w[1] == 0x0001ed09bead87c0ull
1128 && C1.w[0] > 0x378d8e63ffffffffull)) {
1129 // x is non-canonical if coefficient is larger than 10^34 -1
1130 C1.w[1] = 0;
1131 C1.w[0] = 0;
1132 } else { // canonical
1137 // test for input equal to zero
1138 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
1139 // x is 0
1140 // return 0 preserving the sign bit and the preferred exponent
1141 // of MAX(Q(x), 0)
1142 if (x_exp <= (0x1820ull << 49)) {
1143 res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
1144 } else {
1145 res.w[1] = x_sign | x_exp;
1147 res.w[0] = 0x0000000000000000ull;
1148 BID_RETURN (res);
1150 // x is not special and is not zero
1152 // if (exp <= -p) return -1.0 or +0.0
1153 if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
1154 if (x_sign) {
1155 // if negative, return negative 1, because we know the coefficient
1156 // is non-zero (would have been caught above)
1157 res.w[1] = 0xb040000000000000ull;
1158 res.w[0] = 0x0000000000000001ull;
1159 } else {
1160 // if positive, return positive 0, because we know coefficient is
1161 // non-zero (would have been caught above)
1162 res.w[1] = 0x3040000000000000ull;
1163 res.w[0] = 0x0000000000000000ull;
1165 BID_RETURN (res);
1167 // q = nr. of decimal digits in x
1168 // determine first the nr. of bits in x
1169 if (C1.w[1] == 0) {
1170 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
1171 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1172 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
1173 tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
1174 x_nr_bits =
1175 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1176 } else { // x < 2^32
1177 tmp1.d = (double) (C1.w[0]); // exact conversion
1178 x_nr_bits =
1179 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1181 } else { // if x < 2^53
1182 tmp1.d = (double) C1.w[0]; // exact conversion
1183 x_nr_bits =
1184 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1186 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1187 tmp1.d = (double) C1.w[1]; // exact conversion
1188 x_nr_bits =
1189 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1192 q = nr_digits[x_nr_bits - 1].digits;
1193 if (q == 0) {
1194 q = nr_digits[x_nr_bits - 1].digits1;
1195 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
1196 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
1197 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
1198 q++;
1200 exp = (x_exp >> 49) - 6176;
1201 if (exp >= 0) { // -exp <= 0
1202 // the argument is an integer already
1203 res.w[1] = x.w[1];
1204 res.w[0] = x.w[0];
1205 BID_RETURN (res);
1206 } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
1207 // need to shift right -exp digits from the coefficient; the exp will be 0
1208 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
1209 // (number of digits to be chopped off)
1210 // chop off ind digits from the lower part of C1
1211 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
1212 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
1213 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
1214 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
1215 //tmp64 = C1.w[0];
1216 // if (ind <= 19) {
1217 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
1218 // } else {
1219 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
1220 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
1221 // }
1222 // if (C1.w[0] < tmp64) C1.w[1]++;
1223 // if carry-out from C1.w[0], increment C1.w[1]
1224 // calculate C* and f*
1225 // C* is actually floor(C*) in this case
1226 // C* and f* need shifting and masking, as shown by
1227 // shiftright128[] and maskhigh128[]
1228 // 1 <= x <= 34
1229 // kx = 10^(-x) = ten2mk128[ind - 1]
1230 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
1231 // the approximation of 10^(-x) was rounded up to 118 bits
1232 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
1233 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1234 res.w[1] = P256.w[3];
1235 res.w[0] = P256.w[2];
1236 // if positive, the truncated value is already the correct result
1237 if (x_sign) { // if negative
1238 // redundant fstar.w[3] = 0;
1239 // redundant fstar.w[2] = 0;
1240 // redundant fstar.w[1] = P256.w[1];
1241 // redundant fstar.w[0] = P256.w[0];
1242 // fraction f* > 10^(-x) <=> inexact
1243 // f* is in the right position to be compared with
1244 // 10^(-x) from ten2mk128[]
1245 if ((P256.w[1] > ten2mk128[ind - 1].w[1])
1246 || (P256.w[1] == ten2mk128[ind - 1].w[1]
1247 && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
1248 if (++res.w[0] == 0) {
1249 res.w[1]++;
1253 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1254 shift = shiftright128[ind - 1]; // 0 <= shift <= 102
1255 res.w[1] = (P256.w[3] >> shift);
1256 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
1257 // if positive, the truncated value is already the correct result
1258 if (x_sign) { // if negative
1259 // redundant fstar.w[3] = 0;
1260 fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
1261 fstar.w[1] = P256.w[1];
1262 fstar.w[0] = P256.w[0];
1263 // fraction f* > 10^(-x) <=> inexact
1264 // f* is in the right position to be compared with
1265 // 10^(-x) from ten2mk128[]
1266 if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
1267 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
1268 fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
1269 if (++res.w[0] == 0) {
1270 res.w[1]++;
1274 } else { // 22 <= ind - 1 <= 33
1275 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
1276 res.w[1] = 0;
1277 res.w[0] = P256.w[3] >> shift;
1278 // if positive, the truncated value is already the correct result
1279 if (x_sign) { // if negative
1280 fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
1281 fstar.w[2] = P256.w[2];
1282 fstar.w[1] = P256.w[1];
1283 fstar.w[0] = P256.w[0];
1284 // fraction f* > 10^(-x) <=> inexact
1285 // f* is in the right position to be compared with
1286 // 10^(-x) from ten2mk128[]
1287 if (fstar.w[3] || fstar.w[2]
1288 || fstar.w[1] > ten2mk128[ind - 1].w[1]
1289 || (fstar.w[1] == ten2mk128[ind - 1].w[1]
1290 && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
1291 if (++res.w[0] == 0) {
1292 res.w[1]++;
1297 res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
1298 BID_RETURN (res);
1299 } else { // if exp < 0 and q + exp <= 0
1300 if (x_sign) { // negative rounds down to -1.0
1301 res.w[1] = 0xb040000000000000ull;
1302 res.w[0] = 0x0000000000000001ull;
1303 } else { // positive rpunds down to +0.0
1304 res.w[1] = 0x3040000000000000ull;
1305 res.w[0] = 0x0000000000000000ull;
1307 BID_RETURN (res);
1311 /*****************************************************************************
1312 * BID128_round_integral_positive
1313 ****************************************************************************/
1315 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_positive, x)
1317 UINT128 res;
1318 UINT64 x_sign;
1319 UINT64 x_exp;
1320 int exp; // unbiased exponent
1321 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
1322 // (all are UINT64)
1323 BID_UI64DOUBLE tmp1;
1324 unsigned int x_nr_bits;
1325 int q, ind, shift;
1326 UINT128 C1;
1327 // UINT128 res is C* at first - represents up to 34 decimal digits ~
1328 // 113 bits
1329 UINT256 fstar;
1330 UINT256 P256;
1332 // check for NaN or Infinity
1333 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
1334 // x is special
1335 if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
1336 // if x = NaN, then res = Q (x)
1337 // check first for non-canonical NaN payload
1338 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
1339 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
1340 (x.w[0] > 0x38c15b09ffffffffull))) {
1341 x.w[1] = x.w[1] & 0xffffc00000000000ull;
1342 x.w[0] = 0x0ull;
1344 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
1345 // set invalid flag
1346 *pfpsf |= INVALID_EXCEPTION;
1347 // return quiet (x)
1348 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
1349 res.w[0] = x.w[0];
1350 } else { // x is QNaN
1351 // return x
1352 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
1353 res.w[0] = x.w[0];
1355 BID_RETURN (res)
1356 } else { // x is not a NaN, so it must be infinity
1357 if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
1358 // return +inf
1359 res.w[1] = 0x7800000000000000ull;
1360 res.w[0] = 0x0000000000000000ull;
1361 } else { // x is -inf
1362 // return -inf
1363 res.w[1] = 0xf800000000000000ull;
1364 res.w[0] = 0x0000000000000000ull;
1366 BID_RETURN (res);
1369 // unpack x
1370 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
1371 C1.w[1] = x.w[1] & MASK_COEFF;
1372 C1.w[0] = x.w[0];
1374 // check for non-canonical values (treated as zero)
1375 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
1376 // non-canonical
1377 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
1378 C1.w[1] = 0; // significand high
1379 C1.w[0] = 0; // significand low
1380 } else { // G0_G1 != 11
1381 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
1382 if (C1.w[1] > 0x0001ed09bead87c0ull ||
1383 (C1.w[1] == 0x0001ed09bead87c0ull
1384 && C1.w[0] > 0x378d8e63ffffffffull)) {
1385 // x is non-canonical if coefficient is larger than 10^34 -1
1386 C1.w[1] = 0;
1387 C1.w[0] = 0;
1388 } else { // canonical
1393 // test for input equal to zero
1394 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
1395 // x is 0
1396 // return 0 preserving the sign bit and the preferred exponent
1397 // of MAX(Q(x), 0)
1398 if (x_exp <= (0x1820ull << 49)) {
1399 res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
1400 } else {
1401 res.w[1] = x_sign | x_exp;
1403 res.w[0] = 0x0000000000000000ull;
1404 BID_RETURN (res);
1406 // x is not special and is not zero
1408 // if (exp <= -p) return -0.0 or +1.0
1409 if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
1410 if (x_sign) {
1411 // if negative, return negative 0, because we know the coefficient
1412 // is non-zero (would have been caught above)
1413 res.w[1] = 0xb040000000000000ull;
1414 res.w[0] = 0x0000000000000000ull;
1415 } else {
1416 // if positive, return positive 1, because we know coefficient is
1417 // non-zero (would have been caught above)
1418 res.w[1] = 0x3040000000000000ull;
1419 res.w[0] = 0x0000000000000001ull;
1421 BID_RETURN (res);
1423 // q = nr. of decimal digits in x
1424 // determine first the nr. of bits in x
1425 if (C1.w[1] == 0) {
1426 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
1427 // split 64-bit value in two 32-bit halves to avoid rounding errors
1428 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
1429 tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
1430 x_nr_bits =
1431 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1432 } else { // x < 2^32
1433 tmp1.d = (double) (C1.w[0]); // exact conversion
1434 x_nr_bits =
1435 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1437 } else { // if x < 2^53
1438 tmp1.d = (double) C1.w[0]; // exact conversion
1439 x_nr_bits =
1440 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1442 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1443 tmp1.d = (double) C1.w[1]; // exact conversion
1444 x_nr_bits =
1445 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1448 q = nr_digits[x_nr_bits - 1].digits;
1449 if (q == 0) {
1450 q = nr_digits[x_nr_bits - 1].digits1;
1451 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
1452 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
1453 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
1454 q++;
1456 exp = (x_exp >> 49) - 6176;
1457 if (exp >= 0) { // -exp <= 0
1458 // the argument is an integer already
1459 res.w[1] = x.w[1];
1460 res.w[0] = x.w[0];
1461 BID_RETURN (res);
1462 } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
1463 // need to shift right -exp digits from the coefficient; exp will be 0
1464 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
1465 // (number of digits to be chopped off)
1466 // chop off ind digits from the lower part of C1
1467 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
1468 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
1469 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
1470 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
1471 // tmp64 = C1.w[0];
1472 // if (ind <= 19) {
1473 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
1474 // } else {
1475 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
1476 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
1477 // }
1478 // if (C1.w[0] < tmp64) C1.w[1]++;
1479 // if carry-out from C1.w[0], increment C1.w[1]
1480 // calculate C* and f*
1481 // C* is actually floor(C*) in this case
1482 // C* and f* need shifting and masking, as shown by
1483 // shiftright128[] and maskhigh128[]
1484 // 1 <= x <= 34
1485 // kx = 10^(-x) = ten2mk128[ind - 1]
1486 // C* = C1 * 10^(-x)
1487 // the approximation of 10^(-x) was rounded up to 118 bits
1488 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
1489 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1490 res.w[1] = P256.w[3];
1491 res.w[0] = P256.w[2];
1492 // if negative, the truncated value is already the correct result
1493 if (!x_sign) { // if positive
1494 // redundant fstar.w[3] = 0;
1495 // redundant fstar.w[2] = 0;
1496 // redundant fstar.w[1] = P256.w[1];
1497 // redundant fstar.w[0] = P256.w[0];
1498 // fraction f* > 10^(-x) <=> inexact
1499 // f* is in the right position to be compared with
1500 // 10^(-x) from ten2mk128[]
1501 if ((P256.w[1] > ten2mk128[ind - 1].w[1])
1502 || (P256.w[1] == ten2mk128[ind - 1].w[1]
1503 && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
1504 if (++res.w[0] == 0) {
1505 res.w[1]++;
1509 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1510 shift = shiftright128[ind - 1]; // 3 <= shift <= 63
1511 res.w[1] = (P256.w[3] >> shift);
1512 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
1513 // if negative, the truncated value is already the correct result
1514 if (!x_sign) { // if positive
1515 // redundant fstar.w[3] = 0;
1516 fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
1517 fstar.w[1] = P256.w[1];
1518 fstar.w[0] = P256.w[0];
1519 // fraction f* > 10^(-x) <=> inexact
1520 // f* is in the right position to be compared with
1521 // 10^(-x) from ten2mk128[]
1522 if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
1523 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
1524 fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
1525 if (++res.w[0] == 0) {
1526 res.w[1]++;
1530 } else { // 22 <= ind - 1 <= 33
1531 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
1532 res.w[1] = 0;
1533 res.w[0] = P256.w[3] >> shift;
1534 // if negative, the truncated value is already the correct result
1535 if (!x_sign) { // if positive
1536 fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
1537 fstar.w[2] = P256.w[2];
1538 fstar.w[1] = P256.w[1];
1539 fstar.w[0] = P256.w[0];
1540 // fraction f* > 10^(-x) <=> inexact
1541 // f* is in the right position to be compared with
1542 // 10^(-x) from ten2mk128[]
1543 if (fstar.w[3] || fstar.w[2]
1544 || fstar.w[1] > ten2mk128[ind - 1].w[1]
1545 || (fstar.w[1] == ten2mk128[ind - 1].w[1]
1546 && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
1547 if (++res.w[0] == 0) {
1548 res.w[1]++;
1553 res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
1554 BID_RETURN (res);
1555 } else { // if exp < 0 and q + exp <= 0
1556 if (x_sign) { // negative rounds up to -0.0
1557 res.w[1] = 0xb040000000000000ull;
1558 res.w[0] = 0x0000000000000000ull;
1559 } else { // positive rpunds up to +1.0
1560 res.w[1] = 0x3040000000000000ull;
1561 res.w[0] = 0x0000000000000001ull;
1563 BID_RETURN (res);
1567 /*****************************************************************************
1568 * BID128_round_integral_zero
1569 ****************************************************************************/
1571 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_zero, x)
1573 UINT128 res;
1574 UINT64 x_sign;
1575 UINT64 x_exp;
1576 int exp; // unbiased exponent
1577 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
1578 // (all are UINT64)
1579 BID_UI64DOUBLE tmp1;
1580 unsigned int x_nr_bits;
1581 int q, ind, shift;
1582 UINT128 C1;
1583 // UINT128 res is C* at first - represents up to 34 decimal digits ~
1584 // 113 bits
1585 UINT256 P256;
1587 // check for NaN or Infinity
1588 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
1589 // x is special
1590 if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
1591 // if x = NaN, then res = Q (x)
1592 // check first for non-canonical NaN payload
1593 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
1594 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
1595 (x.w[0] > 0x38c15b09ffffffffull))) {
1596 x.w[1] = x.w[1] & 0xffffc00000000000ull;
1597 x.w[0] = 0x0ull;
1599 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
1600 // set invalid flag
1601 *pfpsf |= INVALID_EXCEPTION;
1602 // return quiet (x)
1603 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
1604 res.w[0] = x.w[0];
1605 } else { // x is QNaN
1606 // return x
1607 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
1608 res.w[0] = x.w[0];
1610 BID_RETURN (res)
1611 } else { // x is not a NaN, so it must be infinity
1612 if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
1613 // return +inf
1614 res.w[1] = 0x7800000000000000ull;
1615 res.w[0] = 0x0000000000000000ull;
1616 } else { // x is -inf
1617 // return -inf
1618 res.w[1] = 0xf800000000000000ull;
1619 res.w[0] = 0x0000000000000000ull;
1621 BID_RETURN (res);
1624 // unpack x
1625 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
1626 C1.w[1] = x.w[1] & MASK_COEFF;
1627 C1.w[0] = x.w[0];
1629 // check for non-canonical values (treated as zero)
1630 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
1631 // non-canonical
1632 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
1633 C1.w[1] = 0; // significand high
1634 C1.w[0] = 0; // significand low
1635 } else { // G0_G1 != 11
1636 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
1637 if (C1.w[1] > 0x0001ed09bead87c0ull ||
1638 (C1.w[1] == 0x0001ed09bead87c0ull
1639 && C1.w[0] > 0x378d8e63ffffffffull)) {
1640 // x is non-canonical if coefficient is larger than 10^34 -1
1641 C1.w[1] = 0;
1642 C1.w[0] = 0;
1643 } else { // canonical
1648 // test for input equal to zero
1649 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
1650 // x is 0
1651 // return 0 preserving the sign bit and the preferred exponent
1652 // of MAX(Q(x), 0)
1653 if (x_exp <= (0x1820ull << 49)) {
1654 res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
1655 } else {
1656 res.w[1] = x_sign | x_exp;
1658 res.w[0] = 0x0000000000000000ull;
1659 BID_RETURN (res);
1661 // x is not special and is not zero
1663 // if (exp <= -p) return -0.0 or +0.0
1664 if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
1665 res.w[1] = x_sign | 0x3040000000000000ull;
1666 res.w[0] = 0x0000000000000000ull;
1667 BID_RETURN (res);
1669 // q = nr. of decimal digits in x
1670 // determine first the nr. of bits in x
1671 if (C1.w[1] == 0) {
1672 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
1673 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1674 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
1675 tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
1676 x_nr_bits =
1677 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1678 } else { // x < 2^32
1679 tmp1.d = (double) (C1.w[0]); // exact conversion
1680 x_nr_bits =
1681 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1683 } else { // if x < 2^53
1684 tmp1.d = (double) C1.w[0]; // exact conversion
1685 x_nr_bits =
1686 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1688 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1689 tmp1.d = (double) C1.w[1]; // exact conversion
1690 x_nr_bits =
1691 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1694 q = nr_digits[x_nr_bits - 1].digits;
1695 if (q == 0) {
1696 q = nr_digits[x_nr_bits - 1].digits1;
1697 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
1698 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
1699 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
1700 q++;
1702 exp = (x_exp >> 49) - 6176;
1703 if (exp >= 0) { // -exp <= 0
1704 // the argument is an integer already
1705 res.w[1] = x.w[1];
1706 res.w[0] = x.w[0];
1707 BID_RETURN (res);
1708 } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
1709 // need to shift right -exp digits from the coefficient; the exp will be 0
1710 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
1711 // (number of digits to be chopped off)
1712 // chop off ind digits from the lower part of C1
1713 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
1714 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
1715 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
1716 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
1717 //tmp64 = C1.w[0];
1718 // if (ind <= 19) {
1719 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
1720 // } else {
1721 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
1722 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
1723 // }
1724 // if (C1.w[0] < tmp64) C1.w[1]++;
1725 // if carry-out from C1.w[0], increment C1.w[1]
1726 // calculate C* and f*
1727 // C* is actually floor(C*) in this case
1728 // C* and f* need shifting and masking, as shown by
1729 // shiftright128[] and maskhigh128[]
1730 // 1 <= x <= 34
1731 // kx = 10^(-x) = ten2mk128[ind - 1]
1732 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
1733 // the approximation of 10^(-x) was rounded up to 118 bits
1734 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
1735 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1736 res.w[1] = P256.w[3];
1737 res.w[0] = P256.w[2];
1738 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1739 shift = shiftright128[ind - 1]; // 3 <= shift <= 63
1740 res.w[1] = (P256.w[3] >> shift);
1741 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
1742 } else { // 22 <= ind - 1 <= 33
1743 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
1744 res.w[1] = 0;
1745 res.w[0] = P256.w[3] >> shift;
1747 res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
1748 BID_RETURN (res);
1749 } else { // if exp < 0 and q + exp <= 0 the result is +0 or -0
1750 res.w[1] = x_sign | 0x3040000000000000ull;
1751 res.w[0] = 0x0000000000000000ull;
1752 BID_RETURN (res);
1756 /*****************************************************************************
1757 * BID128_round_integral_nearest_away
1758 ****************************************************************************/
1760 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_away, x)
1762 UINT128 res;
1763 UINT64 x_sign;
1764 UINT64 x_exp;
1765 int exp; // unbiased exponent
1766 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
1767 // (all are UINT64)
1768 UINT64 tmp64;
1769 BID_UI64DOUBLE tmp1;
1770 unsigned int x_nr_bits;
1771 int q, ind, shift;
1772 UINT128 C1;
1773 // UINT128 res is C* at first - represents up to 34 decimal digits ~
1774 // 113 bits
1775 // UINT256 fstar;
1776 UINT256 P256;
1778 // check for NaN or Infinity
1779 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
1780 // x is special
1781 if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
1782 // if x = NaN, then res = Q (x)
1783 // check first for non-canonical NaN payload
1784 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
1785 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
1786 (x.w[0] > 0x38c15b09ffffffffull))) {
1787 x.w[1] = x.w[1] & 0xffffc00000000000ull;
1788 x.w[0] = 0x0ull;
1790 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
1791 // set invalid flag
1792 *pfpsf |= INVALID_EXCEPTION;
1793 // return quiet (x)
1794 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
1795 res.w[0] = x.w[0];
1796 } else { // x is QNaN
1797 // return x
1798 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
1799 res.w[0] = x.w[0];
1801 BID_RETURN (res)
1802 } else { // x is not a NaN, so it must be infinity
1803 if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
1804 // return +inf
1805 res.w[1] = 0x7800000000000000ull;
1806 res.w[0] = 0x0000000000000000ull;
1807 } else { // x is -inf
1808 // return -inf
1809 res.w[1] = 0xf800000000000000ull;
1810 res.w[0] = 0x0000000000000000ull;
1812 BID_RETURN (res);
1815 // unpack x
1816 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
1817 C1.w[1] = x.w[1] & MASK_COEFF;
1818 C1.w[0] = x.w[0];
1820 // check for non-canonical values (treated as zero)
1821 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
1822 // non-canonical
1823 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
1824 C1.w[1] = 0; // significand high
1825 C1.w[0] = 0; // significand low
1826 } else { // G0_G1 != 11
1827 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
1828 if (C1.w[1] > 0x0001ed09bead87c0ull ||
1829 (C1.w[1] == 0x0001ed09bead87c0ull
1830 && C1.w[0] > 0x378d8e63ffffffffull)) {
1831 // x is non-canonical if coefficient is larger than 10^34 -1
1832 C1.w[1] = 0;
1833 C1.w[0] = 0;
1834 } else { // canonical
1839 // test for input equal to zero
1840 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
1841 // x is 0
1842 // return 0 preserving the sign bit and the preferred exponent
1843 // of MAX(Q(x), 0)
1844 if (x_exp <= (0x1820ull << 49)) {
1845 res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
1846 } else {
1847 res.w[1] = x_sign | x_exp;
1849 res.w[0] = 0x0000000000000000ull;
1850 BID_RETURN (res);
1852 // x is not special and is not zero
1854 // if (exp <= -(p+1)) return 0.0
1855 if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35
1856 res.w[1] = x_sign | 0x3040000000000000ull;
1857 res.w[0] = 0x0000000000000000ull;
1858 BID_RETURN (res);
1860 // q = nr. of decimal digits in x
1861 // determine first the nr. of bits in x
1862 if (C1.w[1] == 0) {
1863 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
1864 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1865 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
1866 tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
1867 x_nr_bits =
1868 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1869 } else { // x < 2^32
1870 tmp1.d = (double) (C1.w[0]); // exact conversion
1871 x_nr_bits =
1872 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1874 } else { // if x < 2^53
1875 tmp1.d = (double) C1.w[0]; // exact conversion
1876 x_nr_bits =
1877 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1879 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1880 tmp1.d = (double) C1.w[1]; // exact conversion
1881 x_nr_bits =
1882 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1885 q = nr_digits[x_nr_bits - 1].digits;
1886 if (q == 0) {
1887 q = nr_digits[x_nr_bits - 1].digits1;
1888 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
1889 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
1890 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
1891 q++;
1893 exp = (x_exp >> 49) - 6176;
1894 if (exp >= 0) { // -exp <= 0
1895 // the argument is an integer already
1896 res.w[1] = x.w[1];
1897 res.w[0] = x.w[0];
1898 BID_RETURN (res);
1899 } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
1900 // need to shift right -exp digits from the coefficient; the exp will be 0
1901 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
1902 // chop off ind digits from the lower part of C1
1903 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
1904 tmp64 = C1.w[0];
1905 if (ind <= 19) {
1906 C1.w[0] = C1.w[0] + midpoint64[ind - 1];
1907 } else {
1908 C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
1909 C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
1911 if (C1.w[0] < tmp64)
1912 C1.w[1]++;
1913 // calculate C* and f*
1914 // C* is actually floor(C*) in this case
1915 // C* and f* need shifting and masking, as shown by
1916 // shiftright128[] and maskhigh128[]
1917 // 1 <= x <= 34
1918 // kx = 10^(-x) = ten2mk128[ind - 1]
1919 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
1920 // the approximation of 10^(-x) was rounded up to 118 bits
1921 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
1922 // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
1923 // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
1924 // if (0 < f* < 10^(-x)) then the result is a midpoint
1925 // if floor(C*) is even then C* = floor(C*) - logical right
1926 // shift; C* has p decimal digits, correct by Prop. 1)
1927 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
1928 // shift; C* has p decimal digits, correct by Pr. 1)
1929 // else
1930 // C* = floor(C*) (logical right shift; C has p decimal digits,
1931 // correct by Property 1)
1932 // n = C* * 10^(e+x)
1934 // shift right C* by Ex-128 = shiftright128[ind]
1935 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1936 res.w[1] = P256.w[3];
1937 res.w[0] = P256.w[2];
1938 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1939 shift = shiftright128[ind - 1]; // 3 <= shift <= 63
1940 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
1941 res.w[1] = (P256.w[3] >> shift);
1942 } else { // 22 <= ind - 1 <= 33
1943 shift = shiftright128[ind - 1]; // 2 <= shift <= 38
1944 res.w[1] = 0;
1945 res.w[0] = (P256.w[3] >> (shift - 64)); // 2 <= shift - 64 <= 38
1947 // if the result was a midpoint, it was already rounded away from zero
1948 res.w[1] |= x_sign | 0x3040000000000000ull;
1949 BID_RETURN (res);
1950 } else { // if ((q + exp) < 0) <=> q < -exp
1951 // the result is +0 or -0
1952 res.w[1] = x_sign | 0x3040000000000000ull;
1953 res.w[0] = 0x0000000000000000ull;
1954 BID_RETURN (res);