* gcc-interface/decl.c (gnat_to_gnu_field): Do not set the alignment
[official-gcc.git] / libgcc / config / libbid / bid128_round_integral.c
blob349f7cfef88c9ae92c8931119b18ae248d3fd42a
1 /* Copyright (C) 2007-2017 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
8 version.
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 #define BID_128RES
26 #include "bid_internal.h"
28 /*****************************************************************************
29 * BID128_round_integral_exact
30 ****************************************************************************/
32 BID128_FUNCTION_ARG1 (bid128_round_integral_exact, x)
34 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
36 UINT64 x_sign;
37 UINT64 x_exp;
38 int exp; // unbiased exponent
39 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
40 UINT64 tmp64;
41 BID_UI64DOUBLE tmp1;
42 unsigned int x_nr_bits;
43 int q, ind, shift;
44 UINT128 C1;
45 UINT256 fstar;
46 UINT256 P256;
48 // check for NaN or Infinity
49 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
50 // x is special
51 if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
52 // if x = NaN, then res = Q (x)
53 // check first for non-canonical NaN payload
54 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
55 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
56 (x.w[0] > 0x38c15b09ffffffffull))) {
57 x.w[1] = x.w[1] & 0xffffc00000000000ull;
58 x.w[0] = 0x0ull;
60 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
61 // set invalid flag
62 *pfpsf |= INVALID_EXCEPTION;
63 // return quiet (x)
64 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
65 res.w[0] = x.w[0];
66 } else { // x is QNaN
67 // return x
68 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
69 res.w[0] = x.w[0];
71 BID_RETURN (res)
72 } else { // x is not a NaN, so it must be infinity
73 if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
74 // return +inf
75 res.w[1] = 0x7800000000000000ull;
76 res.w[0] = 0x0000000000000000ull;
77 } else { // x is -inf
78 // return -inf
79 res.w[1] = 0xf800000000000000ull;
80 res.w[0] = 0x0000000000000000ull;
82 BID_RETURN (res);
85 // unpack x
86 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
87 C1.w[1] = x.w[1] & MASK_COEFF;
88 C1.w[0] = x.w[0];
90 // check for non-canonical values (treated as zero)
91 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
92 // non-canonical
93 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
94 C1.w[1] = 0; // significand high
95 C1.w[0] = 0; // significand low
96 } else { // G0_G1 != 11
97 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
98 if (C1.w[1] > 0x0001ed09bead87c0ull ||
99 (C1.w[1] == 0x0001ed09bead87c0ull
100 && C1.w[0] > 0x378d8e63ffffffffull)) {
101 // x is non-canonical if coefficient is larger than 10^34 -1
102 C1.w[1] = 0;
103 C1.w[0] = 0;
104 } else { // canonical
109 // test for input equal to zero
110 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
111 // x is 0
112 // return 0 preserving the sign bit and the preferred exponent
113 // of MAX(Q(x), 0)
114 if (x_exp <= (0x1820ull << 49)) {
115 res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
116 } else {
117 res.w[1] = x_sign | x_exp;
119 res.w[0] = 0x0000000000000000ull;
120 BID_RETURN (res);
122 // x is not special and is not zero
124 switch (rnd_mode) {
125 case ROUNDING_TO_NEAREST:
126 case ROUNDING_TIES_AWAY:
127 // if (exp <= -(p+1)) return 0.0
128 if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35
129 res.w[1] = x_sign | 0x3040000000000000ull;
130 res.w[0] = 0x0000000000000000ull;
131 *pfpsf |= INEXACT_EXCEPTION;
132 BID_RETURN (res);
134 break;
135 case ROUNDING_DOWN:
136 // if (exp <= -p) return -1.0 or +0.0
137 if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffa000000000000ull == -34
138 if (x_sign) {
139 // if negative, return negative 1, because we know coefficient
140 // is non-zero (would have been caught above)
141 res.w[1] = 0xb040000000000000ull;
142 res.w[0] = 0x0000000000000001ull;
143 } else {
144 // if positive, return positive 0, because we know coefficient is
145 // non-zero (would have been caught above)
146 res.w[1] = 0x3040000000000000ull;
147 res.w[0] = 0x0000000000000000ull;
149 *pfpsf |= INEXACT_EXCEPTION;
150 BID_RETURN (res);
152 break;
153 case ROUNDING_UP:
154 // if (exp <= -p) return -0.0 or +1.0
155 if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
156 if (x_sign) {
157 // if negative, return negative 0, because we know the coefficient
158 // is non-zero (would have been caught above)
159 res.w[1] = 0xb040000000000000ull;
160 res.w[0] = 0x0000000000000000ull;
161 } else {
162 // if positive, return positive 1, because we know coefficient is
163 // non-zero (would have been caught above)
164 res.w[1] = 0x3040000000000000ull;
165 res.w[0] = 0x0000000000000001ull;
167 *pfpsf |= INEXACT_EXCEPTION;
168 BID_RETURN (res);
170 break;
171 case ROUNDING_TO_ZERO:
172 // if (exp <= -p) return -0.0 or +0.0
173 if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
174 res.w[1] = x_sign | 0x3040000000000000ull;
175 res.w[0] = 0x0000000000000000ull;
176 *pfpsf |= INEXACT_EXCEPTION;
177 BID_RETURN (res);
179 break;
182 // q = nr. of decimal digits in x
183 // determine first the nr. of bits in x
184 if (C1.w[1] == 0) {
185 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
186 // split the 64-bit value in two 32-bit halves to avoid rounding errors
187 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
188 tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
189 x_nr_bits =
190 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
191 } else { // x < 2^32
192 tmp1.d = (double) (C1.w[0]); // exact conversion
193 x_nr_bits =
194 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
196 } else { // if x < 2^53
197 tmp1.d = (double) C1.w[0]; // exact conversion
198 x_nr_bits =
199 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
201 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
202 tmp1.d = (double) C1.w[1]; // exact conversion
203 x_nr_bits =
204 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
207 q = nr_digits[x_nr_bits - 1].digits;
208 if (q == 0) {
209 q = nr_digits[x_nr_bits - 1].digits1;
210 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
211 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
212 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
213 q++;
215 exp = (x_exp >> 49) - 6176;
216 if (exp >= 0) { // -exp <= 0
217 // the argument is an integer already
218 res.w[1] = x.w[1];
219 res.w[0] = x.w[0];
220 BID_RETURN (res);
222 // exp < 0
223 switch (rnd_mode) {
224 case ROUNDING_TO_NEAREST:
225 if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
226 // need to shift right -exp digits from the coefficient; exp will be 0
227 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
228 // chop off ind digits from the lower part of C1
229 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
230 tmp64 = C1.w[0];
231 if (ind <= 19) {
232 C1.w[0] = C1.w[0] + midpoint64[ind - 1];
233 } else {
234 C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
235 C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
237 if (C1.w[0] < tmp64)
238 C1.w[1]++;
239 // calculate C* and f*
240 // C* is actually floor(C*) in this case
241 // C* and f* need shifting and masking, as shown by
242 // shiftright128[] and maskhigh128[]
243 // 1 <= x <= 34
244 // kx = 10^(-x) = ten2mk128[ind - 1]
245 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
246 // the approximation of 10^(-x) was rounded up to 118 bits
247 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
248 // determine the value of res and fstar
250 // determine inexactness of the rounding of C*
251 // if (0 < f* - 1/2 < 10^(-x)) then
252 // the result is exact
253 // else // if (f* - 1/2 > T*) then
254 // the result is inexact
255 // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[]
257 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
258 // redundant shift = shiftright128[ind - 1]; // shift = 0
259 res.w[1] = P256.w[3];
260 res.w[0] = P256.w[2];
261 // redundant fstar.w[3] = 0;
262 // redundant fstar.w[2] = 0;
263 fstar.w[1] = P256.w[1];
264 fstar.w[0] = P256.w[0];
265 // fraction f* < 10^(-x) <=> midpoint
266 // f* is in the right position to be compared with
267 // 10^(-x) from ten2mk128[]
268 // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even)
269 if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
270 ((fstar.w[1] < (ten2mk128[ind - 1].w[1]))
271 || ((fstar.w[1] == ten2mk128[ind - 1].w[1])
272 && (fstar.w[0] < ten2mk128[ind - 1].w[0])))) {
273 // subract 1 to make even
274 if (res.w[0]-- == 0) {
275 res.w[1]--;
278 if (fstar.w[1] > 0x8000000000000000ull ||
279 (fstar.w[1] == 0x8000000000000000ull
280 && fstar.w[0] > 0x0ull)) {
281 // f* > 1/2 and the result may be exact
282 tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2
283 if (tmp64 > ten2mk128[ind - 1].w[1] ||
284 (tmp64 == ten2mk128[ind - 1].w[1] &&
285 fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
286 // set the inexact flag
287 *pfpsf |= INEXACT_EXCEPTION;
288 } // else the result is exact
289 } else { // the result is inexact; f2* <= 1/2
290 // set the inexact flag
291 *pfpsf |= INEXACT_EXCEPTION;
293 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
294 shift = shiftright128[ind - 1]; // 3 <= shift <= 63
295 res.w[1] = (P256.w[3] >> shift);
296 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
297 // redundant fstar.w[3] = 0;
298 fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
299 fstar.w[1] = P256.w[1];
300 fstar.w[0] = P256.w[0];
301 // fraction f* < 10^(-x) <=> midpoint
302 // f* is in the right position to be compared with
303 // 10^(-x) from ten2mk128[]
304 if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
305 fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] ||
306 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
307 fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
308 // subract 1 to make even
309 if (res.w[0]-- == 0) {
310 res.w[1]--;
313 if (fstar.w[2] > onehalf128[ind - 1] ||
314 (fstar.w[2] == onehalf128[ind - 1]
315 && (fstar.w[1] || fstar.w[0]))) {
316 // f2* > 1/2 and the result may be exact
317 // Calculate f2* - 1/2
318 tmp64 = fstar.w[2] - onehalf128[ind - 1];
319 if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
320 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
321 fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
322 // set the inexact flag
323 *pfpsf |= INEXACT_EXCEPTION;
324 } // else the result is exact
325 } else { // the result is inexact; f2* <= 1/2
326 // set the inexact flag
327 *pfpsf |= INEXACT_EXCEPTION;
329 } else { // 22 <= ind - 1 <= 33
330 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
331 res.w[1] = 0;
332 res.w[0] = P256.w[3] >> shift;
333 fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
334 fstar.w[2] = P256.w[2];
335 fstar.w[1] = P256.w[1];
336 fstar.w[0] = P256.w[0];
337 // fraction f* < 10^(-x) <=> midpoint
338 // f* is in the right position to be compared with
339 // 10^(-x) from ten2mk128[]
340 if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
341 fstar.w[3] == 0 && fstar.w[2] == 0 &&
342 (fstar.w[1] < ten2mk128[ind - 1].w[1] ||
343 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
344 fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
345 // subract 1 to make even
346 if (res.w[0]-- == 0) {
347 res.w[1]--;
350 if (fstar.w[3] > onehalf128[ind - 1] ||
351 (fstar.w[3] == onehalf128[ind - 1] &&
352 (fstar.w[2] || fstar.w[1] || fstar.w[0]))) {
353 // f2* > 1/2 and the result may be exact
354 // Calculate f2* - 1/2
355 tmp64 = fstar.w[3] - onehalf128[ind - 1];
356 if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1]
357 || (fstar.w[1] == ten2mk128[ind - 1].w[1]
358 && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
359 // set the inexact flag
360 *pfpsf |= INEXACT_EXCEPTION;
361 } // else the result is exact
362 } else { // the result is inexact; f2* <= 1/2
363 // set the inexact flag
364 *pfpsf |= INEXACT_EXCEPTION;
367 res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
368 BID_RETURN (res);
369 } else { // if ((q + exp) < 0) <=> q < -exp
370 // the result is +0 or -0
371 res.w[1] = x_sign | 0x3040000000000000ull;
372 res.w[0] = 0x0000000000000000ull;
373 *pfpsf |= INEXACT_EXCEPTION;
374 BID_RETURN (res);
376 break;
377 case ROUNDING_TIES_AWAY:
378 if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
379 // need to shift right -exp digits from the coefficient; exp will be 0
380 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
381 // chop off ind digits from the lower part of C1
382 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
383 tmp64 = C1.w[0];
384 if (ind <= 19) {
385 C1.w[0] = C1.w[0] + midpoint64[ind - 1];
386 } else {
387 C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
388 C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
390 if (C1.w[0] < tmp64)
391 C1.w[1]++;
392 // calculate C* and f*
393 // C* is actually floor(C*) in this case
394 // C* and f* need shifting and masking, as shown by
395 // shiftright128[] and maskhigh128[]
396 // 1 <= x <= 34
397 // kx = 10^(-x) = ten2mk128[ind - 1]
398 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
399 // the approximation of 10^(-x) was rounded up to 118 bits
400 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
401 // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
402 // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
403 // if (0 < f* < 10^(-x)) then the result is a midpoint
404 // if floor(C*) is even then C* = floor(C*) - logical right
405 // shift; C* has p decimal digits, correct by Prop. 1)
406 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
407 // shift; C* has p decimal digits, correct by Pr. 1)
408 // else
409 // C* = floor(C*) (logical right shift; C has p decimal digits,
410 // correct by Property 1)
411 // n = C* * 10^(e+x)
413 // determine also the inexactness of the rounding of C*
414 // if (0 < f* - 1/2 < 10^(-x)) then
415 // the result is exact
416 // else // if (f* - 1/2 > T*) then
417 // the result is inexact
418 // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[]
419 // shift right C* by Ex-128 = shiftright128[ind]
420 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
421 // redundant shift = shiftright128[ind - 1]; // shift = 0
422 res.w[1] = P256.w[3];
423 res.w[0] = P256.w[2];
424 // redundant fstar.w[3] = 0;
425 // redundant fstar.w[2] = 0;
426 fstar.w[1] = P256.w[1];
427 fstar.w[0] = P256.w[0];
428 if (fstar.w[1] > 0x8000000000000000ull ||
429 (fstar.w[1] == 0x8000000000000000ull
430 && fstar.w[0] > 0x0ull)) {
431 // f* > 1/2 and the result may be exact
432 tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2
433 if ((tmp64 > ten2mk128[ind - 1].w[1] ||
434 (tmp64 == ten2mk128[ind - 1].w[1] &&
435 fstar.w[0] >= ten2mk128[ind - 1].w[0]))) {
436 // set the inexact flag
437 *pfpsf |= INEXACT_EXCEPTION;
438 } // else the result is exact
439 } else { // the result is inexact; f2* <= 1/2
440 // set the inexact flag
441 *pfpsf |= INEXACT_EXCEPTION;
443 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
444 shift = shiftright128[ind - 1]; // 3 <= shift <= 63
445 res.w[1] = (P256.w[3] >> shift);
446 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
447 // redundant fstar.w[3] = 0;
448 fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
449 fstar.w[1] = P256.w[1];
450 fstar.w[0] = P256.w[0];
451 if (fstar.w[2] > onehalf128[ind - 1] ||
452 (fstar.w[2] == onehalf128[ind - 1]
453 && (fstar.w[1] || fstar.w[0]))) {
454 // f2* > 1/2 and the result may be exact
455 // Calculate f2* - 1/2
456 tmp64 = fstar.w[2] - onehalf128[ind - 1];
457 if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
458 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
459 fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
460 // set the inexact flag
461 *pfpsf |= INEXACT_EXCEPTION;
462 } // else the result is exact
463 } else { // the result is inexact; f2* <= 1/2
464 // set the inexact flag
465 *pfpsf |= INEXACT_EXCEPTION;
467 } else { // 22 <= ind - 1 <= 33
468 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
469 res.w[1] = 0;
470 res.w[0] = P256.w[3] >> shift;
471 fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
472 fstar.w[2] = P256.w[2];
473 fstar.w[1] = P256.w[1];
474 fstar.w[0] = P256.w[0];
475 if (fstar.w[3] > onehalf128[ind - 1] ||
476 (fstar.w[3] == onehalf128[ind - 1] &&
477 (fstar.w[2] || fstar.w[1] || fstar.w[0]))) {
478 // f2* > 1/2 and the result may be exact
479 // Calculate f2* - 1/2
480 tmp64 = fstar.w[3] - onehalf128[ind - 1];
481 if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1]
482 || (fstar.w[1] == ten2mk128[ind - 1].w[1]
483 && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
484 // set the inexact flag
485 *pfpsf |= INEXACT_EXCEPTION;
486 } // else the result is exact
487 } else { // the result is inexact; f2* <= 1/2
488 // set the inexact flag
489 *pfpsf |= INEXACT_EXCEPTION;
492 // if the result was a midpoint, it was already rounded away from zero
493 res.w[1] |= x_sign | 0x3040000000000000ull;
494 BID_RETURN (res);
495 } else { // if ((q + exp) < 0) <=> q < -exp
496 // the result is +0 or -0
497 res.w[1] = x_sign | 0x3040000000000000ull;
498 res.w[0] = 0x0000000000000000ull;
499 *pfpsf |= INEXACT_EXCEPTION;
500 BID_RETURN (res);
502 break;
503 case ROUNDING_DOWN:
504 if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
505 // need to shift right -exp digits from the coefficient; exp will be 0
506 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
507 // (number of digits to be chopped off)
508 // chop off ind digits from the lower part of C1
509 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
510 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
511 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
512 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
513 // tmp64 = C1.w[0];
514 // if (ind <= 19) {
515 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
516 // } else {
517 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
518 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
519 // }
520 // if (C1.w[0] < tmp64) C1.w[1]++;
521 // if carry-out from C1.w[0], increment C1.w[1]
522 // calculate C* and f*
523 // C* is actually floor(C*) in this case
524 // C* and f* need shifting and masking, as shown by
525 // shiftright128[] and maskhigh128[]
526 // 1 <= x <= 34
527 // kx = 10^(-x) = ten2mk128[ind - 1]
528 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
529 // the approximation of 10^(-x) was rounded up to 118 bits
530 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
531 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
532 res.w[1] = P256.w[3];
533 res.w[0] = P256.w[2];
534 // redundant fstar.w[3] = 0;
535 // redundant fstar.w[2] = 0;
536 // redundant fstar.w[1] = P256.w[1];
537 // redundant fstar.w[0] = P256.w[0];
538 // fraction f* > 10^(-x) <=> inexact
539 // f* is in the right position to be compared with
540 // 10^(-x) from ten2mk128[]
541 if ((P256.w[1] > ten2mk128[ind - 1].w[1])
542 || (P256.w[1] == ten2mk128[ind - 1].w[1]
543 && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
544 *pfpsf |= INEXACT_EXCEPTION;
545 // if positive, the truncated value is already the correct result
546 if (x_sign) { // if negative
547 if (++res.w[0] == 0) {
548 res.w[1]++;
552 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
553 shift = shiftright128[ind - 1]; // 0 <= shift <= 102
554 res.w[1] = (P256.w[3] >> shift);
555 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
556 // redundant fstar.w[3] = 0;
557 fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
558 fstar.w[1] = P256.w[1];
559 fstar.w[0] = P256.w[0];
560 // fraction f* > 10^(-x) <=> inexact
561 // f* is in the right position to be compared with
562 // 10^(-x) from ten2mk128[]
563 if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
564 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
565 fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
566 *pfpsf |= INEXACT_EXCEPTION;
567 // if positive, the truncated value is already the correct result
568 if (x_sign) { // if negative
569 if (++res.w[0] == 0) {
570 res.w[1]++;
574 } else { // 22 <= ind - 1 <= 33
575 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
576 res.w[1] = 0;
577 res.w[0] = P256.w[3] >> shift;
578 fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
579 fstar.w[2] = P256.w[2];
580 fstar.w[1] = P256.w[1];
581 fstar.w[0] = P256.w[0];
582 // fraction f* > 10^(-x) <=> inexact
583 // f* is in the right position to be compared with
584 // 10^(-x) from ten2mk128[]
585 if (fstar.w[3] || fstar.w[2]
586 || fstar.w[1] > ten2mk128[ind - 1].w[1]
587 || (fstar.w[1] == ten2mk128[ind - 1].w[1]
588 && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
589 *pfpsf |= INEXACT_EXCEPTION;
590 // if positive, the truncated value is already the correct result
591 if (x_sign) { // if negative
592 if (++res.w[0] == 0) {
593 res.w[1]++;
598 res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
599 BID_RETURN (res);
600 } else { // if exp < 0 and q + exp <= 0
601 if (x_sign) { // negative rounds down to -1.0
602 res.w[1] = 0xb040000000000000ull;
603 res.w[0] = 0x0000000000000001ull;
604 } else { // positive rpunds down to +0.0
605 res.w[1] = 0x3040000000000000ull;
606 res.w[0] = 0x0000000000000000ull;
608 *pfpsf |= INEXACT_EXCEPTION;
609 BID_RETURN (res);
611 break;
612 case ROUNDING_UP:
613 if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
614 // need to shift right -exp digits from the coefficient; exp will be 0
615 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
616 // (number of digits to be chopped off)
617 // chop off ind digits from the lower part of C1
618 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
619 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
620 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
621 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
622 // tmp64 = C1.w[0];
623 // if (ind <= 19) {
624 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
625 // } else {
626 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
627 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
628 // }
629 // if (C1.w[0] < tmp64) C1.w[1]++;
630 // if carry-out from C1.w[0], increment C1.w[1]
631 // calculate C* and f*
632 // C* is actually floor(C*) in this case
633 // C* and f* need shifting and masking, as shown by
634 // shiftright128[] and maskhigh128[]
635 // 1 <= x <= 34
636 // kx = 10^(-x) = ten2mk128[ind - 1]
637 // C* = C1 * 10^(-x)
638 // the approximation of 10^(-x) was rounded up to 118 bits
639 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
640 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
641 res.w[1] = P256.w[3];
642 res.w[0] = P256.w[2];
643 // redundant fstar.w[3] = 0;
644 // redundant fstar.w[2] = 0;
645 // redundant fstar.w[1] = P256.w[1];
646 // redundant fstar.w[0] = P256.w[0];
647 // fraction f* > 10^(-x) <=> inexact
648 // f* is in the right position to be compared with
649 // 10^(-x) from ten2mk128[]
650 if ((P256.w[1] > ten2mk128[ind - 1].w[1])
651 || (P256.w[1] == ten2mk128[ind - 1].w[1]
652 && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
653 *pfpsf |= INEXACT_EXCEPTION;
654 // if negative, the truncated value is already the correct result
655 if (!x_sign) { // if positive
656 if (++res.w[0] == 0) {
657 res.w[1]++;
661 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
662 shift = shiftright128[ind - 1]; // 3 <= shift <= 63
663 res.w[1] = (P256.w[3] >> shift);
664 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
665 // redundant fstar.w[3] = 0;
666 fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
667 fstar.w[1] = P256.w[1];
668 fstar.w[0] = P256.w[0];
669 // fraction f* > 10^(-x) <=> inexact
670 // f* is in the right position to be compared with
671 // 10^(-x) from ten2mk128[]
672 if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
673 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
674 fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
675 *pfpsf |= INEXACT_EXCEPTION;
676 // if negative, the truncated value is already the correct result
677 if (!x_sign) { // if positive
678 if (++res.w[0] == 0) {
679 res.w[1]++;
683 } else { // 22 <= ind - 1 <= 33
684 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
685 res.w[1] = 0;
686 res.w[0] = P256.w[3] >> shift;
687 fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
688 fstar.w[2] = P256.w[2];
689 fstar.w[1] = P256.w[1];
690 fstar.w[0] = P256.w[0];
691 // fraction f* > 10^(-x) <=> inexact
692 // f* is in the right position to be compared with
693 // 10^(-x) from ten2mk128[]
694 if (fstar.w[3] || fstar.w[2]
695 || fstar.w[1] > ten2mk128[ind - 1].w[1]
696 || (fstar.w[1] == ten2mk128[ind - 1].w[1]
697 && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
698 *pfpsf |= INEXACT_EXCEPTION;
699 // if negative, the truncated value is already the correct result
700 if (!x_sign) { // if positive
701 if (++res.w[0] == 0) {
702 res.w[1]++;
707 res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
708 BID_RETURN (res);
709 } else { // if exp < 0 and q + exp <= 0
710 if (x_sign) { // negative rounds up to -0.0
711 res.w[1] = 0xb040000000000000ull;
712 res.w[0] = 0x0000000000000000ull;
713 } else { // positive rpunds up to +1.0
714 res.w[1] = 0x3040000000000000ull;
715 res.w[0] = 0x0000000000000001ull;
717 *pfpsf |= INEXACT_EXCEPTION;
718 BID_RETURN (res);
720 break;
721 case ROUNDING_TO_ZERO:
722 if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
723 // need to shift right -exp digits from the coefficient; exp will be 0
724 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
725 // (number of digits to be chopped off)
726 // chop off ind digits from the lower part of C1
727 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
728 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
729 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
730 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
731 //tmp64 = C1.w[0];
732 // if (ind <= 19) {
733 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
734 // } else {
735 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
736 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
737 // }
738 // if (C1.w[0] < tmp64) C1.w[1]++;
739 // if carry-out from C1.w[0], increment C1.w[1]
740 // calculate C* and f*
741 // C* is actually floor(C*) in this case
742 // C* and f* need shifting and masking, as shown by
743 // shiftright128[] and maskhigh128[]
744 // 1 <= x <= 34
745 // kx = 10^(-x) = ten2mk128[ind - 1]
746 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
747 // the approximation of 10^(-x) was rounded up to 118 bits
748 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
749 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
750 res.w[1] = P256.w[3];
751 res.w[0] = P256.w[2];
752 // redundant fstar.w[3] = 0;
753 // redundant fstar.w[2] = 0;
754 // redundant fstar.w[1] = P256.w[1];
755 // redundant fstar.w[0] = P256.w[0];
756 // fraction f* > 10^(-x) <=> inexact
757 // f* is in the right position to be compared with
758 // 10^(-x) from ten2mk128[]
759 if ((P256.w[1] > ten2mk128[ind - 1].w[1])
760 || (P256.w[1] == ten2mk128[ind - 1].w[1]
761 && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
762 *pfpsf |= INEXACT_EXCEPTION;
764 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
765 shift = shiftright128[ind - 1]; // 3 <= shift <= 63
766 res.w[1] = (P256.w[3] >> shift);
767 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
768 // redundant fstar.w[3] = 0;
769 fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
770 fstar.w[1] = P256.w[1];
771 fstar.w[0] = P256.w[0];
772 // fraction f* > 10^(-x) <=> inexact
773 // f* is in the right position to be compared with
774 // 10^(-x) from ten2mk128[]
775 if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
776 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
777 fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
778 *pfpsf |= INEXACT_EXCEPTION;
780 } else { // 22 <= ind - 1 <= 33
781 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
782 res.w[1] = 0;
783 res.w[0] = P256.w[3] >> shift;
784 fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
785 fstar.w[2] = P256.w[2];
786 fstar.w[1] = P256.w[1];
787 fstar.w[0] = P256.w[0];
788 // fraction f* > 10^(-x) <=> inexact
789 // f* is in the right position to be compared with
790 // 10^(-x) from ten2mk128[]
791 if (fstar.w[3] || fstar.w[2]
792 || fstar.w[1] > ten2mk128[ind - 1].w[1]
793 || (fstar.w[1] == ten2mk128[ind - 1].w[1]
794 && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
795 *pfpsf |= INEXACT_EXCEPTION;
798 res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
799 BID_RETURN (res);
800 } else { // if exp < 0 and q + exp <= 0 the result is +0 or -0
801 res.w[1] = x_sign | 0x3040000000000000ull;
802 res.w[0] = 0x0000000000000000ull;
803 *pfpsf |= INEXACT_EXCEPTION;
804 BID_RETURN (res);
806 break;
809 BID_RETURN (res);
812 /*****************************************************************************
813 * BID128_round_integral_nearest_even
814 ****************************************************************************/
816 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_even, x)
818 UINT128 res;
819 UINT64 x_sign;
820 UINT64 x_exp;
821 int exp; // unbiased exponent
822 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64)
823 UINT64 tmp64;
824 BID_UI64DOUBLE tmp1;
825 unsigned int x_nr_bits;
826 int q, ind, shift;
827 UINT128 C1;
828 // UINT128 res is C* at first - represents up to 34 decimal digits ~ 113 bits
829 UINT256 fstar;
830 UINT256 P256;
832 // check for NaN or Infinity
833 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
834 // x is special
835 if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
836 // if x = NaN, then res = Q (x)
837 // check first for non-canonical NaN payload
838 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
839 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
840 (x.w[0] > 0x38c15b09ffffffffull))) {
841 x.w[1] = x.w[1] & 0xffffc00000000000ull;
842 x.w[0] = 0x0ull;
844 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
845 // set invalid flag
846 *pfpsf |= INVALID_EXCEPTION;
847 // return quiet (x)
848 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
849 res.w[0] = x.w[0];
850 } else { // x is QNaN
851 // return x
852 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
853 res.w[0] = x.w[0];
855 BID_RETURN (res)
856 } else { // x is not a NaN, so it must be infinity
857 if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
858 // return +inf
859 res.w[1] = 0x7800000000000000ull;
860 res.w[0] = 0x0000000000000000ull;
861 } else { // x is -inf
862 // return -inf
863 res.w[1] = 0xf800000000000000ull;
864 res.w[0] = 0x0000000000000000ull;
866 BID_RETURN (res);
869 // unpack x
870 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
871 C1.w[1] = x.w[1] & MASK_COEFF;
872 C1.w[0] = x.w[0];
874 // check for non-canonical values (treated as zero)
875 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
876 // non-canonical
877 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
878 C1.w[1] = 0; // significand high
879 C1.w[0] = 0; // significand low
880 } else { // G0_G1 != 11
881 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
882 if (C1.w[1] > 0x0001ed09bead87c0ull ||
883 (C1.w[1] == 0x0001ed09bead87c0ull
884 && C1.w[0] > 0x378d8e63ffffffffull)) {
885 // x is non-canonical if coefficient is larger than 10^34 -1
886 C1.w[1] = 0;
887 C1.w[0] = 0;
888 } else { // canonical
893 // test for input equal to zero
894 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
895 // x is 0
896 // return 0 preserving the sign bit and the preferred exponent
897 // of MAX(Q(x), 0)
898 if (x_exp <= (0x1820ull << 49)) {
899 res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
900 } else {
901 res.w[1] = x_sign | x_exp;
903 res.w[0] = 0x0000000000000000ull;
904 BID_RETURN (res);
906 // x is not special and is not zero
908 // if (exp <= -(p+1)) return 0
909 if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35
910 res.w[1] = x_sign | 0x3040000000000000ull;
911 res.w[0] = 0x0000000000000000ull;
912 BID_RETURN (res);
914 // q = nr. of decimal digits in x
915 // determine first the nr. of bits in x
916 if (C1.w[1] == 0) {
917 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
918 // split the 64-bit value in two 32-bit halves to avoid rounding errors
919 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
920 tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
921 x_nr_bits =
922 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
923 } else { // x < 2^32
924 tmp1.d = (double) (C1.w[0]); // exact conversion
925 x_nr_bits =
926 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
928 } else { // if x < 2^53
929 tmp1.d = (double) C1.w[0]; // exact conversion
930 x_nr_bits =
931 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
933 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
934 tmp1.d = (double) C1.w[1]; // exact conversion
935 x_nr_bits =
936 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
939 q = nr_digits[x_nr_bits - 1].digits;
940 if (q == 0) {
941 q = nr_digits[x_nr_bits - 1].digits1;
942 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi
943 || (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
944 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
945 q++;
947 exp = (x_exp >> 49) - 6176;
948 if (exp >= 0) { // -exp <= 0
949 // the argument is an integer already
950 res.w[1] = x.w[1];
951 res.w[0] = x.w[0];
952 BID_RETURN (res);
953 } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
954 // need to shift right -exp digits from the coefficient; the exp will be 0
955 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
956 // chop off ind digits from the lower part of C1
957 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
958 tmp64 = C1.w[0];
959 if (ind <= 19) {
960 C1.w[0] = C1.w[0] + midpoint64[ind - 1];
961 } else {
962 C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
963 C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
965 if (C1.w[0] < tmp64)
966 C1.w[1]++;
967 // calculate C* and f*
968 // C* is actually floor(C*) in this case
969 // C* and f* need shifting and masking, as shown by
970 // shiftright128[] and maskhigh128[]
971 // 1 <= x <= 34
972 // kx = 10^(-x) = ten2mk128[ind - 1]
973 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
974 // the approximation of 10^(-x) was rounded up to 118 bits
975 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
976 // determine the value of res and fstar
977 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
978 // redundant shift = shiftright128[ind - 1]; // shift = 0
979 res.w[1] = P256.w[3];
980 res.w[0] = P256.w[2];
981 // redundant fstar.w[3] = 0;
982 // redundant fstar.w[2] = 0;
983 // redundant fstar.w[1] = P256.w[1];
984 // redundant fstar.w[0] = P256.w[0];
985 // fraction f* < 10^(-x) <=> midpoint
986 // f* is in the right position to be compared with
987 // 10^(-x) from ten2mk128[]
988 // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even)
989 if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
990 ((P256.w[1] < (ten2mk128[ind - 1].w[1]))
991 || ((P256.w[1] == ten2mk128[ind - 1].w[1])
992 && (P256.w[0] < ten2mk128[ind - 1].w[0])))) {
993 // subract 1 to make even
994 if (res.w[0]-- == 0) {
995 res.w[1]--;
998 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
999 shift = shiftright128[ind - 1]; // 3 <= shift <= 63
1000 res.w[1] = (P256.w[3] >> shift);
1001 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
1002 // redundant fstar.w[3] = 0;
1003 fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
1004 fstar.w[1] = P256.w[1];
1005 fstar.w[0] = P256.w[0];
1006 // fraction f* < 10^(-x) <=> midpoint
1007 // f* is in the right position to be compared with
1008 // 10^(-x) from ten2mk128[]
1009 if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
1010 fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] ||
1011 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
1012 fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
1013 // subract 1 to make even
1014 if (res.w[0]-- == 0) {
1015 res.w[1]--;
1018 } else { // 22 <= ind - 1 <= 33
1019 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
1020 res.w[1] = 0;
1021 res.w[0] = P256.w[3] >> shift;
1022 fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
1023 fstar.w[2] = P256.w[2];
1024 fstar.w[1] = P256.w[1];
1025 fstar.w[0] = P256.w[0];
1026 // fraction f* < 10^(-x) <=> midpoint
1027 // f* is in the right position to be compared with
1028 // 10^(-x) from ten2mk128[]
1029 if ((res.w[0] & 0x0000000000000001ull) && // is result odd?
1030 fstar.w[3] == 0 && fstar.w[2] == 0
1031 && (fstar.w[1] < ten2mk128[ind - 1].w[1]
1032 || (fstar.w[1] == ten2mk128[ind - 1].w[1]
1033 && fstar.w[0] < ten2mk128[ind - 1].w[0]))) {
1034 // subract 1 to make even
1035 if (res.w[0]-- == 0) {
1036 res.w[1]--;
1040 res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
1041 BID_RETURN (res);
1042 } else { // if ((q + exp) < 0) <=> q < -exp
1043 // the result is +0 or -0
1044 res.w[1] = x_sign | 0x3040000000000000ull;
1045 res.w[0] = 0x0000000000000000ull;
1046 BID_RETURN (res);
1050 /*****************************************************************************
1051 * BID128_round_integral_negative
1052 ****************************************************************************/
1054 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_negative, x)
1056 UINT128 res;
1057 UINT64 x_sign;
1058 UINT64 x_exp;
1059 int exp; // unbiased exponent
1060 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
1061 // (all are UINT64)
1062 BID_UI64DOUBLE tmp1;
1063 unsigned int x_nr_bits;
1064 int q, ind, shift;
1065 UINT128 C1;
1066 // UINT128 res is C* at first - represents up to 34 decimal digits ~
1067 // 113 bits
1068 UINT256 fstar;
1069 UINT256 P256;
1071 // check for NaN or Infinity
1072 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
1073 // x is special
1074 if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
1075 // if x = NaN, then res = Q (x)
1076 // check first for non-canonical NaN payload
1077 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
1078 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
1079 (x.w[0] > 0x38c15b09ffffffffull))) {
1080 x.w[1] = x.w[1] & 0xffffc00000000000ull;
1081 x.w[0] = 0x0ull;
1083 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
1084 // set invalid flag
1085 *pfpsf |= INVALID_EXCEPTION;
1086 // return quiet (x)
1087 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
1088 res.w[0] = x.w[0];
1089 } else { // x is QNaN
1090 // return x
1091 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
1092 res.w[0] = x.w[0];
1094 BID_RETURN (res)
1095 } else { // x is not a NaN, so it must be infinity
1096 if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
1097 // return +inf
1098 res.w[1] = 0x7800000000000000ull;
1099 res.w[0] = 0x0000000000000000ull;
1100 } else { // x is -inf
1101 // return -inf
1102 res.w[1] = 0xf800000000000000ull;
1103 res.w[0] = 0x0000000000000000ull;
1105 BID_RETURN (res);
1108 // unpack x
1109 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
1110 C1.w[1] = x.w[1] & MASK_COEFF;
1111 C1.w[0] = x.w[0];
1113 // check for non-canonical values (treated as zero)
1114 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
1115 // non-canonical
1116 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
1117 C1.w[1] = 0; // significand high
1118 C1.w[0] = 0; // significand low
1119 } else { // G0_G1 != 11
1120 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
1121 if (C1.w[1] > 0x0001ed09bead87c0ull ||
1122 (C1.w[1] == 0x0001ed09bead87c0ull
1123 && C1.w[0] > 0x378d8e63ffffffffull)) {
1124 // x is non-canonical if coefficient is larger than 10^34 -1
1125 C1.w[1] = 0;
1126 C1.w[0] = 0;
1127 } else { // canonical
1132 // test for input equal to zero
1133 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
1134 // x is 0
1135 // return 0 preserving the sign bit and the preferred exponent
1136 // of MAX(Q(x), 0)
1137 if (x_exp <= (0x1820ull << 49)) {
1138 res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
1139 } else {
1140 res.w[1] = x_sign | x_exp;
1142 res.w[0] = 0x0000000000000000ull;
1143 BID_RETURN (res);
1145 // x is not special and is not zero
1147 // if (exp <= -p) return -1.0 or +0.0
1148 if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
1149 if (x_sign) {
1150 // if negative, return negative 1, because we know the coefficient
1151 // is non-zero (would have been caught above)
1152 res.w[1] = 0xb040000000000000ull;
1153 res.w[0] = 0x0000000000000001ull;
1154 } else {
1155 // if positive, return positive 0, because we know coefficient is
1156 // non-zero (would have been caught above)
1157 res.w[1] = 0x3040000000000000ull;
1158 res.w[0] = 0x0000000000000000ull;
1160 BID_RETURN (res);
1162 // q = nr. of decimal digits in x
1163 // determine first the nr. of bits in x
1164 if (C1.w[1] == 0) {
1165 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
1166 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1167 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
1168 tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
1169 x_nr_bits =
1170 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1171 } else { // x < 2^32
1172 tmp1.d = (double) (C1.w[0]); // exact conversion
1173 x_nr_bits =
1174 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1176 } else { // if x < 2^53
1177 tmp1.d = (double) C1.w[0]; // exact conversion
1178 x_nr_bits =
1179 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1181 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1182 tmp1.d = (double) C1.w[1]; // exact conversion
1183 x_nr_bits =
1184 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1187 q = nr_digits[x_nr_bits - 1].digits;
1188 if (q == 0) {
1189 q = nr_digits[x_nr_bits - 1].digits1;
1190 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
1191 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
1192 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
1193 q++;
1195 exp = (x_exp >> 49) - 6176;
1196 if (exp >= 0) { // -exp <= 0
1197 // the argument is an integer already
1198 res.w[1] = x.w[1];
1199 res.w[0] = x.w[0];
1200 BID_RETURN (res);
1201 } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
1202 // need to shift right -exp digits from the coefficient; the exp will be 0
1203 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
1204 // (number of digits to be chopped off)
1205 // chop off ind digits from the lower part of C1
1206 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
1207 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
1208 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
1209 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
1210 //tmp64 = C1.w[0];
1211 // if (ind <= 19) {
1212 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
1213 // } else {
1214 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
1215 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
1216 // }
1217 // if (C1.w[0] < tmp64) C1.w[1]++;
1218 // if carry-out from C1.w[0], increment C1.w[1]
1219 // calculate C* and f*
1220 // C* is actually floor(C*) in this case
1221 // C* and f* need shifting and masking, as shown by
1222 // shiftright128[] and maskhigh128[]
1223 // 1 <= x <= 34
1224 // kx = 10^(-x) = ten2mk128[ind - 1]
1225 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
1226 // the approximation of 10^(-x) was rounded up to 118 bits
1227 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
1228 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1229 res.w[1] = P256.w[3];
1230 res.w[0] = P256.w[2];
1231 // if positive, the truncated value is already the correct result
1232 if (x_sign) { // if negative
1233 // redundant fstar.w[3] = 0;
1234 // redundant fstar.w[2] = 0;
1235 // redundant fstar.w[1] = P256.w[1];
1236 // redundant fstar.w[0] = P256.w[0];
1237 // fraction f* > 10^(-x) <=> inexact
1238 // f* is in the right position to be compared with
1239 // 10^(-x) from ten2mk128[]
1240 if ((P256.w[1] > ten2mk128[ind - 1].w[1])
1241 || (P256.w[1] == ten2mk128[ind - 1].w[1]
1242 && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
1243 if (++res.w[0] == 0) {
1244 res.w[1]++;
1248 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1249 shift = shiftright128[ind - 1]; // 0 <= shift <= 102
1250 res.w[1] = (P256.w[3] >> shift);
1251 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
1252 // if positive, the truncated value is already the correct result
1253 if (x_sign) { // if negative
1254 // redundant fstar.w[3] = 0;
1255 fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
1256 fstar.w[1] = P256.w[1];
1257 fstar.w[0] = P256.w[0];
1258 // fraction f* > 10^(-x) <=> inexact
1259 // f* is in the right position to be compared with
1260 // 10^(-x) from ten2mk128[]
1261 if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
1262 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
1263 fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
1264 if (++res.w[0] == 0) {
1265 res.w[1]++;
1269 } else { // 22 <= ind - 1 <= 33
1270 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
1271 res.w[1] = 0;
1272 res.w[0] = P256.w[3] >> shift;
1273 // if positive, the truncated value is already the correct result
1274 if (x_sign) { // if negative
1275 fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
1276 fstar.w[2] = P256.w[2];
1277 fstar.w[1] = P256.w[1];
1278 fstar.w[0] = P256.w[0];
1279 // fraction f* > 10^(-x) <=> inexact
1280 // f* is in the right position to be compared with
1281 // 10^(-x) from ten2mk128[]
1282 if (fstar.w[3] || fstar.w[2]
1283 || fstar.w[1] > ten2mk128[ind - 1].w[1]
1284 || (fstar.w[1] == ten2mk128[ind - 1].w[1]
1285 && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
1286 if (++res.w[0] == 0) {
1287 res.w[1]++;
1292 res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
1293 BID_RETURN (res);
1294 } else { // if exp < 0 and q + exp <= 0
1295 if (x_sign) { // negative rounds down to -1.0
1296 res.w[1] = 0xb040000000000000ull;
1297 res.w[0] = 0x0000000000000001ull;
1298 } else { // positive rpunds down to +0.0
1299 res.w[1] = 0x3040000000000000ull;
1300 res.w[0] = 0x0000000000000000ull;
1302 BID_RETURN (res);
1306 /*****************************************************************************
1307 * BID128_round_integral_positive
1308 ****************************************************************************/
1310 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_positive, x)
1312 UINT128 res;
1313 UINT64 x_sign;
1314 UINT64 x_exp;
1315 int exp; // unbiased exponent
1316 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
1317 // (all are UINT64)
1318 BID_UI64DOUBLE tmp1;
1319 unsigned int x_nr_bits;
1320 int q, ind, shift;
1321 UINT128 C1;
1322 // UINT128 res is C* at first - represents up to 34 decimal digits ~
1323 // 113 bits
1324 UINT256 fstar;
1325 UINT256 P256;
1327 // check for NaN or Infinity
1328 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
1329 // x is special
1330 if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
1331 // if x = NaN, then res = Q (x)
1332 // check first for non-canonical NaN payload
1333 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
1334 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
1335 (x.w[0] > 0x38c15b09ffffffffull))) {
1336 x.w[1] = x.w[1] & 0xffffc00000000000ull;
1337 x.w[0] = 0x0ull;
1339 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
1340 // set invalid flag
1341 *pfpsf |= INVALID_EXCEPTION;
1342 // return quiet (x)
1343 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
1344 res.w[0] = x.w[0];
1345 } else { // x is QNaN
1346 // return x
1347 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
1348 res.w[0] = x.w[0];
1350 BID_RETURN (res)
1351 } else { // x is not a NaN, so it must be infinity
1352 if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
1353 // return +inf
1354 res.w[1] = 0x7800000000000000ull;
1355 res.w[0] = 0x0000000000000000ull;
1356 } else { // x is -inf
1357 // return -inf
1358 res.w[1] = 0xf800000000000000ull;
1359 res.w[0] = 0x0000000000000000ull;
1361 BID_RETURN (res);
1364 // unpack x
1365 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
1366 C1.w[1] = x.w[1] & MASK_COEFF;
1367 C1.w[0] = x.w[0];
1369 // check for non-canonical values (treated as zero)
1370 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
1371 // non-canonical
1372 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
1373 C1.w[1] = 0; // significand high
1374 C1.w[0] = 0; // significand low
1375 } else { // G0_G1 != 11
1376 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
1377 if (C1.w[1] > 0x0001ed09bead87c0ull ||
1378 (C1.w[1] == 0x0001ed09bead87c0ull
1379 && C1.w[0] > 0x378d8e63ffffffffull)) {
1380 // x is non-canonical if coefficient is larger than 10^34 -1
1381 C1.w[1] = 0;
1382 C1.w[0] = 0;
1383 } else { // canonical
1388 // test for input equal to zero
1389 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
1390 // x is 0
1391 // return 0 preserving the sign bit and the preferred exponent
1392 // of MAX(Q(x), 0)
1393 if (x_exp <= (0x1820ull << 49)) {
1394 res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
1395 } else {
1396 res.w[1] = x_sign | x_exp;
1398 res.w[0] = 0x0000000000000000ull;
1399 BID_RETURN (res);
1401 // x is not special and is not zero
1403 // if (exp <= -p) return -0.0 or +1.0
1404 if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
1405 if (x_sign) {
1406 // if negative, return negative 0, because we know the coefficient
1407 // is non-zero (would have been caught above)
1408 res.w[1] = 0xb040000000000000ull;
1409 res.w[0] = 0x0000000000000000ull;
1410 } else {
1411 // if positive, return positive 1, because we know coefficient is
1412 // non-zero (would have been caught above)
1413 res.w[1] = 0x3040000000000000ull;
1414 res.w[0] = 0x0000000000000001ull;
1416 BID_RETURN (res);
1418 // q = nr. of decimal digits in x
1419 // determine first the nr. of bits in x
1420 if (C1.w[1] == 0) {
1421 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
1422 // split 64-bit value in two 32-bit halves to avoid rounding errors
1423 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
1424 tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
1425 x_nr_bits =
1426 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1427 } else { // x < 2^32
1428 tmp1.d = (double) (C1.w[0]); // exact conversion
1429 x_nr_bits =
1430 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1432 } else { // if x < 2^53
1433 tmp1.d = (double) C1.w[0]; // exact conversion
1434 x_nr_bits =
1435 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1437 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1438 tmp1.d = (double) C1.w[1]; // exact conversion
1439 x_nr_bits =
1440 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1443 q = nr_digits[x_nr_bits - 1].digits;
1444 if (q == 0) {
1445 q = nr_digits[x_nr_bits - 1].digits1;
1446 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
1447 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
1448 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
1449 q++;
1451 exp = (x_exp >> 49) - 6176;
1452 if (exp >= 0) { // -exp <= 0
1453 // the argument is an integer already
1454 res.w[1] = x.w[1];
1455 res.w[0] = x.w[0];
1456 BID_RETURN (res);
1457 } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
1458 // need to shift right -exp digits from the coefficient; exp will be 0
1459 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
1460 // (number of digits to be chopped off)
1461 // chop off ind digits from the lower part of C1
1462 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
1463 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
1464 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
1465 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
1466 // tmp64 = C1.w[0];
1467 // if (ind <= 19) {
1468 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
1469 // } else {
1470 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
1471 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
1472 // }
1473 // if (C1.w[0] < tmp64) C1.w[1]++;
1474 // if carry-out from C1.w[0], increment C1.w[1]
1475 // calculate C* and f*
1476 // C* is actually floor(C*) in this case
1477 // C* and f* need shifting and masking, as shown by
1478 // shiftright128[] and maskhigh128[]
1479 // 1 <= x <= 34
1480 // kx = 10^(-x) = ten2mk128[ind - 1]
1481 // C* = C1 * 10^(-x)
1482 // the approximation of 10^(-x) was rounded up to 118 bits
1483 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
1484 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1485 res.w[1] = P256.w[3];
1486 res.w[0] = P256.w[2];
1487 // if negative, the truncated value is already the correct result
1488 if (!x_sign) { // if positive
1489 // redundant fstar.w[3] = 0;
1490 // redundant fstar.w[2] = 0;
1491 // redundant fstar.w[1] = P256.w[1];
1492 // redundant fstar.w[0] = P256.w[0];
1493 // fraction f* > 10^(-x) <=> inexact
1494 // f* is in the right position to be compared with
1495 // 10^(-x) from ten2mk128[]
1496 if ((P256.w[1] > ten2mk128[ind - 1].w[1])
1497 || (P256.w[1] == ten2mk128[ind - 1].w[1]
1498 && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) {
1499 if (++res.w[0] == 0) {
1500 res.w[1]++;
1504 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1505 shift = shiftright128[ind - 1]; // 3 <= shift <= 63
1506 res.w[1] = (P256.w[3] >> shift);
1507 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
1508 // if negative, the truncated value is already the correct result
1509 if (!x_sign) { // if positive
1510 // redundant fstar.w[3] = 0;
1511 fstar.w[2] = P256.w[2] & maskhigh128[ind - 1];
1512 fstar.w[1] = P256.w[1];
1513 fstar.w[0] = P256.w[0];
1514 // fraction f* > 10^(-x) <=> inexact
1515 // f* is in the right position to be compared with
1516 // 10^(-x) from ten2mk128[]
1517 if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] ||
1518 (fstar.w[1] == ten2mk128[ind - 1].w[1] &&
1519 fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
1520 if (++res.w[0] == 0) {
1521 res.w[1]++;
1525 } else { // 22 <= ind - 1 <= 33
1526 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
1527 res.w[1] = 0;
1528 res.w[0] = P256.w[3] >> shift;
1529 // if negative, the truncated value is already the correct result
1530 if (!x_sign) { // if positive
1531 fstar.w[3] = P256.w[3] & maskhigh128[ind - 1];
1532 fstar.w[2] = P256.w[2];
1533 fstar.w[1] = P256.w[1];
1534 fstar.w[0] = P256.w[0];
1535 // fraction f* > 10^(-x) <=> inexact
1536 // f* is in the right position to be compared with
1537 // 10^(-x) from ten2mk128[]
1538 if (fstar.w[3] || fstar.w[2]
1539 || fstar.w[1] > ten2mk128[ind - 1].w[1]
1540 || (fstar.w[1] == ten2mk128[ind - 1].w[1]
1541 && fstar.w[0] >= ten2mk128[ind - 1].w[0])) {
1542 if (++res.w[0] == 0) {
1543 res.w[1]++;
1548 res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
1549 BID_RETURN (res);
1550 } else { // if exp < 0 and q + exp <= 0
1551 if (x_sign) { // negative rounds up to -0.0
1552 res.w[1] = 0xb040000000000000ull;
1553 res.w[0] = 0x0000000000000000ull;
1554 } else { // positive rpunds up to +1.0
1555 res.w[1] = 0x3040000000000000ull;
1556 res.w[0] = 0x0000000000000001ull;
1558 BID_RETURN (res);
1562 /*****************************************************************************
1563 * BID128_round_integral_zero
1564 ****************************************************************************/
1566 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_zero, x)
1568 UINT128 res;
1569 UINT64 x_sign;
1570 UINT64 x_exp;
1571 int exp; // unbiased exponent
1572 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
1573 // (all are UINT64)
1574 BID_UI64DOUBLE tmp1;
1575 unsigned int x_nr_bits;
1576 int q, ind, shift;
1577 UINT128 C1;
1578 // UINT128 res is C* at first - represents up to 34 decimal digits ~
1579 // 113 bits
1580 UINT256 P256;
1582 // check for NaN or Infinity
1583 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
1584 // x is special
1585 if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
1586 // if x = NaN, then res = Q (x)
1587 // check first for non-canonical NaN payload
1588 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
1589 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
1590 (x.w[0] > 0x38c15b09ffffffffull))) {
1591 x.w[1] = x.w[1] & 0xffffc00000000000ull;
1592 x.w[0] = 0x0ull;
1594 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
1595 // set invalid flag
1596 *pfpsf |= INVALID_EXCEPTION;
1597 // return quiet (x)
1598 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
1599 res.w[0] = x.w[0];
1600 } else { // x is QNaN
1601 // return x
1602 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
1603 res.w[0] = x.w[0];
1605 BID_RETURN (res)
1606 } else { // x is not a NaN, so it must be infinity
1607 if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
1608 // return +inf
1609 res.w[1] = 0x7800000000000000ull;
1610 res.w[0] = 0x0000000000000000ull;
1611 } else { // x is -inf
1612 // return -inf
1613 res.w[1] = 0xf800000000000000ull;
1614 res.w[0] = 0x0000000000000000ull;
1616 BID_RETURN (res);
1619 // unpack x
1620 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
1621 C1.w[1] = x.w[1] & MASK_COEFF;
1622 C1.w[0] = x.w[0];
1624 // check for non-canonical values (treated as zero)
1625 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
1626 // non-canonical
1627 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
1628 C1.w[1] = 0; // significand high
1629 C1.w[0] = 0; // significand low
1630 } else { // G0_G1 != 11
1631 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
1632 if (C1.w[1] > 0x0001ed09bead87c0ull ||
1633 (C1.w[1] == 0x0001ed09bead87c0ull
1634 && C1.w[0] > 0x378d8e63ffffffffull)) {
1635 // x is non-canonical if coefficient is larger than 10^34 -1
1636 C1.w[1] = 0;
1637 C1.w[0] = 0;
1638 } else { // canonical
1643 // test for input equal to zero
1644 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
1645 // x is 0
1646 // return 0 preserving the sign bit and the preferred exponent
1647 // of MAX(Q(x), 0)
1648 if (x_exp <= (0x1820ull << 49)) {
1649 res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
1650 } else {
1651 res.w[1] = x_sign | x_exp;
1653 res.w[0] = 0x0000000000000000ull;
1654 BID_RETURN (res);
1656 // x is not special and is not zero
1658 // if (exp <= -p) return -0.0 or +0.0
1659 if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34
1660 res.w[1] = x_sign | 0x3040000000000000ull;
1661 res.w[0] = 0x0000000000000000ull;
1662 BID_RETURN (res);
1664 // q = nr. of decimal digits in x
1665 // determine first the nr. of bits in x
1666 if (C1.w[1] == 0) {
1667 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
1668 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1669 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
1670 tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
1671 x_nr_bits =
1672 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1673 } else { // x < 2^32
1674 tmp1.d = (double) (C1.w[0]); // exact conversion
1675 x_nr_bits =
1676 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1678 } else { // if x < 2^53
1679 tmp1.d = (double) C1.w[0]; // exact conversion
1680 x_nr_bits =
1681 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1683 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1684 tmp1.d = (double) C1.w[1]; // exact conversion
1685 x_nr_bits =
1686 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1689 q = nr_digits[x_nr_bits - 1].digits;
1690 if (q == 0) {
1691 q = nr_digits[x_nr_bits - 1].digits1;
1692 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
1693 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
1694 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
1695 q++;
1697 exp = (x_exp >> 49) - 6176;
1698 if (exp >= 0) { // -exp <= 0
1699 // the argument is an integer already
1700 res.w[1] = x.w[1];
1701 res.w[0] = x.w[0];
1702 BID_RETURN (res);
1703 } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q
1704 // need to shift right -exp digits from the coefficient; the exp will be 0
1705 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
1706 // (number of digits to be chopped off)
1707 // chop off ind digits from the lower part of C1
1708 // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate
1709 // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP
1710 // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE
1711 // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE
1712 //tmp64 = C1.w[0];
1713 // if (ind <= 19) {
1714 // C1.w[0] = C1.w[0] + midpoint64[ind - 1];
1715 // } else {
1716 // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
1717 // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
1718 // }
1719 // if (C1.w[0] < tmp64) C1.w[1]++;
1720 // if carry-out from C1.w[0], increment C1.w[1]
1721 // calculate C* and f*
1722 // C* is actually floor(C*) in this case
1723 // C* and f* need shifting and masking, as shown by
1724 // shiftright128[] and maskhigh128[]
1725 // 1 <= x <= 34
1726 // kx = 10^(-x) = ten2mk128[ind - 1]
1727 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
1728 // the approximation of 10^(-x) was rounded up to 118 bits
1729 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
1730 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1731 res.w[1] = P256.w[3];
1732 res.w[0] = P256.w[2];
1733 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1734 shift = shiftright128[ind - 1]; // 3 <= shift <= 63
1735 res.w[1] = (P256.w[3] >> shift);
1736 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
1737 } else { // 22 <= ind - 1 <= 33
1738 shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38
1739 res.w[1] = 0;
1740 res.w[0] = P256.w[3] >> shift;
1742 res.w[1] = x_sign | 0x3040000000000000ull | res.w[1];
1743 BID_RETURN (res);
1744 } else { // if exp < 0 and q + exp <= 0 the result is +0 or -0
1745 res.w[1] = x_sign | 0x3040000000000000ull;
1746 res.w[0] = 0x0000000000000000ull;
1747 BID_RETURN (res);
1751 /*****************************************************************************
1752 * BID128_round_integral_nearest_away
1753 ****************************************************************************/
1755 BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_away, x)
1757 UINT128 res;
1758 UINT64 x_sign;
1759 UINT64 x_exp;
1760 int exp; // unbiased exponent
1761 // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo
1762 // (all are UINT64)
1763 UINT64 tmp64;
1764 BID_UI64DOUBLE tmp1;
1765 unsigned int x_nr_bits;
1766 int q, ind, shift;
1767 UINT128 C1;
1768 // UINT128 res is C* at first - represents up to 34 decimal digits ~
1769 // 113 bits
1770 // UINT256 fstar;
1771 UINT256 P256;
1773 // check for NaN or Infinity
1774 if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) {
1775 // x is special
1776 if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
1777 // if x = NaN, then res = Q (x)
1778 // check first for non-canonical NaN payload
1779 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
1780 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
1781 (x.w[0] > 0x38c15b09ffffffffull))) {
1782 x.w[1] = x.w[1] & 0xffffc00000000000ull;
1783 x.w[0] = 0x0ull;
1785 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
1786 // set invalid flag
1787 *pfpsf |= INVALID_EXCEPTION;
1788 // return quiet (x)
1789 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
1790 res.w[0] = x.w[0];
1791 } else { // x is QNaN
1792 // return x
1793 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
1794 res.w[0] = x.w[0];
1796 BID_RETURN (res)
1797 } else { // x is not a NaN, so it must be infinity
1798 if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf
1799 // return +inf
1800 res.w[1] = 0x7800000000000000ull;
1801 res.w[0] = 0x0000000000000000ull;
1802 } else { // x is -inf
1803 // return -inf
1804 res.w[1] = 0xf800000000000000ull;
1805 res.w[0] = 0x0000000000000000ull;
1807 BID_RETURN (res);
1810 // unpack x
1811 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
1812 C1.w[1] = x.w[1] & MASK_COEFF;
1813 C1.w[0] = x.w[0];
1815 // check for non-canonical values (treated as zero)
1816 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
1817 // non-canonical
1818 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
1819 C1.w[1] = 0; // significand high
1820 C1.w[0] = 0; // significand low
1821 } else { // G0_G1 != 11
1822 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
1823 if (C1.w[1] > 0x0001ed09bead87c0ull ||
1824 (C1.w[1] == 0x0001ed09bead87c0ull
1825 && C1.w[0] > 0x378d8e63ffffffffull)) {
1826 // x is non-canonical if coefficient is larger than 10^34 -1
1827 C1.w[1] = 0;
1828 C1.w[0] = 0;
1829 } else { // canonical
1834 // test for input equal to zero
1835 if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) {
1836 // x is 0
1837 // return 0 preserving the sign bit and the preferred exponent
1838 // of MAX(Q(x), 0)
1839 if (x_exp <= (0x1820ull << 49)) {
1840 res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull;
1841 } else {
1842 res.w[1] = x_sign | x_exp;
1844 res.w[0] = 0x0000000000000000ull;
1845 BID_RETURN (res);
1847 // x is not special and is not zero
1849 // if (exp <= -(p+1)) return 0.0
1850 if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35
1851 res.w[1] = x_sign | 0x3040000000000000ull;
1852 res.w[0] = 0x0000000000000000ull;
1853 BID_RETURN (res);
1855 // q = nr. of decimal digits in x
1856 // determine first the nr. of bits in x
1857 if (C1.w[1] == 0) {
1858 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
1859 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1860 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
1861 tmp1.d = (double) (C1.w[0] >> 32); // exact conversion
1862 x_nr_bits =
1863 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1864 } else { // x < 2^32
1865 tmp1.d = (double) (C1.w[0]); // exact conversion
1866 x_nr_bits =
1867 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1869 } else { // if x < 2^53
1870 tmp1.d = (double) C1.w[0]; // exact conversion
1871 x_nr_bits =
1872 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1874 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1875 tmp1.d = (double) C1.w[1]; // exact conversion
1876 x_nr_bits =
1877 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
1880 q = nr_digits[x_nr_bits - 1].digits;
1881 if (q == 0) {
1882 q = nr_digits[x_nr_bits - 1].digits1;
1883 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
1884 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
1885 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
1886 q++;
1888 exp = (x_exp >> 49) - 6176;
1889 if (exp >= 0) { // -exp <= 0
1890 // the argument is an integer already
1891 res.w[1] = x.w[1];
1892 res.w[0] = x.w[0];
1893 BID_RETURN (res);
1894 } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q
1895 // need to shift right -exp digits from the coefficient; the exp will be 0
1896 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x'
1897 // chop off ind digits from the lower part of C1
1898 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits
1899 tmp64 = C1.w[0];
1900 if (ind <= 19) {
1901 C1.w[0] = C1.w[0] + midpoint64[ind - 1];
1902 } else {
1903 C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0];
1904 C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1];
1906 if (C1.w[0] < tmp64)
1907 C1.w[1]++;
1908 // calculate C* and f*
1909 // C* is actually floor(C*) in this case
1910 // C* and f* need shifting and masking, as shown by
1911 // shiftright128[] and maskhigh128[]
1912 // 1 <= x <= 34
1913 // kx = 10^(-x) = ten2mk128[ind - 1]
1914 // C* = (C1 + 1/2 * 10^x) * 10^(-x)
1915 // the approximation of 10^(-x) was rounded up to 118 bits
1916 __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]);
1917 // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g.
1918 // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
1919 // if (0 < f* < 10^(-x)) then the result is a midpoint
1920 // if floor(C*) is even then C* = floor(C*) - logical right
1921 // shift; C* has p decimal digits, correct by Prop. 1)
1922 // else if floor(C*) is odd C* = floor(C*)-1 (logical right
1923 // shift; C* has p decimal digits, correct by Pr. 1)
1924 // else
1925 // C* = floor(C*) (logical right shift; C has p decimal digits,
1926 // correct by Property 1)
1927 // n = C* * 10^(e+x)
1929 // shift right C* by Ex-128 = shiftright128[ind]
1930 if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0
1931 res.w[1] = P256.w[3];
1932 res.w[0] = P256.w[2];
1933 } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63
1934 shift = shiftright128[ind - 1]; // 3 <= shift <= 63
1935 res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift);
1936 res.w[1] = (P256.w[3] >> shift);
1937 } else { // 22 <= ind - 1 <= 33
1938 shift = shiftright128[ind - 1]; // 2 <= shift <= 38
1939 res.w[1] = 0;
1940 res.w[0] = (P256.w[3] >> (shift - 64)); // 2 <= shift - 64 <= 38
1942 // if the result was a midpoint, it was already rounded away from zero
1943 res.w[1] |= x_sign | 0x3040000000000000ull;
1944 BID_RETURN (res);
1945 } else { // if ((q + exp) < 0) <=> q < -exp
1946 // the result is +0 or -0
1947 res.w[1] = x_sign | 0x3040000000000000ull;
1948 res.w[0] = 0x0000000000000000ull;
1949 BID_RETURN (res);