/* libgcc/config/libbid/bid128_add.c (official-gcc.git) */
/* Copyright (C) 2007-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#include "bid_internal.h"

/*****************************************************************************
 *    BID64/BID128 add
 ****************************************************************************/
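/* Naming convention, inferred from the wrappers below: the operand-format
   suffix letters use "d" for a 64-bit decimal operand and "q" for a
   128-bit one, while the bid64/bid128 prefix gives the result format.
   The mixed-format variants widen the narrow operand with bid64_to_bid128
   (an exact conversion) and delegate to bid64qq_add or bid128_add.  */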
#if DECIMAL_CALL_BY_REFERENCE
void
bid64dq_add (UINT64 * pres, UINT64 * px, UINT128 * py
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  UINT64 x = *px;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64dq_add (UINT64 x, UINT128 y
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
#endif
  UINT64 res = 0xbaddbaddbaddbaddull;
  UINT128 x1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qq_add (&res, &x1, py
               _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
               _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid64qq_add (x1, y
                     _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                     _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qd_add (UINT64 * pres, UINT128 * px, UINT64 * py
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64qd_add (UINT128 x, UINT64 y
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
#endif
  UINT64 res = 0xbaddbaddbaddbaddull;
  UINT128 y1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qq_add (&res, px, &y1
               _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
               _EXC_INFO_ARG);
#else
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid64qq_add (x, y1
                     _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                     _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qq_add (UINT64 * pres, UINT128 * px, UINT128 * py
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  UINT128 x = *px, y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64qq_add (UINT128 x, UINT128 y
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
#endif
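  /* The sum is computed as fma (1, x, y).  The initializer below is the
     canonical BID128 encoding of +1: coefficient 1 in the low word and
     the biased exponent 6176 (i.e. 10^0) in bits 49-62 of the high word,
     which yields the 0x3040000000000000 pattern.  */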
  UINT128 one = { {0x0000000000000001ull, 0x3040000000000000ull}
  };
  UINT64 res = 0xbaddbaddbaddbaddull;

  BID_SWAP128 (one);
#if DECIMAL_CALL_BY_REFERENCE
  bid64qqq_fma (&res, &one, &x, &y
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                _EXC_INFO_ARG);
#else
  res = bid64qqq_fma (one, x, y
                      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                      _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
#if DECIMAL_CALL_BY_REFERENCE
void
bid128dd_add (UINT128 * pres, UINT64 * px, UINT64 * py
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 x = *px, y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128dd_add (UINT64 x, UINT64 y
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
  };
  UINT128 x1, y1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_add (&res, &x1, &y1
              _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
              _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_add (x1, y1
                    _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                    _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
#if DECIMAL_CALL_BY_REFERENCE
void
bid128dq_add (UINT128 * pres, UINT64 * px, UINT128 * py
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 x = *px;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128dq_add (UINT64 x, UINT128 y
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
  };
  UINT128 x1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_add (&res, &x1, py
              _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
              _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_add (x1, y
                    _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                    _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
#if DECIMAL_CALL_BY_REFERENCE
void
bid128qd_add (UINT128 * pres, UINT128 * px, UINT64 * py
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128qd_add (UINT128 x, UINT64 y
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
  };
  UINT128 y1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_add (&res, px, &y1
              _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
              _EXC_INFO_ARG);
#else
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_add (x, y1
                    _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                    _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
// bid128_add stands for bid128qq_add

/*****************************************************************************
 *    BID64/BID128 sub
 ****************************************************************************/
#if DECIMAL_CALL_BY_REFERENCE
void
bid64dq_sub (UINT64 * pres, UINT64 * px, UINT128 * py
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  UINT64 x = *px;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64dq_sub (UINT64 x, UINT128 y
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
#endif
  UINT64 res = 0xbaddbaddbaddbaddull;
  UINT128 x1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qq_sub (&res, &x1, py
               _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
               _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid64qq_sub (x1, y
                     _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                     _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qd_sub (UINT64 * pres, UINT128 * px, UINT64 * py
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64qd_sub (UINT128 x, UINT64 y
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
#endif
  UINT64 res = 0xbaddbaddbaddbaddull;
  UINT128 y1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qq_sub (&res, px, &y1
               _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
               _EXC_INFO_ARG);
#else
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid64qq_sub (x, y1
                     _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                     _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qq_sub (UINT64 * pres, UINT128 * px, UINT128 * py
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
  UINT128 x = *px, y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64qq_sub (UINT128 x, UINT128 y
             _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
             _EXC_INFO_PARAM) {
#endif
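  /* The difference is computed as fma (1, x, -y): unless y is a NaN its
     sign bit is flipped below before the call, so that NaN operands reach
     bid64qqq_fma unmodified and propagate exactly as in addition.  */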
  UINT128 one = { {0x0000000000000001ull, 0x3040000000000000ull}
  };
  UINT64 res = 0xbaddbaddbaddbaddull;
  UINT64 y_sign;

  BID_SWAP128 (one);
  if ((y.w[HIGH_128W] & MASK_NAN) != MASK_NAN) { // y is not NAN
    // change its sign
    y_sign = y.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
    if (y_sign)
      y.w[HIGH_128W] = y.w[HIGH_128W] & 0x7fffffffffffffffull;
    else
      y.w[HIGH_128W] = y.w[HIGH_128W] | 0x8000000000000000ull;
  }
#if DECIMAL_CALL_BY_REFERENCE
  bid64qqq_fma (&res, &one, &x, &y
                _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                _EXC_INFO_ARG);
#else
  res = bid64qqq_fma (one, x, y
                      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                      _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
#if DECIMAL_CALL_BY_REFERENCE
void
bid128dd_sub (UINT128 * pres, UINT64 * px, UINT64 * py
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 x = *px, y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128dd_sub (UINT64 x, UINT64 y
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
  };
  UINT128 x1, y1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_sub (&res, &x1, &y1
              _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
              _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_sub (x1, y1
                    _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                    _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
#if DECIMAL_CALL_BY_REFERENCE
void
bid128dq_sub (UINT128 * pres, UINT64 * px, UINT128 * py
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 x = *px;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128dq_sub (UINT64 x, UINT128 y
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
  };
  UINT128 x1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_sub (&res, &x1, py
              _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
              _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_sub (x1, y
                    _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                    _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
#if DECIMAL_CALL_BY_REFERENCE
void
bid128qd_sub (UINT128 * pres, UINT128 * px, UINT64 * py
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128qd_sub (UINT128 x, UINT64 y
              _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
              _EXC_INFO_PARAM) {
#endif
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
  };
  UINT128 y1;

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_sub (&res, px, &y1
              _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
              _EXC_INFO_ARG);
#else
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_sub (x, y1
                    _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
                    _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
#if DECIMAL_CALL_BY_REFERENCE
void
bid128_add (UINT128 * pres, UINT128 * px, UINT128 * py
            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
            _EXC_INFO_PARAM) {
  UINT128 x = *px, y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128_add (UINT128 x, UINT128 y
            _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
            _EXC_INFO_PARAM) {
#endif
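/* Rough outline of the implementation below: (1) handle NaN and infinity
   operands; (2) unpack both operands, treating non-canonical encodings as
   zero; (3) handle zero operands, which must return the preferred (smaller)
   exponent; (4) otherwise ensure x is the operand with the larger exponent,
   count the decimal digits q1 and q2 of the two coefficients, and branch on
   delta = q1 + e1 - (q2 + e2).  */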
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
  };
  UINT64 x_sign, y_sign, tmp_sign;
  UINT64 x_exp, y_exp, tmp_exp; // e1 = x_exp, e2 = y_exp
  UINT64 C1_hi, C2_hi, tmp_signif_hi;
  UINT64 C1_lo, C2_lo, tmp_signif_lo;
  // Note: C1.w[1], C1.w[0] represent C1_hi, C1_lo (all UINT64)
  // Note: C2.w[1], C2.w[0] represent C2_hi, C2_lo (all UINT64)
  UINT64 tmp64, tmp64A, tmp64B;
  BID_UI64DOUBLE tmp1, tmp2;
  int x_nr_bits, y_nr_bits;
  int q1, q2, delta, scale, x1, ind, shift, tmp_inexact = 0;
  UINT64 halfulp64;
  UINT128 halfulp128;
  UINT128 C1, C2;
  UINT128 ten2m1;
  UINT128 highf2star; // top 128 bits in f2*; low 128 bits in R256[1], R256[0]
  UINT256 P256, Q256, R256;
  int is_inexact = 0, is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
  int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
  int second_pass = 0;
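  /* The is_midpoint_* and is_inexact_* flags appear to record on which side
     of the exact result the rounding of the smaller operand landed; together
     with second_pass they let the code compensate when a second rounding
     would otherwise introduce a double-rounding error.  */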
  BID_SWAP128 (x);
  BID_SWAP128 (y);
  x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
  y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative

  // check for NaN or Infinity
  if (((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL)
      || ((y.w[1] & MASK_SPECIAL) == MASK_SPECIAL)) {
    // x is special or y is special
    if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
      // check first for non-canonical NaN payload
      if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
          (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull)
           && (x.w[0] > 0x38c15b09ffffffffull))) {
        x.w[1] = x.w[1] & 0xffffc00000000000ull;
        x.w[0] = 0x0ull;
      }
      if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
        // set invalid flag
        *pfpsf |= INVALID_EXCEPTION;
        // return quiet (x)
        res.w[1] = x.w[1] & 0xfc003fffffffffffull;
        // clear out also G[6]-G[16]
        res.w[0] = x.w[0];
      } else { // x is QNaN
        // return x
        res.w[1] = x.w[1] & 0xfc003fffffffffffull;
        // clear out G[6]-G[16]
        res.w[0] = x.w[0];
        // if y = SNaN signal invalid exception
        if ((y.w[1] & MASK_SNAN) == MASK_SNAN) {
          // set invalid flag
          *pfpsf |= INVALID_EXCEPTION;
        }
      }
      BID_SWAP128 (res);
      BID_RETURN (res);
    } else if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
      // check first for non-canonical NaN payload
      if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
          (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull)
           && (y.w[0] > 0x38c15b09ffffffffull))) {
        y.w[1] = y.w[1] & 0xffffc00000000000ull;
        y.w[0] = 0x0ull;
      }
      if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN
        // set invalid flag
        *pfpsf |= INVALID_EXCEPTION;
        // return quiet (y)
        res.w[1] = y.w[1] & 0xfc003fffffffffffull;
        // clear out also G[6]-G[16]
        res.w[0] = y.w[0];
      } else { // y is QNaN
        // return y
        res.w[1] = y.w[1] & 0xfc003fffffffffffull;
        // clear out G[6]-G[16]
        res.w[0] = y.w[0];
      }
      BID_SWAP128 (res);
      BID_RETURN (res);
    } else { // neither x nor y is NaN; at least one is infinity
      if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x is infinity
        if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y is infinity
          // if same sign, return either of them
          if ((x.w[1] & MASK_SIGN) == (y.w[1] & MASK_SIGN)) {
            res.w[1] = x_sign | MASK_INF;
            res.w[0] = 0x0ull;
          } else { // x and y are infinities of opposite signs
            // set invalid flag
            *pfpsf |= INVALID_EXCEPTION;
            // return QNaN Indefinite
            res.w[1] = 0x7c00000000000000ull;
            res.w[0] = 0x0000000000000000ull;
          }
        } else { // y is 0 or finite
          // return x
          res.w[1] = x_sign | MASK_INF;
          res.w[0] = 0x0ull;
        }
      } else { // x is not NaN or infinity, so y must be infinity
        res.w[1] = y_sign | MASK_INF;
        res.w[0] = 0x0ull;
      }
      BID_SWAP128 (res);
      BID_RETURN (res);
    }
  }
  // unpack the arguments
  // unpack x
  C1_hi = x.w[1] & MASK_COEFF;
  C1_lo = x.w[0];
  // test for non-canonical values:
  // - values whose encoding begins with x00, x01, or x10 and whose
  //   coefficient is larger than 10^34 - 1, or
  // - values whose encoding begins with x1100, x1101, x1110 (if NaNs
  //   and infinities were eliminated already this test is reduced to
  //   checking for x10x)

  // x is not infinity; check for non-canonical values - treated as zero
  if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {
    // G0_G1=11; non-canonical
    x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
    C1_hi = 0; // significand high
    C1_lo = 0; // significand low
  } else { // G0_G1 != 11
    x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
    if (C1_hi > 0x0001ed09bead87c0ull ||
        (C1_hi == 0x0001ed09bead87c0ull
         && C1_lo > 0x378d8e63ffffffffull)) {
      // x is non-canonical if coefficient is larger than 10^34 - 1
      C1_hi = 0;
      C1_lo = 0;
    } else { // canonical
      ;
    }
  }
  // unpack y
  C2_hi = y.w[1] & MASK_COEFF;
  C2_lo = y.w[0];
  // y is not infinity; check for non-canonical values - treated as zero
  if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {
    // G0_G1=11; non-canonical
    y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
    C2_hi = 0; // significand high
    C2_lo = 0; // significand low
  } else { // G0_G1 != 11
    y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
    if (C2_hi > 0x0001ed09bead87c0ull ||
        (C2_hi == 0x0001ed09bead87c0ull
         && C2_lo > 0x378d8e63ffffffffull)) {
      // y is non-canonical if coefficient is larger than 10^34 - 1
      C2_hi = 0;
      C2_lo = 0;
    } else { // canonical
      ;
    }
  }
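  /* When one operand is zero the result is exact, so IEEE 754-2008 requires
     the preferred exponent: the smaller of the two operand exponents, as far
     down as the nonzero coefficient can be scaled up within P34 digits.  The
     two branches below implement this for x = 0 and for y = 0.  */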
  if ((C1_hi == 0x0ull) && (C1_lo == 0x0ull)) {
    // x is 0 and y is not special
    // if y is 0 return 0 with the smaller exponent
    if ((C2_hi == 0x0ull) && (C2_lo == 0x0ull)) {
      if (x_exp < y_exp)
        res.w[1] = x_exp;
      else
        res.w[1] = y_exp;
      if (x_sign && y_sign)
        res.w[1] = res.w[1] | x_sign; // both negative
      else if (rnd_mode == ROUNDING_DOWN && x_sign != y_sign)
        res.w[1] = res.w[1] | 0x8000000000000000ull; // -0
      // else; // res = +0
      res.w[0] = 0;
    } else {
      // for 0 + y return y, with the preferred exponent
      if (y_exp <= x_exp) {
        res.w[1] = y.w[1];
        res.w[0] = y.w[0];
      } else { // if y_exp > x_exp
        // return (C2 * 10^scale) * 10^(y_exp - scale)
        // where scale = min (P34-q2, y_exp-x_exp)
        // determine q2 = nr. of decimal digits in y
        //  determine first the nr. of bits in y (y_nr_bits)
        if (C2_hi == 0) { // y_bits is the nr. of bits in C2_lo
          if (C2_lo >= 0x0020000000000000ull) { // y >= 2^53
            // split the 64-bit value in two 32-bit halves to avoid
            // rounding errors
            if (C2_lo >= 0x0000000100000000ull) { // y >= 2^32
              tmp2.d = (double) (C2_lo >> 32); // exact conversion
              y_nr_bits =
                32 + ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
            } else { // y < 2^32
              tmp2.d = (double) (C2_lo); // exact conversion
              y_nr_bits =
                ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
            }
          } else { // if y < 2^53
            tmp2.d = (double) C2_lo; // exact conversion
            y_nr_bits =
              ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
          }
        } else { // C2_hi != 0 => nr. bits = 64 + nr_bits (C2_hi)
          tmp2.d = (double) C2_hi; // exact conversion
          y_nr_bits =
            64 + ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
        }
        q2 = nr_digits[y_nr_bits].digits;
        if (q2 == 0) {
          q2 = nr_digits[y_nr_bits].digits1;
          if (C2_hi > nr_digits[y_nr_bits].threshold_hi ||
              (C2_hi == nr_digits[y_nr_bits].threshold_hi &&
               C2_lo >= nr_digits[y_nr_bits].threshold_lo))
            q2++;
        }
        // return (C2 * 10^scale) * 10^(y_exp - scale)
        // where scale = min (P34-q2, y_exp-x_exp)
        scale = P34 - q2;
        ind = (y_exp - x_exp) >> 49;
        if (ind < scale)
          scale = ind;
        if (scale == 0) {
          res.w[1] = y.w[1];
          res.w[0] = y.w[0];
        } else if (q2 <= 19) { // y fits in 64 bits
          if (scale <= 19) { // 10^scale fits in 64 bits
            // 64 x 64 C2_lo * ten2k64[scale]
            __mul_64x64_to_128MACH (res, C2_lo, ten2k64[scale]);
          } else { // 10^scale fits in 128 bits
            // 64 x 128 C2_lo * ten2k128[scale - 20]
            __mul_128x64_to_128 (res, C2_lo, ten2k128[scale - 20]);
          }
        } else { // y fits in 128 bits, but 10^scale must fit in 64 bits
          // 64 x 128 ten2k64[scale] * C2
          C2.w[1] = C2_hi;
          C2.w[0] = C2_lo;
          __mul_128x64_to_128 (res, ten2k64[scale], C2);
        }
        // subtract scale from the exponent
        y_exp = y_exp - ((UINT64) scale << 49);
        res.w[1] = res.w[1] | y_sign | y_exp;
      }
    }
    BID_SWAP128 (res);
    BID_RETURN (res);
  } else if ((C2_hi == 0x0ull) && (C2_lo == 0x0ull)) {
    // y is 0 and x is not special, and not zero
    // for x + 0 return x, with the preferred exponent
    if (x_exp <= y_exp) {
      res.w[1] = x.w[1];
      res.w[0] = x.w[0];
    } else { // if x_exp > y_exp
      // return (C1 * 10^scale) * 10^(x_exp - scale)
      // where scale = min (P34-q1, x_exp-y_exp)
      // determine q1 = nr. of decimal digits in x
      //  determine first the nr. of bits in x
      if (C1_hi == 0) { // x_bits is the nr. of bits in C1_lo
        if (C1_lo >= 0x0020000000000000ull) { // x >= 2^53
          // split the 64-bit value in two 32-bit halves to avoid
          // rounding errors
          if (C1_lo >= 0x0000000100000000ull) { // x >= 2^32
            tmp1.d = (double) (C1_lo >> 32); // exact conversion
            x_nr_bits =
              32 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
          } else { // x < 2^32
            tmp1.d = (double) (C1_lo); // exact conversion
            x_nr_bits =
              ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
          }
        } else { // if x < 2^53
          tmp1.d = (double) C1_lo; // exact conversion
          x_nr_bits =
            ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
        }
      } else { // C1_hi != 0 => nr. bits = 64 + nr_bits (C1_hi)
        tmp1.d = (double) C1_hi; // exact conversion
        x_nr_bits =
          64 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
      }
      q1 = nr_digits[x_nr_bits].digits;
      if (q1 == 0) {
        q1 = nr_digits[x_nr_bits].digits1;
        if (C1_hi > nr_digits[x_nr_bits].threshold_hi ||
            (C1_hi == nr_digits[x_nr_bits].threshold_hi &&
             C1_lo >= nr_digits[x_nr_bits].threshold_lo))
          q1++;
      }
      // return (C1 * 10^scale) * 10^(x_exp - scale)
      // where scale = min (P34-q1, x_exp-y_exp)
      scale = P34 - q1;
      ind = (x_exp - y_exp) >> 49;
      if (ind < scale)
        scale = ind;
      if (scale == 0) {
        res.w[1] = x.w[1];
        res.w[0] = x.w[0];
      } else if (q1 <= 19) { // x fits in 64 bits
        if (scale <= 19) { // 10^scale fits in 64 bits
          // 64 x 64 C1_lo * ten2k64[scale]
          __mul_64x64_to_128MACH (res, C1_lo, ten2k64[scale]);
        } else { // 10^scale fits in 128 bits
          // 64 x 128 C1_lo * ten2k128[scale - 20]
          __mul_128x64_to_128 (res, C1_lo, ten2k128[scale - 20]);
        }
      } else { // x fits in 128 bits, but 10^scale must fit in 64 bits
        // 64 x 128 ten2k64[scale] * C1
        C1.w[1] = C1_hi;
        C1.w[0] = C1_lo;
        __mul_128x64_to_128 (res, ten2k64[scale], C1);
      }
      // subtract scale from the exponent
      x_exp = x_exp - ((UINT64) scale << 49);
      res.w[1] = res.w[1] | x_sign | x_exp;
    }
    BID_SWAP128 (res);
    BID_RETURN (res);
  } else { // x and y are canonical, not special, and are not zero
    // note that the result may still be zero, and then it has to have the
    // preferred exponent
    if (x_exp < y_exp) { // if exp_x < exp_y then swap x and y
      tmp_sign = x_sign;
      tmp_exp = x_exp;
      tmp_signif_hi = C1_hi;
      tmp_signif_lo = C1_lo;
      x_sign = y_sign;
      x_exp = y_exp;
      C1_hi = C2_hi;
      C1_lo = C2_lo;
      y_sign = tmp_sign;
      y_exp = tmp_exp;
      C2_hi = tmp_signif_hi;
      C2_lo = tmp_signif_lo;
    }
    // q1 = nr. of decimal digits in x
    //  determine first the nr. of bits in x
    if (C1_hi == 0) { // x_bits is the nr. of bits in C1_lo
      if (C1_lo >= 0x0020000000000000ull) { // x >= 2^53
        // split the 64-bit value in two 32-bit halves to avoid rounding errors
        if (C1_lo >= 0x0000000100000000ull) { // x >= 2^32
          tmp1.d = (double) (C1_lo >> 32); // exact conversion
          x_nr_bits =
            32 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
        } else { // x < 2^32
          tmp1.d = (double) (C1_lo); // exact conversion
          x_nr_bits =
            ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
        }
      } else { // if x < 2^53
        tmp1.d = (double) C1_lo; // exact conversion
        x_nr_bits =
          ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
      }
    } else { // C1_hi != 0 => nr. bits = 64 + nr_bits (C1_hi)
      tmp1.d = (double) C1_hi; // exact conversion
      x_nr_bits =
        64 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
    }

    q1 = nr_digits[x_nr_bits].digits;
    if (q1 == 0) {
      q1 = nr_digits[x_nr_bits].digits1;
      if (C1_hi > nr_digits[x_nr_bits].threshold_hi ||
          (C1_hi == nr_digits[x_nr_bits].threshold_hi &&
           C1_lo >= nr_digits[x_nr_bits].threshold_lo))
        q1++;
    }
    // q2 = nr. of decimal digits in y
    //  determine first the nr. of bits in y (y_nr_bits)
    if (C2_hi == 0) { // y_bits is the nr. of bits in C2_lo
      if (C2_lo >= 0x0020000000000000ull) { // y >= 2^53
        // split the 64-bit value in two 32-bit halves to avoid rounding errors
        if (C2_lo >= 0x0000000100000000ull) { // y >= 2^32
          tmp2.d = (double) (C2_lo >> 32); // exact conversion
          y_nr_bits =
            32 + ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
        } else { // y < 2^32
          tmp2.d = (double) (C2_lo); // exact conversion
          y_nr_bits =
            ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
        }
      } else { // if y < 2^53
        tmp2.d = (double) C2_lo; // exact conversion
        y_nr_bits =
          ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
      }
    } else { // C2_hi != 0 => nr. bits = 64 + nr_bits (C2_hi)
      tmp2.d = (double) C2_hi; // exact conversion
      y_nr_bits =
        64 + ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
    }

    q2 = nr_digits[y_nr_bits].digits;
    if (q2 == 0) {
      q2 = nr_digits[y_nr_bits].digits1;
      if (C2_hi > nr_digits[y_nr_bits].threshold_hi ||
          (C2_hi == nr_digits[y_nr_bits].threshold_hi &&
           C2_lo >= nr_digits[y_nr_bits].threshold_lo))
        q2++;
    }

    delta = q1 + (int) (x_exp >> 49) - q2 - (int) (y_exp >> 49);
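    /* delta compares the orders of magnitude of the two operands:
       n1 = C1 * 10^e1 occupies the decimal positions e1 ... e1+q1-1, and
       n2 = C2 * 10^e2 the positions e2 ... e2+q2-1.  If delta >= P34 the
       two coefficients do not overlap within a P34-digit window, so n2
       only contributes a rounding correction to (a scaled) C1.  */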
    if (delta >= P34) {
      // round the result directly because 0 < C2 < ulp (C1 * 10^(x_exp-e2))
      // n = C1 * 10^e1 or n = (C1 +/- 10^(q1-P34)) * 10^e1
      // the result is inexact; the preferred exponent is the least possible

      if (delta >= P34 + 1) {
        // for RN the result is the operand with the larger magnitude,
        // possibly scaled up by 10^(P34-q1)
        // an overflow cannot occur in this case (rounding to nearest)
        if (q1 < P34) { // scale C1 up by 10^(P34-q1)
          // Note: because delta >= P34+1 it is certain that
          //     x_exp - ((UINT64)scale << 49) will stay above e_min
          scale = P34 - q1;
          if (q1 <= 19) { // C1 fits in 64 bits
            // 1 <= q1 <= 19 => 15 <= scale <= 33
            if (scale <= 19) { // 10^scale fits in 64 bits
              __mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
            } else { // if 20 <= scale <= 33
              // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
              // (C1 * 10^(scale-19)) fits in 64 bits
              C1_lo = C1_lo * ten2k64[scale - 19];
              __mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
            }
          } else { // if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
            // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
            C1.w[1] = C1_hi;
            C1.w[0] = C1_lo;
            // C1 = ten2k64[P34 - q1] * C1
            __mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
          }
          x_exp = x_exp - ((UINT64) scale << 49);
          C1_hi = C1.w[1];
          C1_lo = C1.w[0];
        }
        // some special cases arise: if delta = P34 + 1 and C1 = 10^(P34-1)
        // (after scaling) and x_sign != y_sign and C2 > 5*10^(q2-1) =>
        // subtract 1 ulp
        // Note: do this only for rounding to nearest; for other rounding
        // modes the correction will be applied next
        if ((rnd_mode == ROUNDING_TO_NEAREST
             || rnd_mode == ROUNDING_TIES_AWAY) && delta == (P34 + 1)
            && C1_hi == 0x0000314dc6448d93ull
            && C1_lo == 0x38c15b0a00000000ull && x_sign != y_sign
            && ((q2 <= 19 && C2_lo > midpoint64[q2 - 1])
                || (q2 >= 20
                    && (C2_hi > midpoint128[q2 - 20].w[1]
                        || (C2_hi == midpoint128[q2 - 20].w[1]
                            && C2_lo > midpoint128[q2 - 20].w[0]))))) {
          // C1 = 10^34 - 1 and decrement x_exp by 1 (no underflow possible)
          C1_hi = 0x0001ed09bead87c0ull;
          C1_lo = 0x378d8e63ffffffffull;
          x_exp = x_exp - EXP_P1;
        }
        if (rnd_mode != ROUNDING_TO_NEAREST) {
          if ((rnd_mode == ROUNDING_DOWN && x_sign && y_sign) ||
              (rnd_mode == ROUNDING_UP && !x_sign && !y_sign)) {
            // add 1 ulp and then check for overflow
            C1_lo = C1_lo + 1;
            if (C1_lo == 0) { // rounding overflow in the low 64 bits
              C1_hi = C1_hi + 1;
            }
            if (C1_hi == 0x0001ed09bead87c0ull
                && C1_lo == 0x378d8e6400000000ull) {
              // C1 = 10^34 => rounding overflow
              C1_hi = 0x0000314dc6448d93ull;
              C1_lo = 0x38c15b0a00000000ull; // 10^33
              x_exp = x_exp + EXP_P1;
              if (x_exp == EXP_MAX_P1) { // overflow
                C1_hi = 0x7800000000000000ull; // +inf
                C1_lo = 0x0ull;
                x_exp = 0; // x_sign is preserved
                // set overflow flag (the inexact flag was set too)
                *pfpsf |= OVERFLOW_EXCEPTION;
              }
            }
          } else if ((rnd_mode == ROUNDING_DOWN && !x_sign && y_sign) ||
                     (rnd_mode == ROUNDING_UP && x_sign && !y_sign) ||
                     (rnd_mode == ROUNDING_TO_ZERO
                      && x_sign != y_sign)) {
            // subtract 1 ulp from C1
            // Note: because delta >= P34 + 1 the result cannot be zero
            C1_lo = C1_lo - 1;
            if (C1_lo == 0xffffffffffffffffull)
              C1_hi = C1_hi - 1;
            // if the coefficient is 10^33 - 1 then make it 10^34 - 1 and
            // decrease the exponent by 1 (because delta >= P34 + 1 the
            // exponent will not become less than e_min)
            // 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
            // 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
            if (C1_hi == 0x0000314dc6448d93ull
                && C1_lo == 0x38c15b09ffffffffull) {
              // make C1 = 10^34 - 1
              C1_hi = 0x0001ed09bead87c0ull;
              C1_lo = 0x378d8e63ffffffffull;
              x_exp = x_exp - EXP_P1;
            }
          } else {
            ; // the result is already correct
          }
        }
        // set the inexact flag
        *pfpsf |= INEXACT_EXCEPTION;
        // assemble the result
        res.w[1] = x_sign | x_exp | C1_hi;
        res.w[0] = C1_lo;
      } else { // delta = P34
        // in most cases, the smaller operand may be < or = or > 1/2 ulp of the
        // larger operand
        // however, the case C1 = 10^(q1-1) and x_sign != y_sign is special due
        // to accuracy loss after subtraction, and will be treated separately
        if (x_sign == y_sign || (q1 <= 20
                                 && (C1_hi != 0
                                     || C1_lo != ten2k64[q1 - 1]))
            || (q1 >= 21 && (C1_hi != ten2k128[q1 - 21].w[1]
                             || C1_lo != ten2k128[q1 - 21].w[0]))) {
          // if x_sign == y_sign or C1 != 10^(q1-1)
          // compare C2 with 1/2 ulp = 5 * 10^(q2-1), the latter read from table
          // Note: cases q1<=19 and q1>=20 can be coalesced at some latency cost
          if (q2 <= 19) { // C2 and 5*10^(q2-1) both fit in 64 bits
            halfulp64 = midpoint64[q2 - 1]; // 5 * 10^(q2-1)
            if (C2_lo < halfulp64) { // n2 < 1/2 ulp (n1)
              // for RN the result is the operand with the larger magnitude,
              // possibly scaled up by 10^(P34-q1)
              // an overflow cannot occur in this case (rounding to nearest)
              if (q1 < P34) { // scale C1 up by 10^(P34-q1)
                // Note: because delta = P34 it is certain that
                //     x_exp - ((UINT64)scale << 49) will stay above e_min
                scale = P34 - q1;
                if (q1 <= 19) { // C1 fits in 64 bits
                  // 1 <= q1 <= 19 => 15 <= scale <= 33
                  if (scale <= 19) { // 10^scale fits in 64 bits
                    __mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
                  } else { // if 20 <= scale <= 33
                    // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
                    // (C1 * 10^(scale-19)) fits in 64 bits
                    C1_lo = C1_lo * ten2k64[scale - 19];
                    __mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
                  }
                } else { // if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
                  // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
                  C1.w[1] = C1_hi;
                  C1.w[0] = C1_lo;
                  // C1 = ten2k64[P34 - q1] * C1
                  __mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
                }
                x_exp = x_exp - ((UINT64) scale << 49);
                C1_hi = C1.w[1];
                C1_lo = C1.w[0];
              }
              if (rnd_mode != ROUNDING_TO_NEAREST) {
                if ((rnd_mode == ROUNDING_DOWN && x_sign && y_sign) ||
                    (rnd_mode == ROUNDING_UP && !x_sign && !y_sign)) {
                  // add 1 ulp and then check for overflow
                  C1_lo = C1_lo + 1;
                  if (C1_lo == 0) { // rounding overflow in the low 64 bits
                    C1_hi = C1_hi + 1;
                  }
                  if (C1_hi == 0x0001ed09bead87c0ull
                      && C1_lo == 0x378d8e6400000000ull) {
                    // C1 = 10^34 => rounding overflow
                    C1_hi = 0x0000314dc6448d93ull;
                    C1_lo = 0x38c15b0a00000000ull; // 10^33
                    x_exp = x_exp + EXP_P1;
                    if (x_exp == EXP_MAX_P1) { // overflow
                      C1_hi = 0x7800000000000000ull; // +inf
                      C1_lo = 0x0ull;
                      x_exp = 0; // x_sign is preserved
                      // set overflow flag (the inexact flag was set too)
                      *pfpsf |= OVERFLOW_EXCEPTION;
                    }
                  }
                } else
                  if ((rnd_mode == ROUNDING_DOWN && !x_sign && y_sign)
                      || (rnd_mode == ROUNDING_UP && x_sign && !y_sign)
                      || (rnd_mode == ROUNDING_TO_ZERO
                          && x_sign != y_sign)) {
                  // subtract 1 ulp from C1
                  // Note: because delta >= P34 + 1 the result cannot be zero
                  C1_lo = C1_lo - 1;
                  if (C1_lo == 0xffffffffffffffffull)
                    C1_hi = C1_hi - 1;
                  // if the coefficient is 10^33-1 then make it 10^34-1 and
                  // decrease the exponent by 1 (because delta >= P34 + 1 the
                  // exponent will not become less than e_min)
                  // 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
                  // 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
                  if (C1_hi == 0x0000314dc6448d93ull
                      && C1_lo == 0x38c15b09ffffffffull) {
                    // make C1 = 10^34 - 1
                    C1_hi = 0x0001ed09bead87c0ull;
                    C1_lo = 0x378d8e63ffffffffull;
                    x_exp = x_exp - EXP_P1;
                  }
                } else {
                  ; // the result is already correct
                }
              }
              // set the inexact flag
              *pfpsf |= INEXACT_EXCEPTION;
              // assemble the result
              res.w[1] = x_sign | x_exp | C1_hi;
              res.w[0] = C1_lo;
            } else if ((C2_lo == halfulp64)
                       && (q1 < P34 || ((C1_lo & 0x1) == 0))) {
              // n2 = 1/2 ulp (n1) and C1 is even
              // the result is the operand with the larger magnitude,
              // possibly scaled up by 10^(P34-q1)
              // an overflow cannot occur in this case (rounding to nearest)
              if (q1 < P34) { // scale C1 up by 10^(P34-q1)
                // Note: because delta = P34 it is certain that
                //     x_exp - ((UINT64)scale << 49) will stay above e_min
                scale = P34 - q1;
                if (q1 <= 19) { // C1 fits in 64 bits
                  // 1 <= q1 <= 19 => 15 <= scale <= 33
                  if (scale <= 19) { // 10^scale fits in 64 bits
                    __mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
                  } else { // if 20 <= scale <= 33
                    // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
                    // (C1 * 10^(scale-19)) fits in 64 bits
                    C1_lo = C1_lo * ten2k64[scale - 19];
                    __mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
                  }
                } else { // if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
                  // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
                  C1.w[1] = C1_hi;
                  C1.w[0] = C1_lo;
                  // C1 = ten2k64[P34 - q1] * C1
                  __mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
                }
                x_exp = x_exp - ((UINT64) scale << 49);
                C1_hi = C1.w[1];
                C1_lo = C1.w[0];
              }
              if ((rnd_mode == ROUNDING_TO_NEAREST && x_sign == y_sign
                   && (C1_lo & 0x01)) || (rnd_mode == ROUNDING_TIES_AWAY
                                          && x_sign == y_sign)
                  || (rnd_mode == ROUNDING_UP && !x_sign && !y_sign)
                  || (rnd_mode == ROUNDING_DOWN && x_sign && y_sign)) {
                // add 1 ulp and then check for overflow
                C1_lo = C1_lo + 1;
                if (C1_lo == 0) { // rounding overflow in the low 64 bits
                  C1_hi = C1_hi + 1;
                }
                if (C1_hi == 0x0001ed09bead87c0ull
                    && C1_lo == 0x378d8e6400000000ull) {
                  // C1 = 10^34 => rounding overflow
                  C1_hi = 0x0000314dc6448d93ull;
                  C1_lo = 0x38c15b0a00000000ull; // 10^33
                  x_exp = x_exp + EXP_P1;
                  if (x_exp == EXP_MAX_P1) { // overflow
                    C1_hi = 0x7800000000000000ull; // +inf
                    C1_lo = 0x0ull;
                    x_exp = 0; // x_sign is preserved
                    // set overflow flag (the inexact flag was set too)
                    *pfpsf |= OVERFLOW_EXCEPTION;
                  }
                }
              } else
                if ((rnd_mode == ROUNDING_TO_NEAREST && x_sign != y_sign
                     && (C1_lo & 0x01)) || (rnd_mode == ROUNDING_DOWN
                                            && !x_sign && y_sign)
                    || (rnd_mode == ROUNDING_UP && x_sign && !y_sign)
                    || (rnd_mode == ROUNDING_TO_ZERO
                        && x_sign != y_sign)) {
                // subtract 1 ulp from C1
                // Note: because delta >= P34 + 1 the result cannot be zero
                C1_lo = C1_lo - 1;
                if (C1_lo == 0xffffffffffffffffull)
                  C1_hi = C1_hi - 1;
                // if the coefficient is 10^33 - 1 then make it 10^34 - 1
                // and decrease the exponent by 1 (because delta >= P34 + 1
                // the exponent will not become less than e_min)
                // 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
                // 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
                if (C1_hi == 0x0000314dc6448d93ull
                    && C1_lo == 0x38c15b09ffffffffull) {
                  // make C1 = 10^34 - 1
                  C1_hi = 0x0001ed09bead87c0ull;
                  C1_lo = 0x378d8e63ffffffffull;
                  x_exp = x_exp - EXP_P1;
                }
              } else {
                ; // the result is already correct
              }
              // set the inexact flag
              *pfpsf |= INEXACT_EXCEPTION;
              // assemble the result
              res.w[1] = x_sign | x_exp | C1_hi;
              res.w[0] = C1_lo;
            } else { // if C2_lo > halfulp64 ||
              // (C2_lo == halfulp64 && q1 == P34 && ((C1_lo & 0x1) == 1)), i.e.
              // 1/2 ulp(n1) < n2 < 1 ulp(n1) or n2 = 1/2 ulp(n1) and C1 odd
              // res = x+1 ulp if n1*n2 > 0 and res = x-1 ulp if n1*n2 < 0
              if (q1 < P34) { // then 1 ulp = 10^(e1+q1-P34) < 10^e1
                // Note: if (q1 == P34) then 1 ulp = 10^(e1+q1-P34) = 10^e1
                // because q1 < P34 we must first replace C1 by
                // C1 * 10^(P34-q1), and must decrease the exponent by
                // (P34-q1) (it will still be at least e_min)
                scale = P34 - q1;
                if (q1 <= 19) { // C1 fits in 64 bits
                  // 1 <= q1 <= 19 => 15 <= scale <= 33
                  if (scale <= 19) { // 10^scale fits in 64 bits
                    __mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
                  } else { // if 20 <= scale <= 33
                    // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
                    // (C1 * 10^(scale-19)) fits in 64 bits
                    C1_lo = C1_lo * ten2k64[scale - 19];
                    __mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
                  }
                } else { // if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
                  // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
                  C1.w[1] = C1_hi;
                  C1.w[0] = C1_lo;
                  // C1 = ten2k64[P34 - q1] * C1
                  __mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
                }
                x_exp = x_exp - ((UINT64) scale << 49);
                C1_hi = C1.w[1];
                C1_lo = C1.w[0];
                // check for rounding overflow
                if (C1_hi == 0x0001ed09bead87c0ull
                    && C1_lo == 0x378d8e6400000000ull) {
                  // C1 = 10^34 => rounding overflow
                  C1_hi = 0x0000314dc6448d93ull;
                  C1_lo = 0x38c15b0a00000000ull; // 10^33
                  x_exp = x_exp + EXP_P1;
                }
              }
              if ((rnd_mode == ROUNDING_TO_NEAREST && x_sign != y_sign)
                  || (rnd_mode == ROUNDING_TIES_AWAY && x_sign != y_sign
                      && C2_lo != halfulp64)
                  || (rnd_mode == ROUNDING_DOWN && !x_sign && y_sign)
                  || (rnd_mode == ROUNDING_UP && x_sign && !y_sign)
                  || (rnd_mode == ROUNDING_TO_ZERO
                      && x_sign != y_sign)) {
                // the result is x - 1
                // for RN n1 * n2 < 0; underflow not possible
                C1_lo = C1_lo - 1;
                if (C1_lo == 0xffffffffffffffffull)
                  C1_hi--;
                // check if we crossed into the lower decade
                if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
                  C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
                  C1_lo = 0x378d8e63ffffffffull;
                  x_exp = x_exp - EXP_P1; // no underflow, because n1 >> n2
                }
              } else
                if ((rnd_mode == ROUNDING_TO_NEAREST
                     && x_sign == y_sign)
                    || (rnd_mode == ROUNDING_TIES_AWAY
                        && x_sign == y_sign)
                    || (rnd_mode == ROUNDING_DOWN && x_sign && y_sign)
                    || (rnd_mode == ROUNDING_UP && !x_sign
                        && !y_sign)) {
                // the result is x + 1
                // for RN x_sign = y_sign, i.e. n1*n2 > 0
                C1_lo = C1_lo + 1;
                if (C1_lo == 0) { // rounding overflow in the low 64 bits
                  C1_hi = C1_hi + 1;
                }
                if (C1_hi == 0x0001ed09bead87c0ull
                    && C1_lo == 0x378d8e6400000000ull) {
                  // C1 = 10^34 => rounding overflow
                  C1_hi = 0x0000314dc6448d93ull;
                  C1_lo = 0x38c15b0a00000000ull; // 10^33
                  x_exp = x_exp + EXP_P1;
                  if (x_exp == EXP_MAX_P1) { // overflow
                    C1_hi = 0x7800000000000000ull; // +inf
                    C1_lo = 0x0ull;
                    x_exp = 0; // x_sign is preserved
                    // set the overflow flag
                    *pfpsf |= OVERFLOW_EXCEPTION;
                  }
                }
              } else {
                ; // the result is x
              }
              // set the inexact flag
              *pfpsf |= INEXACT_EXCEPTION;
              // assemble the result
              res.w[1] = x_sign | x_exp | C1_hi;
              res.w[0] = C1_lo;
            }
          } else { // if q2 >= 20 then 5*10^(q2-1) and C2 (the latter in
            // most cases) fit only in more than 64 bits
            halfulp128 = midpoint128[q2 - 20]; // 5 * 10^(q2-1)
            if ((C2_hi < halfulp128.w[1])
                || (C2_hi == halfulp128.w[1]
                    && C2_lo < halfulp128.w[0])) {
              // n2 < 1/2 ulp (n1)
              // the result is the operand with the larger magnitude,
              // possibly scaled up by 10^(P34-q1)
              // an overflow cannot occur in this case (rounding to nearest)
              if (q1 < P34) { // scale C1 up by 10^(P34-q1)
                // Note: because delta = P34 it is certain that
                //     x_exp - ((UINT64)scale << 49) will stay above e_min
                scale = P34 - q1;
                if (q1 <= 19) { // C1 fits in 64 bits
                  // 1 <= q1 <= 19 => 15 <= scale <= 33
                  if (scale <= 19) { // 10^scale fits in 64 bits
                    __mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
                  } else { // if 20 <= scale <= 33
                    // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
                    // (C1 * 10^(scale-19)) fits in 64 bits
                    C1_lo = C1_lo * ten2k64[scale - 19];
                    __mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
                  }
                } else { // if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
                  // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
                  C1.w[1] = C1_hi;
                  C1.w[0] = C1_lo;
                  // C1 = ten2k64[P34 - q1] * C1
                  __mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
                }
                C1_hi = C1.w[1];
                C1_lo = C1.w[0];
                x_exp = x_exp - ((UINT64) scale << 49);
              }
              if (rnd_mode != ROUNDING_TO_NEAREST) {
                if ((rnd_mode == ROUNDING_DOWN && x_sign && y_sign) ||
                    (rnd_mode == ROUNDING_UP && !x_sign && !y_sign)) {
                  // add 1 ulp and then check for overflow
                  C1_lo = C1_lo + 1;
                  if (C1_lo == 0) { // rounding overflow in the low 64 bits
                    C1_hi = C1_hi + 1;
                  }
                  if (C1_hi == 0x0001ed09bead87c0ull
                      && C1_lo == 0x378d8e6400000000ull) {
                    // C1 = 10^34 => rounding overflow
                    C1_hi = 0x0000314dc6448d93ull;
                    C1_lo = 0x38c15b0a00000000ull; // 10^33
                    x_exp = x_exp + EXP_P1;
                    if (x_exp == EXP_MAX_P1) { // overflow
                      C1_hi = 0x7800000000000000ull; // +inf
                      C1_lo = 0x0ull;
                      x_exp = 0; // x_sign is preserved
                      // set overflow flag (the inexact flag was set too)
                      *pfpsf |= OVERFLOW_EXCEPTION;
                    }
                  }
                } else
                  if ((rnd_mode == ROUNDING_DOWN && !x_sign && y_sign)
                      || (rnd_mode == ROUNDING_UP && x_sign && !y_sign)
                      || (rnd_mode == ROUNDING_TO_ZERO
                          && x_sign != y_sign)) {
                  // subtract 1 ulp from C1
                  // Note: because delta >= P34 + 1 the result cannot be zero
                  C1_lo = C1_lo - 1;
                  if (C1_lo == 0xffffffffffffffffull)
                    C1_hi = C1_hi - 1;
                  // if the coefficient is 10^33-1 then make it 10^34-1 and
                  // decrease the exponent by 1 (because delta >= P34 + 1 the
                  // exponent will not become less than e_min)
                  // 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
                  // 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
                  if (C1_hi == 0x0000314dc6448d93ull
                      && C1_lo == 0x38c15b09ffffffffull) {
                    // make C1 = 10^34 - 1
                    C1_hi = 0x0001ed09bead87c0ull;
                    C1_lo = 0x378d8e63ffffffffull;
                    x_exp = x_exp - EXP_P1;
                  }
                } else {
                  ; // the result is already correct
                }
              }
              // set the inexact flag
              *pfpsf |= INEXACT_EXCEPTION;
              // assemble the result
              res.w[1] = x_sign | x_exp | C1_hi;
              res.w[0] = C1_lo;
            } else if ((C2_hi == halfulp128.w[1]
                        && C2_lo == halfulp128.w[0])
                       && (q1 < P34 || ((C1_lo & 0x1) == 0))) {
              // midpoint & lsb in C1 is 0
              // n2 = 1/2 ulp (n1) and C1 is even
              // the result is the operand with the larger magnitude,
              // possibly scaled up by 10^(P34-q1)
              // an overflow cannot occur in this case (rounding to nearest)
              if (q1 < P34) { // scale C1 up by 10^(P34-q1)
                // Note: because delta = P34 it is certain that
                //     x_exp - ((UINT64)scale << 49) will stay above e_min
                scale = P34 - q1;
                if (q1 <= 19) { // C1 fits in 64 bits
                  // 1 <= q1 <= 19 => 15 <= scale <= 33
                  if (scale <= 19) { // 10^scale fits in 64 bits
                    __mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
                  } else { // if 20 <= scale <= 33
                    // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
                    // (C1 * 10^(scale-19)) fits in 64 bits
                    C1_lo = C1_lo * ten2k64[scale - 19];
                    __mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
                  }
                } else { // if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
                  // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
                  C1.w[1] = C1_hi;
                  C1.w[0] = C1_lo;
                  // C1 = ten2k64[P34 - q1] * C1
                  __mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
                }
                x_exp = x_exp - ((UINT64) scale << 49);
                C1_hi = C1.w[1];
                C1_lo = C1.w[0];
              }
              if (rnd_mode != ROUNDING_TO_NEAREST) {
                if ((rnd_mode == ROUNDING_TIES_AWAY && x_sign == y_sign)
                    || (rnd_mode == ROUNDING_UP && !y_sign)) {
                  // add 1 ulp and then check for overflow
                  C1_lo = C1_lo + 1;
                  if (C1_lo == 0) { // rounding overflow in the low 64 bits
                    C1_hi = C1_hi + 1;
                  }
                  if (C1_hi == 0x0001ed09bead87c0ull
                      && C1_lo == 0x378d8e6400000000ull) {
                    // C1 = 10^34 => rounding overflow
                    C1_hi = 0x0000314dc6448d93ull;
                    C1_lo = 0x38c15b0a00000000ull; // 10^33
                    x_exp = x_exp + EXP_P1;
                    if (x_exp == EXP_MAX_P1) { // overflow
                      C1_hi = 0x7800000000000000ull; // +inf
                      C1_lo = 0x0ull;
                      x_exp = 0; // x_sign is preserved
                      // set overflow flag (the inexact flag was set too)
                      *pfpsf |= OVERFLOW_EXCEPTION;
                    }
                  }
                } else if ((rnd_mode == ROUNDING_DOWN && y_sign)
                           || (rnd_mode == ROUNDING_TO_ZERO
                               && x_sign != y_sign)) {
                  // subtract 1 ulp from C1
                  // Note: because delta >= P34 + 1 the result cannot be zero
                  C1_lo = C1_lo - 1;
                  if (C1_lo == 0xffffffffffffffffull)
                    C1_hi = C1_hi - 1;
                  // if the coefficient is 10^33 - 1 then make it 10^34 - 1
                  // and decrease the exponent by 1 (because delta >= P34 + 1
                  // the exponent will not become less than e_min)
                  // 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
                  // 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
                  if (C1_hi == 0x0000314dc6448d93ull
                      && C1_lo == 0x38c15b09ffffffffull) {
                    // make C1 = 10^34 - 1
                    C1_hi = 0x0001ed09bead87c0ull;
                    C1_lo = 0x378d8e63ffffffffull;
                    x_exp = x_exp - EXP_P1;
                  }
                } else {
                  ; // the result is already correct
                }
              }
              // set the inexact flag
              *pfpsf |= INEXACT_EXCEPTION;
              // assemble the result
              res.w[1] = x_sign | x_exp | C1_hi;
              res.w[0] = C1_lo;
            } else { // if C2 > halfulp128 ||
              // (C2 == halfulp128 && q1 == P34 && ((C1 & 0x1) == 1)), i.e.
              // 1/2 ulp(n1) < n2 < 1 ulp(n1) or n2 = 1/2 ulp(n1) and C1 odd
              // res = x+1 ulp if n1*n2 > 0 and res = x-1 ulp if n1*n2 < 0
              if (q1 < P34) { // then 1 ulp = 10^(e1+q1-P34) < 10^e1
                // Note: if (q1 == P34) then 1 ulp = 10^(e1+q1-P34) = 10^e1
                // because q1 < P34 we must first replace C1 by C1*10^(P34-q1),
                // and must decrease the exponent by (P34-q1) (it will still be
                // at least e_min)
                scale = P34 - q1;
                if (q1 <= 19) { // C1 fits in 64 bits
                  // 1 <= q1 <= 19 => 15 <= scale <= 33
                  if (scale <= 19) { // 10^scale fits in 64 bits
                    __mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
                  } else { // if 20 <= scale <= 33
                    // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
                    // (C1 * 10^(scale-19)) fits in 64 bits
                    C1_lo = C1_lo * ten2k64[scale - 19];
                    __mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
                  }
                } else { // if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
                  // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
                  C1.w[1] = C1_hi;
                  C1.w[0] = C1_lo;
                  // C1 = ten2k64[P34 - q1] * C1
                  __mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
                }
                C1_hi = C1.w[1];
                C1_lo = C1.w[0];
                x_exp = x_exp - ((UINT64) scale << 49);
              }
              if ((rnd_mode == ROUNDING_TO_NEAREST && x_sign != y_sign)
                  || (rnd_mode == ROUNDING_TIES_AWAY && x_sign != y_sign
                      && (C2_hi != halfulp128.w[1]
                          || C2_lo != halfulp128.w[0]))
                  || (rnd_mode == ROUNDING_DOWN && !x_sign && y_sign)
                  || (rnd_mode == ROUNDING_UP && x_sign && !y_sign)
                  || (rnd_mode == ROUNDING_TO_ZERO
                      && x_sign != y_sign)) {
                // the result is x - 1
                // for RN n1 * n2 < 0; underflow not possible
                C1_lo = C1_lo - 1;
                if (C1_lo == 0xffffffffffffffffull)
                  C1_hi--;
                // check if we crossed into the lower decade
                if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
                  C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
                  C1_lo = 0x378d8e63ffffffffull;
                  x_exp = x_exp - EXP_P1; // no underflow, because n1 >> n2
                }
              } else
                if ((rnd_mode == ROUNDING_TO_NEAREST
                     && x_sign == y_sign)
                    || (rnd_mode == ROUNDING_TIES_AWAY
                        && x_sign == y_sign)
                    || (rnd_mode == ROUNDING_DOWN && x_sign && y_sign)
                    || (rnd_mode == ROUNDING_UP && !x_sign
                        && !y_sign)) {
                // the result is x + 1
                // for RN x_sign = y_sign, i.e. n1*n2 > 0
                C1_lo = C1_lo + 1;
                if (C1_lo == 0) { // rounding overflow in the low 64 bits
                  C1_hi = C1_hi + 1;
                }
                if (C1_hi == 0x0001ed09bead87c0ull
                    && C1_lo == 0x378d8e6400000000ull) {
                  // C1 = 10^34 => rounding overflow
                  C1_hi = 0x0000314dc6448d93ull;
                  C1_lo = 0x38c15b0a00000000ull; // 10^33
                  x_exp = x_exp + EXP_P1;
                  if (x_exp == EXP_MAX_P1) { // overflow
                    C1_hi = 0x7800000000000000ull; // +inf
                    C1_lo = 0x0ull;
                    x_exp = 0; // x_sign is preserved
                    // set the overflow flag
                    *pfpsf |= OVERFLOW_EXCEPTION;
                  }
                }
              } else {
                ; // the result is x
              }
              // set the inexact flag
              *pfpsf |= INEXACT_EXCEPTION;
              // assemble the result
              res.w[1] = x_sign | x_exp | C1_hi;
              res.w[0] = C1_lo;
            }
          } // end q1 >= 20
          // end case where C1 != 10^(q1-1)
        } else { // C1 = 10^(q1-1) and x_sign != y_sign
          // instead of C' = (C1 * 10^(e1-e2) + C2)rnd,P34
          // calculate C' = C1 * 10^(e1-e2-x1) + (C2 * 10^(-x1))rnd,P34
          // where x1 = q2 - 1, 0 <= x1 <= P34 - 1
          // Because C1 = 10^(q1-1) and x_sign != y_sign, C' will have P34
          // digits and n = C' * 10^(e2+x1)
          // If the result has P34+1 digits, redo the steps above with x1+1
          // If the result has P34-1 digits or less, redo the steps above with
          // x1-1 but only if initially x1 >= 1
          // NOTE: these two steps can be improved, e.g. we could guess if
          // P34+1 or P34-1 digits will be obtained by adding/subtracting
          // just the top 64 bits of the two operands
          // The result cannot be zero, and it cannot overflow
          x1 = q2 - 1; // 0 <= x1 <= P34-1
          // Calculate C1 * 10^(e1-e2-x1) where 1 <= e1-e2-x1 <= P34
          // scale = (int)(e1 >> 49) - (int)(e2 >> 49) - x1; 0 <= scale <= P34-1
          scale = P34 - q1 + 1; // scale = e1-e2-x1 = P34+1-q1; 1 <= scale <= P34
          // either C1 or 10^(e1-e2-x1) may not fit in 64 bits,
          // but their product fits with certainty in 128 bits
          if (scale >= 20) { // 10^(e1-e2-x1) does not fit in 64 bits, but C1 does
            __mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]);
          } else { // if (scale >= 1)
            // if 1 <= scale <= 19 then 10^(e1-e2-x1) fits in 64 bits
            if (q1 <= 19) { // C1 fits in 64 bits
              __mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]);
            } else { // q1 >= 20
              C1.w[1] = C1_hi;
              C1.w[0] = C1_lo;
              __mul_128x64_to_128 (C1, ten2k64[scale], C1);
            }
          }
          tmp64 = C1.w[0]; // C1.w[1], C1.w[0] contains C1 * 10^(e1-e2-x1)
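          /* C2 is now rounded to a single decimal digit by adding half an
             ulp (5 * 10^(x1-1)) and multiplying by the 118-bit
             approximation of 10^(-x1) from ten2mk128[]; the integer part
             of the 256-bit product is the rounded digit, and its fraction
             f2* determines the midpoint/inexact indicators used below.  */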
          // now round C2 to q2-x1 = 1 decimal digit
          // C2' = C2 + 1/2 * 10^x1 = C2 + 5 * 10^(x1-1)
          ind = x1 - 1; // -1 <= ind <= P34 - 2
          if (ind >= 0) { // if (x1 >= 1)
            C2.w[0] = C2_lo;
            C2.w[1] = C2_hi;
            if (ind <= 18) {
              C2.w[0] = C2.w[0] + midpoint64[ind];
              if (C2.w[0] < C2_lo)
                C2.w[1]++;
            } else { // 19 <= ind <= 32
              C2.w[0] = C2.w[0] + midpoint128[ind - 19].w[0];
              C2.w[1] = C2.w[1] + midpoint128[ind - 19].w[1];
              if (C2.w[0] < C2_lo)
                C2.w[1]++;
            }
            // the approximation of 10^(-x1) was rounded up to 118 bits
            __mul_128x128_to_256 (R256, C2, ten2mk128[ind]); // R256 = C2*, f2*
            // calculate C2* and f2*
            // C2* is actually floor(C2*) in this case
            // C2* and f2* need shifting and masking, as shown by
            // shiftright128[] and maskhigh128[]
            // the top Ex bits of 10^(-x1) are T* = ten2mk128trunc[ind], e.g.
            // if x1=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
            // if (0 < f2* < 10^(-x1)) then
            //   if floor(C1+C2*) is even then C2* = floor(C2*) - logical right
            //       shift; C2* has p decimal digits, correct by Prop. 1)
            //   else if floor(C1+C2*) is odd C2* = floor(C2*)-1 (logical right
            //       shift; C2* has p decimal digits, correct by Pr. 1)
            // else
            //   C2* = floor(C2*) (logical right shift; C has p decimal digits,
            //       correct by Property 1)
            // n = C2* * 10^(e2+x1)
            if (ind <= 2) {
              highf2star.w[1] = 0x0;
              highf2star.w[0] = 0x0; // low f2* ok
            } else if (ind <= 21) {
              highf2star.w[1] = 0x0;
              highf2star.w[0] = R256.w[2] & maskhigh128[ind]; // low f2* ok
            } else {
              highf2star.w[1] = R256.w[3] & maskhigh128[ind];
              highf2star.w[0] = R256.w[2]; // low f2* is ok
            }
            // shift right C2* by Ex-128 = shiftright128[ind]
            if (ind >= 3) {
              shift = shiftright128[ind];
              if (shift < 64) { // 3 <= shift <= 63
                R256.w[2] =
                  (R256.w[2] >> shift) | (R256.w[3] << (64 - shift));
                R256.w[3] = (R256.w[3] >> shift);
              } else { // 66 <= shift <= 102
                R256.w[2] = (R256.w[3] >> (shift - 64));
                R256.w[3] = 0x0ULL;
              }
            }
1623 is_inexact_lt_midpoint = 0;
1624 is_inexact_gt_midpoint = 0;
1625 is_midpoint_lt_even = 0;
1626 is_midpoint_gt_even = 0;
1627 // determine inexactness of the rounding of C2*
1628 // (cannot be followed by a second rounding)
1629 // if (0 < f2* - 1/2 < 10^(-x1)) then
1630 // the result is exact
1631 // else (if f2* - 1/2 > T* then)
1632 // the result of is inexact
1633 if (ind <= 2) {
1634 if (R256.w[1] > 0x8000000000000000ull ||
1635 (R256.w[1] == 0x8000000000000000ull
1636 && R256.w[0] > 0x0ull)) {
1637 // f2* > 1/2 and the result may be exact
1638 tmp64A = R256.w[1] - 0x8000000000000000ull; // f* - 1/2
1639 if ((tmp64A > ten2mk128trunc[ind].w[1]
1640 || (tmp64A == ten2mk128trunc[ind].w[1]
1641 && R256.w[0] >= ten2mk128trunc[ind].w[0]))) {
1642 // set the inexact flag
1643 *pfpsf |= INEXACT_EXCEPTION;
1644 // this rounding is applied to C2 only!
1645 // x_sign != y_sign
1646 is_inexact_gt_midpoint = 1;
1647 } // else the result is exact
1648 // rounding down, unless a midpoint in [ODD, EVEN]
1649 } else { // the result is inexact; f2* <= 1/2
1650 // set the inexact flag
1651 *pfpsf |= INEXACT_EXCEPTION;
1652 // this rounding is applied to C2 only!
1653 // x_sign != y_sign
1654 is_inexact_lt_midpoint = 1;
1655 }
1656 } else if (ind <= 21) { // if 3 <= ind <= 21
1657 if (highf2star.w[1] > 0x0 || (highf2star.w[1] == 0x0
1658 && highf2star.w[0] >
1659 onehalf128[ind])
1660 || (highf2star.w[1] == 0x0
1661 && highf2star.w[0] == onehalf128[ind]
1662 && (R256.w[1] || R256.w[0]))) {
1663 // f2* > 1/2 and the result may be exact
1664 // Calculate f2* - 1/2
1665 tmp64A = highf2star.w[0] - onehalf128[ind];
1666 tmp64B = highf2star.w[1];
1667 if (tmp64A > highf2star.w[0])
1668 tmp64B--;
1669 if (tmp64B || tmp64A
1670 || R256.w[1] > ten2mk128trunc[ind].w[1]
1671 || (R256.w[1] == ten2mk128trunc[ind].w[1]
1672 && R256.w[0] > ten2mk128trunc[ind].w[0])) {
1673 // set the inexact flag
1674 *pfpsf |= INEXACT_EXCEPTION;
1675 // this rounding is applied to C2 only!
1676 // x_sign != y_sign
1677 is_inexact_gt_midpoint = 1;
1678 } // else the result is exact
1679 } else { // the result is inexact; f2* <= 1/2
1680 // set the inexact flag
1681 *pfpsf |= INEXACT_EXCEPTION;
1682 // this rounding is applied to C2 only!
1683 // x_sign != y_sign
1684 is_inexact_lt_midpoint = 1;
1685 }
1686 } else { // if 22 <= ind <= 33
1687 if (highf2star.w[1] > onehalf128[ind]
1688 || (highf2star.w[1] == onehalf128[ind]
1689 && (highf2star.w[0] || R256.w[1]
1690 || R256.w[0]))) {
1691 // f2* > 1/2 and the result may be exact
1692 // Calculate f2* - 1/2
1693 // tmp64A = highf2star.w[0];
1694 tmp64B = highf2star.w[1] - onehalf128[ind];
1695 if (tmp64B || highf2star.w[0]
1696 || R256.w[1] > ten2mk128trunc[ind].w[1]
1697 || (R256.w[1] == ten2mk128trunc[ind].w[1]
1698 && R256.w[0] > ten2mk128trunc[ind].w[0])) {
1699 // set the inexact flag
1700 *pfpsf |= INEXACT_EXCEPTION;
1701 // this rounding is applied to C2 only!
1702 // x_sign != y_sign
1703 is_inexact_gt_midpoint = 1;
1704 } // else the result is exact
1705 } else { // the result is inexact; f2* <= 1/2
1706 // set the inexact flag
1707 *pfpsf |= INEXACT_EXCEPTION;
1708 // this rounding is applied to C2 only!
1709 // x_sign != y_sign
1710 is_inexact_lt_midpoint = 1;
1711 }
1712 }
1713 // check for midpoints after determining inexactness
1714 if ((R256.w[1] || R256.w[0]) && (highf2star.w[1] == 0)
1715 && (highf2star.w[0] == 0)
1716 && (R256.w[1] < ten2mk128trunc[ind].w[1]
1717 || (R256.w[1] == ten2mk128trunc[ind].w[1]
1718 && R256.w[0] <= ten2mk128trunc[ind].w[0]))) {
1719 // the result is a midpoint
1720 if ((tmp64 + R256.w[2]) & 0x01) { // MP in [EVEN, ODD]
1721 // if floor(C2*) is odd C = floor(C2*) - 1; the result may be 0
1722 R256.w[2]--;
1723 if (R256.w[2] == 0xffffffffffffffffull)
1724 R256.w[3]--;
1725 // this rounding is applied to C2 only!
1726 // x_sign != y_sign
1727 is_midpoint_lt_even = 1;
1728 is_inexact_lt_midpoint = 0;
1729 is_inexact_gt_midpoint = 0;
1730 } else {
1731 // else MP in [ODD, EVEN]
1732 // this rounding is applied to C2 only!
1733 // x_sign != y_sign
1734 is_midpoint_gt_even = 1;
1735 is_inexact_lt_midpoint = 0;
1736 is_inexact_gt_midpoint = 0;
1737 }
1738 }
1739 } else { // if (ind == -1) only when x1 = 0
1740 R256.w[2] = C2_lo;
1741 R256.w[3] = C2_hi;
1742 is_midpoint_lt_even = 0;
1743 is_midpoint_gt_even = 0;
1744 is_inexact_lt_midpoint = 0;
1745 is_inexact_gt_midpoint = 0;
1746 }
1747 // and now subtract C1 * 10^(e1-e2-x1) - (C2 * 10^(-x1))rnd,P34
1748 // because x_sign != y_sign this last operation is exact
1749 C1.w[0] = C1.w[0] - R256.w[2];
1750 C1.w[1] = C1.w[1] - R256.w[3];
1751 if (C1.w[0] > tmp64)
1752 C1.w[1]--; // borrow
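// (Borrow detection: tmp64 still holds C1's low word from before the
// subtraction, so C1.w[0] > tmp64 afterwards proves a borrow propagated out
// of the low 64 bits.)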
1753 if (C1.w[1] >= 0x8000000000000000ull) { // negative coefficient!
1754 C1.w[0] = ~C1.w[0];
1755 C1.w[0]++;
1756 C1.w[1] = ~C1.w[1];
1757 if (C1.w[0] == 0x0)
1758 C1.w[1]++;
1759 tmp_sign = y_sign; // the result will have the sign of y
1760 } else {
1761 tmp_sign = x_sign;
1762 }
1763 // the difference has exactly P34 digits
1764 x_sign = tmp_sign;
1765 if (x1 >= 1)
1766 y_exp = y_exp + ((UINT64) x1 << 49);
1767 C1_hi = C1.w[1];
1768 C1_lo = C1.w[0];
1769 // general correction from RN to RA, RM, RP, RZ; result uses y_exp
1770 if (rnd_mode != ROUNDING_TO_NEAREST) {
1771 if ((!x_sign
1772 && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint)
1773 || ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP)
1774 && is_midpoint_gt_even)))
1775 || (x_sign
1776 && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint)
1777 || ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN)
1778 && is_midpoint_gt_even)))) {
1790 // C1 = C1 + 1
1791 C1_lo = C1_lo + 1;
1792 if (C1_lo == 0) { // rounding overflow in the low 64 bits
1793 C1_hi = C1_hi + 1;
1794 }
1795 if (C1_hi == 0x0001ed09bead87c0ull
1796 && C1_lo == 0x378d8e6400000000ull) {
1797 // C1 = 10^34 => rounding overflow
1798 C1_hi = 0x0000314dc6448d93ull;
1799 C1_lo = 0x38c15b0a00000000ull; // 10^33
1800 y_exp = y_exp + EXP_P1;
1801 }
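// (Worked case: if C1 was 10^34 - 1, i.e. thirty-four 9s, the increment
// produces 10^34, one digit too many; it is rewritten as 10^33 with the
// exponent raised by one step, EXP_P1 being a 1 in the exponent field at
// bit 49.)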
1802 } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint)
1803 && ((x_sign && (rnd_mode == ROUNDING_UP
1804 || rnd_mode == ROUNDING_TO_ZERO))
1805 || (!x_sign && (rnd_mode == ROUNDING_DOWN
1806 || rnd_mode == ROUNDING_TO_ZERO)))) {
1810 // C1 = C1 - 1
1811 C1_lo = C1_lo - 1;
1812 if (C1_lo == 0xffffffffffffffffull)
1813 C1_hi--;
1814 // check if we crossed into the lower decade
1815 if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
1816 C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
1817 C1_lo = 0x378d8e63ffffffffull;
1818 y_exp = y_exp - EXP_P1;
1819 // no underflow, because delta + q2 >= P34 + 1
1820 }
1821 } else {
1822 ; // exact, the result is already correct
1823 }
1824 }
1825 // assemble the result
1826 res.w[1] = x_sign | y_exp | C1_hi;
1827 res.w[0] = C1_lo;
1829 } // end delta = P34
1830 } else { // if (|delta| <= P34 - 1)
1831 if (delta >= 0) { // if (0 <= delta <= P34 - 1)
1832 if (delta <= P34 - 1 - q2) {
1833 // calculate C' directly; the result is exact
1834 // in this case 1<=q1<=P34-1, 1<=q2<=P34-1 and 0 <= e1-e2 <= P34-2
1835 // The coefficient of the result is C1 * 10^(e1-e2) + C2 and the
1836 // exponent is e2; either C1 or 10^(e1-e2) may not fit in 64 bits,
1837 // but their product fits with certainty in 128 bits (actually in 113)
1838 scale = delta - q1 + q2; // scale = (int)(e1 >> 49) - (int)(e2 >> 49)
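// (Why this equals e1 - e2: delta was defined as (q1 + e1) - (q2 + e2), so
// delta - q1 + q2 = e1 - e2; the biased exponents live at bit 49 and above
// of the high words, hence the >> 49 reads.)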
1840 if (scale >= 20) { // 10^(e1-e2) does not fit in 64 bits, but C1 does
1841 __mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]);
1842 C1_hi = C1.w[1];
1843 C1_lo = C1.w[0];
1844 } else if (scale >= 1) {
1845 // if 1 <= scale <= 19 then 10^(e1-e2) fits in 64 bits
1846 if (q1 <= 19) { // C1 fits in 64 bits
1847 __mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]);
1848 } else { // q1 >= 20
1849 C1.w[1] = C1_hi;
1850 C1.w[0] = C1_lo;
1851 __mul_128x64_to_128 (C1, ten2k64[scale], C1);
1852 }
1853 C1_hi = C1.w[1];
1854 C1_lo = C1.w[0];
1855 } else { // if (scale == 0) C1 is unchanged
1856 C1.w[0] = C1_lo; // C1.w[1] = C1_hi;
1857 }
1858 // now add C2
1859 if (x_sign == y_sign) {
1860 // the result cannot overflow
1861 C1_lo = C1_lo + C2_lo;
1862 C1_hi = C1_hi + C2_hi;
1863 if (C1_lo < C1.w[0])
1864 C1_hi++;
1865 } else { // if x_sign != y_sign
1866 C1_lo = C1_lo - C2_lo;
1867 C1_hi = C1_hi - C2_hi;
1868 if (C1_lo > C1.w[0])
1869 C1_hi--;
1870 // the result can be zero, but it cannot overflow
1871 if (C1_lo == 0 && C1_hi == 0) {
1872 // assemble the result
1873 if (x_exp < y_exp)
1874 res.w[1] = x_exp;
1875 else
1876 res.w[1] = y_exp;
1877 res.w[0] = 0;
1878 if (rnd_mode == ROUNDING_DOWN) {
1879 res.w[1] |= 0x8000000000000000ull;
1880 }
1881 BID_SWAP128 (res);
1882 BID_RETURN (res);
1883 }
1884 if (C1_hi >= 0x8000000000000000ull) { // negative coefficient!
1885 C1_lo = ~C1_lo;
1886 C1_lo++;
1887 C1_hi = ~C1_hi;
1888 if (C1_lo == 0x0)
1889 C1_hi++;
1890 x_sign = y_sign; // the result will have the sign of y
1891 }
1893 // assemble the result
1894 res.w[1] = x_sign | y_exp | C1_hi;
1895 res.w[0] = C1_lo;
1896 } else if (delta == P34 - q2) {
1897 // calculate C' directly; the result may be inexact if it requires
1898 // P34+1 decimal digits; in this case the 'cutoff' point for addition
1899 // is at the position of the lsb of C2, so 0 <= e1-e2 <= P34-1
1900 // The coefficient of the result is C1 * 10^(e1-e2) + C2 and the
1901 // exponent is e2; either C1 or 10^(e1-e2) may not fit in 64 bits,
1902 // but their product fits with certainty in 128 bits (actually in 113)
1903 scale = delta - q1 + q2; // scale = (int)(e1 >> 49) - (int)(e2 >> 49)
1904 if (scale >= 20) { // 10^(e1-e2) does not fit in 64 bits, but C1 does
1905 __mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]);
1906 } else if (scale >= 1) {
1907 // if 1 <= scale <= 19 then 10^(e1-e2) fits in 64 bits
1908 if (q1 <= 19) { // C1 fits in 64 bits
1909 __mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]);
1910 } else { // q1 >= 20
1911 C1.w[1] = C1_hi;
1912 C1.w[0] = C1_lo;
1913 __mul_128x64_to_128 (C1, ten2k64[scale], C1);
1914 }
1915 } else { // if (scale == 0) C1 is unchanged
1916 C1.w[1] = C1_hi;
1917 C1.w[0] = C1_lo; // only the low part is necessary
1918 }
1919 C1_hi = C1.w[1];
1920 C1_lo = C1.w[0];
1921 // now add C2
1922 if (x_sign == y_sign) {
1923 // the result can overflow!
1924 C1_lo = C1_lo + C2_lo;
1925 C1_hi = C1_hi + C2_hi;
1926 if (C1_lo < C1.w[0])
1927 C1_hi++;
1928 // test for overflow, possible only when C1 >= 10^34
1929 if (C1_hi > 0x0001ed09bead87c0ull || (C1_hi == 0x0001ed09bead87c0ull && C1_lo >= 0x378d8e6400000000ull)) { // C1 >= 10^34
1930 // in this case q = P34 + 1 and x = q - P34 = 1, so multiply
1931 // C'' = C'+ 5 = C1 + 5 by k1 ~ 10^(-1) calculated for P34 + 1
1932 // decimal digits
1933 // Calculate C'' = C' + 1/2 * 10^x
1934 if (C1_lo >= 0xfffffffffffffffbull) { // low half add has carry
1935 C1_lo = C1_lo + 5;
1936 C1_hi = C1_hi + 1;
1937 } else {
1938 C1_lo = C1_lo + 5;
1939 }
1940 // the approximation of 10^(-1) was rounded up to 118 bits
1941 // 10^(-1) =~ 33333333333333333333333333333400 * 2^-129
1942 // 10^(-1) =~ 19999999999999999999999999999a00 * 2^-128
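// (Sanity check on the constant: 2^128/10 = 0x19999999999999999999999999999999.9...,
// and rounding its top 118 bits up yields the ...9a00 tail used for ten2m1;
// the approximation therefore slightly overestimates C''/10, which is what
// the floor/fraction analysis below relies on.)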
1943 C1.w[1] = C1_hi;
1944 C1.w[0] = C1_lo; // C''
1945 ten2m1.w[1] = 0x1999999999999999ull;
1946 ten2m1.w[0] = 0x9999999999999a00ull;
1947 __mul_128x128_to_256 (P256, C1, ten2m1); // P256 = C*, f*
1948 // C* is actually floor(C*) in this case
1949 // the top Ex = 128 bits of 10^(-1) are
1950 // T* = 0x00199999999999999999999999999999
1951 // if (0 < f* < 10^(-x)) then
1952 // if floor(C*) is even then C = floor(C*) - logical right
1953 // shift; C has p decimal digits, correct by Prop. 1)
1954 // else if floor(C*) is odd C = floor(C*) - 1 (logical right
1955 // shift; C has p decimal digits, correct by Pr. 1)
1956 // else
1957 // C = floor(C*) (logical right shift; C has p decimal digits,
1958 // correct by Property 1)
1959 // n = C * 10^(e2+x)
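// (Midpoint detection below: a nonzero fraction f* no larger than the
// truncated constant T* can only appear when C'' = C' + 5 was exactly
// divisible by 10, i.e. when C' ended in 5 and the sum sat exactly halfway
// between two P34-digit coefficients.)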
1960 if ((P256.w[1] || P256.w[0])
1961 && (P256.w[1] < 0x1999999999999999ull
1962 || (P256.w[1] == 0x1999999999999999ull
1963 && P256.w[0] <= 0x9999999999999999ull))) {
1964 // the result is a midpoint
1965 if (P256.w[2] & 0x01) {
1966 is_midpoint_gt_even = 1;
1967 // if floor(C*) is odd C = floor(C*) - 1; the result is not 0
1968 P256.w[2]--;
1969 if (P256.w[2] == 0xffffffffffffffffull)
1970 P256.w[3]--;
1971 } else {
1972 is_midpoint_lt_even = 1;
1973 }
1974 }
1975 // n = Cstar * 10^(e2+1)
1976 y_exp = y_exp + EXP_P1;
1977 // C* != 10^P because C* has P34 digits
1978 // check for overflow
1979 if (y_exp == EXP_MAX_P1
1980 && (rnd_mode == ROUNDING_TO_NEAREST
1981 || rnd_mode == ROUNDING_TIES_AWAY)) {
1982 // overflow for RN
1983 res.w[1] = x_sign | 0x7800000000000000ull; // +/-inf
1984 res.w[0] = 0x0ull;
1985 // set the inexact flag
1986 *pfpsf |= INEXACT_EXCEPTION;
1987 // set the overflow flag
1988 *pfpsf |= OVERFLOW_EXCEPTION;
1989 BID_SWAP128 (res);
1990 BID_RETURN (res);
1991 }
1992 // if (0 < f* - 1/2 < 10^(-x)) then
1993 // the result of the addition is exact
1994 // else
1995 // the result of the addition is inexact
1996 if (P256.w[1] > 0x8000000000000000ull || (P256.w[1] == 0x8000000000000000ull && P256.w[0] > 0x0ull)) { // the result may be exact
1997 tmp64 = P256.w[1] - 0x8000000000000000ull; // f* - 1/2
1998 if ((tmp64 > 0x1999999999999999ull
1999 || (tmp64 == 0x1999999999999999ull
2000 && P256.w[0] >= 0x9999999999999999ull))) {
2001 // set the inexact flag
2002 *pfpsf |= INEXACT_EXCEPTION;
2003 is_inexact = 1;
2004 } // else the result is exact
2005 } else { // the result is inexact
2006 // set the inexact flag
2007 *pfpsf |= INEXACT_EXCEPTION;
2008 is_inexact = 1;
2009 }
2010 C1_hi = P256.w[3];
2011 C1_lo = P256.w[2];
2012 if (!is_midpoint_gt_even && !is_midpoint_lt_even) {
2013 is_inexact_lt_midpoint = is_inexact
2014 && (P256.w[1] & 0x8000000000000000ull);
2015 is_inexact_gt_midpoint = is_inexact
2016 && !(P256.w[1] & 0x8000000000000000ull);
2017 }
2018 // general correction from RN to RA, RM, RP, RZ;
2019 // result uses y_exp
2020 if (rnd_mode != ROUNDING_TO_NEAREST) {
2021 if ((!x_sign
2022 && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint)
2023 || ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP)
2024 && is_midpoint_gt_even)))
2025 || (x_sign
2026 && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint)
2027 || ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN)
2028 && is_midpoint_gt_even)))) {
2042 // C1 = C1 + 1
2043 C1_lo = C1_lo + 1;
2044 if (C1_lo == 0) { // rounding overflow in the low 64 bits
2045 C1_hi = C1_hi + 1;
2046 }
2047 if (C1_hi == 0x0001ed09bead87c0ull
2048 && C1_lo == 0x378d8e6400000000ull) {
2049 // C1 = 10^34 => rounding overflow
2050 C1_hi = 0x0000314dc6448d93ull;
2051 C1_lo = 0x38c15b0a00000000ull; // 10^33
2052 y_exp = y_exp + EXP_P1;
2053 }
2054 } else
2055 if ((is_midpoint_lt_even || is_inexact_gt_midpoint)
2056 && ((x_sign && (rnd_mode == ROUNDING_UP
2057 || rnd_mode == ROUNDING_TO_ZERO))
2058 || (!x_sign && (rnd_mode == ROUNDING_DOWN
2059 || rnd_mode == ROUNDING_TO_ZERO)))) {
2063 // C1 = C1 - 1
2064 C1_lo = C1_lo - 1;
2065 if (C1_lo == 0xffffffffffffffffull)
2066 C1_hi--;
2067 // check if we crossed into the lower decade
2068 if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
2069 C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
2070 C1_lo = 0x378d8e63ffffffffull;
2071 y_exp = y_exp - EXP_P1;
2072 // no underflow, because delta + q2 >= P34 + 1
2073 }
2074 } else {
2075 ; // exact, the result is already correct
2076 }
2077 // in all cases check for overflow (RN and RA solved already)
2078 if (y_exp == EXP_MAX_P1) { // overflow
2079 if ((rnd_mode == ROUNDING_DOWN && x_sign) || // RM and res < 0
2080 (rnd_mode == ROUNDING_UP && !x_sign)) { // RP and res > 0
2081 C1_hi = 0x7800000000000000ull; // +inf
2082 C1_lo = 0x0ull;
2083 } else { // RM and res > 0, RP and res < 0, or RZ
2084 C1_hi = 0x5fffed09bead87c0ull;
2085 C1_lo = 0x378d8e63ffffffffull;
2086 }
2087 y_exp = 0; // x_sign is preserved
2088 // set the inexact flag (even if the addition itself was exact)
2089 *pfpsf |= INEXACT_EXCEPTION;
2090 // set the overflow flag
2091 *pfpsf |= OVERFLOW_EXCEPTION;
2092 }
2093 }
2094 } // else if (C1 < 10^34) then C1 is the coeff.; the result is exact
2095 } else { // if x_sign != y_sign the result is exact
2096 C1_lo = C1_lo - C2_lo;
2097 C1_hi = C1_hi - C2_hi;
2098 if (C1_lo > C1.w[0])
2099 C1_hi--;
2100 // the result can be zero, but it cannot overflow
2101 if (C1_lo == 0 && C1_hi == 0) {
2102 // assemble the result
2103 if (x_exp < y_exp)
2104 res.w[1] = x_exp;
2105 else
2106 res.w[1] = y_exp;
2107 res.w[0] = 0;
2108 if (rnd_mode == ROUNDING_DOWN) {
2109 res.w[1] |= 0x8000000000000000ull;
2110 }
2111 BID_SWAP128 (res);
2112 BID_RETURN (res);
2113 }
2114 if (C1_hi >= 0x8000000000000000ull) { // negative coefficient!
2115 C1_lo = ~C1_lo;
2116 C1_lo++;
2117 C1_hi = ~C1_hi;
2118 if (C1_lo == 0x0)
2119 C1_hi++;
2120 x_sign = y_sign; // the result will have the sign of y
2121 }
2123 // assemble the result
2124 res.w[1] = x_sign | y_exp | C1_hi;
2125 res.w[0] = C1_lo;
2126 } else { // if (delta >= P34 + 1 - q2)
2127 // instead of C' = (C1 * 10^(e1-e2) + C2)rnd,P34
2128 // calculate C' = C1 * 10^(e1-e2-x1) + (C2 * 10^(-x1))rnd,P34
2129 // where x1 = q1 + e1 - e2 - P34, 1 <= x1 <= P34 - 1
2130 // In most cases C' will have P34 digits, and n = C' * 10^(e2+x1)
2131 // If the result has P34+1 digits, redo the steps above with x1+1
2132 // If the result has P34-1 digits or less, redo the steps above with
2133 // x1-1 but only if initially x1 >= 1
2134 // NOTE: these two steps can be improved, e.g we could guess if
2135 // P34+1 or P34-1 digits will be obtained by adding/subtracting just
2136 // the top 64 bits of the two operands
2137 // The result cannot be zero, but it can overflow
2138 x1 = delta + q2 - P34; // 1 <= x1 <= P34-1
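// (Concrete instance: q1 = q2 = P34 = 34 with e1 - e2 = 5 gives delta = 5,
// hence x1 = 5: C2 loses its five low digits to rounding, while
// scale = e1 - e2 - x1 = 0 leaves C1 unscaled.)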
2139 roundC2:
2140 // Calculate C1 * 10^(e1-e2-x1) where 0 <= e1-e2-x1 <= P34 - 1
2141 // scale = (int)(e1 >> 49) - (int)(e2 >> 49) - x1; 0 <= scale <= P34-1
2142 scale = delta - q1 + q2 - x1; // scale = e1 - e2 - x1 = P34 - q1
2143 // either C1 or 10^(e1-e2-x1) may not fit is 64 bits,
2144 // but their product fits with certainty in 128 bits (actually in 113)
2145 if (scale >= 20) { //10^(e1-e2-x1) doesn't fit in 64 bits, but C1 does
2146 __mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]);
2147 } else if (scale >= 1) {
2148 // if 1 <= scale <= 19 then 10^(e1-e2-x1) fits in 64 bits
2149 if (q1 <= 19) { // C1 fits in 64 bits
2150 __mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]);
2151 } else { // q1 >= 20
2152 C1.w[1] = C1_hi;
2153 C1.w[0] = C1_lo;
2154 __mul_128x64_to_128 (C1, ten2k64[scale], C1);
2155 }
2156 } else { // if (scale == 0) C1 is unchanged
2157 C1.w[1] = C1_hi;
2158 C1.w[0] = C1_lo;
2159 }
2160 tmp64 = C1.w[0]; // C1.w[1], C1.w[0] contains C1 * 10^(e1-e2-x1)
2162 // now round C2 to q2-x1 decimal digits, where 1<=x1<=q2-1<=P34-1
2163 // (but if we got here a second time after x1 = x1 - 1, then
2164 // x1 >= 0; note that for x1 = 0 C2 is unchanged)
2165 // C2' = C2 + 1/2 * 10^x1 = C2 + 5 * 10^(x1-1)
2166 ind = x1 - 1; // 0 <= ind <= q2-2<=P34-2=32; but note that if x1 = 0
2167 // during a second pass, then ind = -1
2168 if (ind >= 0) { // if (x1 >= 1)
2169 C2.w[0] = C2_lo;
2170 C2.w[1] = C2_hi;
2171 if (ind <= 18) {
2172 C2.w[0] = C2.w[0] + midpoint64[ind];
2173 if (C2.w[0] < C2_lo)
2174 C2.w[1]++;
2175 } else { // 19 <= ind <= 32
2176 C2.w[0] = C2.w[0] + midpoint128[ind - 19].w[0];
2177 C2.w[1] = C2.w[1] + midpoint128[ind - 19].w[1];
2178 if (C2.w[0] < C2_lo)
2179 C2.w[1]++;
2180 }
2181 // the approximation of 10^(-x1) was rounded up to 118 bits
2182 __mul_128x128_to_256 (R256, C2, ten2mk128[ind]); // R256 = C2*, f2*
2183 // calculate C2* and f2*
2184 // C2* is actually floor(C2*) in this case
2185 // C2* and f2* need shifting and masking, as shown by
2186 // shiftright128[] and maskhigh128[]
2187 // the top Ex bits of 10^(-x1) are T* = ten2mk128trunc[ind], e.g.
2188 // if x1=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
2189 // if (0 < f2* < 10^(-x1)) then
2190 // if floor(C1+C2*) is even then C2* = floor(C2*) - logical right
2191 // shift; C2* has p decimal digits, correct by Prop. 1)
2192 // else if floor(C1+C2*) is odd C2* = floor(C2*)-1 (logical right
2193 // shift; C2* has p decimal digits, correct by Pr. 1)
2194 // else
2195 // C2* = floor(C2*) (logical right shift; C has p decimal digits,
2196 // correct by Property 1)
2197 // n = C2* * 10^(e2+x1)
2199 if (ind <= 2) {
2200 highf2star.w[1] = 0x0;
2201 highf2star.w[0] = 0x0; // low f2* ok
2202 } else if (ind <= 21) {
2203 highf2star.w[1] = 0x0;
2204 highf2star.w[0] = R256.w[2] & maskhigh128[ind]; // low f2* ok
2205 } else {
2206 highf2star.w[1] = R256.w[3] & maskhigh128[ind];
2207 highf2star.w[0] = R256.w[2]; // low f2* is ok
2208 }
2209 // shift right C2* by Ex-128 = shiftright128[ind]
2210 if (ind >= 3) {
2211 shift = shiftright128[ind];
2212 if (shift < 64) { // 3 <= shift <= 63
2213 R256.w[2] =
2214 (R256.w[2] >> shift) | (R256.w[3] << (64 - shift));
2215 R256.w[3] = (R256.w[3] >> shift);
2216 } else { // 66 <= shift <= 102
2217 R256.w[2] = (R256.w[3] >> (shift - 64));
2218 R256.w[3] = 0x0ULL;
2219 }
2220 }
2221 if (second_pass) {
2222 is_inexact_lt_midpoint = 0;
2223 is_inexact_gt_midpoint = 0;
2224 is_midpoint_lt_even = 0;
2225 is_midpoint_gt_even = 0;
2226 }
2227 // determine inexactness of the rounding of C2* (this may be
2228 // followed by a second rounding only if we get P34+1
2229 // decimal digits)
2230 // if (0 < f2* - 1/2 < 10^(-x1)) then
2231 // the result is exact
2232 // else (if f2* - 1/2 > T* then)
2233 // the result is inexact
2234 if (ind <= 2) {
2235 if (R256.w[1] > 0x8000000000000000ull ||
2236 (R256.w[1] == 0x8000000000000000ull
2237 && R256.w[0] > 0x0ull)) {
2238 // f2* > 1/2 and the result may be exact
2239 tmp64A = R256.w[1] - 0x8000000000000000ull; // f* - 1/2
2240 if ((tmp64A > ten2mk128trunc[ind].w[1]
2241 || (tmp64A == ten2mk128trunc[ind].w[1]
2242 && R256.w[0] >= ten2mk128trunc[ind].w[0]))) {
2243 // set the inexact flag
2244 // *pfpsf |= INEXACT_EXCEPTION;
2245 tmp_inexact = 1; // may be set again during a second pass
2246 // this rounding is applied to C2 only!
2247 if (x_sign == y_sign)
2248 is_inexact_lt_midpoint = 1;
2249 else // if (x_sign != y_sign)
2250 is_inexact_gt_midpoint = 1;
2251 } // else the result is exact
2252 // rounding down, unless a midpoint in [ODD, EVEN]
2253 } else { // the result is inexact; f2* <= 1/2
2254 // set the inexact flag
2255 // *pfpsf |= INEXACT_EXCEPTION;
2256 tmp_inexact = 1; // just in case we will round a second time
2257 // rounding up, unless a midpoint in [EVEN, ODD]
2258 // this rounding is applied to C2 only!
2259 if (x_sign == y_sign)
2260 is_inexact_gt_midpoint = 1;
2261 else // if (x_sign != y_sign)
2262 is_inexact_lt_midpoint = 1;
2263 }
2264 } else if (ind <= 21) { // if 3 <= ind <= 21
2265 if (highf2star.w[1] > 0x0 || (highf2star.w[1] == 0x0
2266 && highf2star.w[0] >
2267 onehalf128[ind])
2268 || (highf2star.w[1] == 0x0
2269 && highf2star.w[0] == onehalf128[ind]
2270 && (R256.w[1] || R256.w[0]))) {
2271 // f2* > 1/2 and the result may be exact
2272 // Calculate f2* - 1/2
2273 tmp64A = highf2star.w[0] - onehalf128[ind];
2274 tmp64B = highf2star.w[1];
2275 if (tmp64A > highf2star.w[0])
2276 tmp64B--;
2277 if (tmp64B || tmp64A
2278 || R256.w[1] > ten2mk128trunc[ind].w[1]
2279 || (R256.w[1] == ten2mk128trunc[ind].w[1]
2280 && R256.w[0] > ten2mk128trunc[ind].w[0])) {
2281 // set the inexact flag
2282 // *pfpsf |= INEXACT_EXCEPTION;
2283 tmp_inexact = 1; // may be set again during a second pass
2284 // this rounding is applied to C2 only!
2285 if (x_sign == y_sign)
2286 is_inexact_lt_midpoint = 1;
2287 else // if (x_sign != y_sign)
2288 is_inexact_gt_midpoint = 1;
2289 } // else the result is exact
2290 } else { // the result is inexact; f2* <= 1/2
2291 // set the inexact flag
2292 // *pfpsf |= INEXACT_EXCEPTION;
2293 tmp_inexact = 1; // may be set again during a second pass
2294 // rounding up, unless a midpoint in [EVEN, ODD]
2295 // this rounding is applied to C2 only!
2296 if (x_sign == y_sign)
2297 is_inexact_gt_midpoint = 1;
2298 else // if (x_sign != y_sign)
2299 is_inexact_lt_midpoint = 1;
2300 }
2301 } else { // if 22 <= ind <= 33
2302 if (highf2star.w[1] > onehalf128[ind]
2303 || (highf2star.w[1] == onehalf128[ind]
2304 && (highf2star.w[0] || R256.w[1]
2305 || R256.w[0]))) {
2306 // f2* > 1/2 and the result may be exact
2307 // Calculate f2* - 1/2
2308 // tmp64A = highf2star.w[0];
2309 tmp64B = highf2star.w[1] - onehalf128[ind];
2310 if (tmp64B || highf2star.w[0]
2311 || R256.w[1] > ten2mk128trunc[ind].w[1]
2312 || (R256.w[1] == ten2mk128trunc[ind].w[1]
2313 && R256.w[0] > ten2mk128trunc[ind].w[0])) {
2314 // set the inexact flag
2315 // *pfpsf |= INEXACT_EXCEPTION;
2316 tmp_inexact = 1; // may be set again during a second pass
2317 // this rounding is applied to C2 only!
2318 if (x_sign == y_sign)
2319 is_inexact_lt_midpoint = 1;
2320 else // if (x_sign != y_sign)
2321 is_inexact_gt_midpoint = 1;
2322 } // else the result is exact
2323 } else { // the result is inexact; f2* <= 1/2
2324 // set the inexact flag
2325 // *pfpsf |= INEXACT_EXCEPTION;
2326 tmp_inexact = 1; // may be set again during a second pass
2327 // rounding up, unless a midpoint in [EVEN, ODD]
2328 // this rounding is applied to C2 only!
2329 if (x_sign == y_sign)
2330 is_inexact_gt_midpoint = 1;
2331 else // if (x_sign != y_sign)
2332 is_inexact_lt_midpoint = 1;
2333 }
2334 }
2335 // check for midpoints
2336 if ((R256.w[1] || R256.w[0]) && (highf2star.w[1] == 0)
2337 && (highf2star.w[0] == 0)
2338 && (R256.w[1] < ten2mk128trunc[ind].w[1]
2339 || (R256.w[1] == ten2mk128trunc[ind].w[1]
2340 && R256.w[0] <= ten2mk128trunc[ind].w[0]))) {
2341 // the result is a midpoint
2342 if ((tmp64 + R256.w[2]) & 0x01) { // MP in [EVEN, ODD]
2343 // if floor(C2*) is odd C = floor(C2*) - 1; the result may be 0
2344 R256.w[2]--;
2345 if (R256.w[2] == 0xffffffffffffffffull)
2346 R256.w[3]--;
2347 // this rounding is applied to C2 only!
2348 if (x_sign == y_sign)
2349 is_midpoint_gt_even = 1;
2350 else // if (x_sign != y_sign)
2351 is_midpoint_lt_even = 1;
2352 is_inexact_lt_midpoint = 0;
2353 is_inexact_gt_midpoint = 0;
2354 } else {
2355 // else MP in [ODD, EVEN]
2356 // this rounding is applied to C2 only!
2357 if (x_sign == y_sign)
2358 is_midpoint_lt_even = 1;
2359 else // if (x_sign != y_sign)
2360 is_midpoint_gt_even = 1;
2361 is_inexact_lt_midpoint = 0;
2362 is_inexact_gt_midpoint = 0;
2363 }
2364 }
2365 // end if (ind >= 0)
2366 } else { // if (ind == -1); only during a 2nd pass, and when x1 = 0
2367 R256.w[2] = C2_lo;
2368 R256.w[3] = C2_hi;
2369 tmp_inexact = 0;
2370 // to correct a possible setting to 1 from 1st pass
2371 if (second_pass) {
2372 is_midpoint_lt_even = 0;
2373 is_midpoint_gt_even = 0;
2374 is_inexact_lt_midpoint = 0;
2375 is_inexact_gt_midpoint = 0;
2376 }
2377 }
2378 // and now add/subtract C1 * 10^(e1-e2-x1) +/- (C2 * 10^(-x1))rnd,P34
2379 if (x_sign == y_sign) { // addition; could overflow
2380 // no second pass is possible this way (only for x_sign != y_sign)
2381 C1.w[0] = C1.w[0] + R256.w[2];
2382 C1.w[1] = C1.w[1] + R256.w[3];
2383 if (C1.w[0] < tmp64)
2384 C1.w[1]++; // carry
2385 // if the sum has P34+1 digits, i.e. C1>=10^34 redo the calculation
2386 // with x1=x1+1
2387 if (C1.w[1] > 0x0001ed09bead87c0ull || (C1.w[1] == 0x0001ed09bead87c0ull && C1.w[0] >= 0x378d8e6400000000ull)) { // C1 >= 10^34
2388 // chop off one more digit from the sum, but make sure there is
2389 // no double-rounding error (see table - double rounding logic)
2390 // now round C1 from P34+1 to P34 decimal digits
2391 // C1' = C1 + 1/2 * 10 = C1 + 5
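// (Adding half of the 10^1 that is about to be dropped converts plain
// truncation into round-half-up; the midpoint bookkeeping that follows then
// demotes exact halves to round-half-even.)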
2392 if (C1.w[0] >= 0xfffffffffffffffbull) { // low half add has carry
2393 C1.w[0] = C1.w[0] + 5;
2394 C1.w[1] = C1.w[1] + 1;
2395 } else {
2396 C1.w[0] = C1.w[0] + 5;
2397 }
2398 // the approximation of 10^(-1) was rounded up to 118 bits
2399 __mul_128x128_to_256 (Q256, C1, ten2mk128[0]); // Q256 = C1*, f1*
2400 // C1* is actually floor(C1*) in this case
2401 // the top 128 bits of 10^(-1) are
2402 // T* = ten2mk128trunc[0]=0x19999999999999999999999999999999
2403 // if (0 < f1* < 10^(-1)) then
2404 // if floor(C1*) is even then C1* = floor(C1*) - logical right
2405 // shift; C1* has p decimal digits, correct by Prop. 1)
2406 // else if floor(C1*) is odd C1* = floor(C1*) - 1 (logical right
2407 // shift; C1* has p decimal digits, correct by Pr. 1)
2408 // else
2409 // C1* = floor(C1*) (logical right shift; C has p decimal digits
2410 // correct by Property 1)
2411 // n = C1* * 10^(e2+x1+1)
2412 if ((Q256.w[1] || Q256.w[0])
2413 && (Q256.w[1] < ten2mk128trunc[0].w[1]
2414 || (Q256.w[1] == ten2mk128trunc[0].w[1]
2415 && Q256.w[0] <= ten2mk128trunc[0].w[0]))) {
2416 // the result is a midpoint
2417 if (is_inexact_lt_midpoint) { // for the 1st rounding
2418 is_inexact_gt_midpoint = 1;
2419 is_inexact_lt_midpoint = 0;
2420 is_midpoint_gt_even = 0;
2421 is_midpoint_lt_even = 0;
2422 } else if (is_inexact_gt_midpoint) { // for the 1st rounding
2423 Q256.w[2]--;
2424 if (Q256.w[2] == 0xffffffffffffffffull)
2425 Q256.w[3]--;
2426 is_inexact_gt_midpoint = 0;
2427 is_inexact_lt_midpoint = 1;
2428 is_midpoint_gt_even = 0;
2429 is_midpoint_lt_even = 0;
2430 } else if (is_midpoint_gt_even) { // for the 1st rounding
2431 // Note: cannot have is_midpoint_lt_even
2432 is_inexact_gt_midpoint = 0;
2433 is_inexact_lt_midpoint = 1;
2434 is_midpoint_gt_even = 0;
2435 is_midpoint_lt_even = 0;
2436 } else { // the first rounding must have been exact
2437 if (Q256.w[2] & 0x01) { // MP in [EVEN, ODD]
2438 // the truncated result is correct
2439 Q256.w[2]--;
2440 if (Q256.w[2] == 0xffffffffffffffffull)
2441 Q256.w[3]--;
2442 is_inexact_gt_midpoint = 0;
2443 is_inexact_lt_midpoint = 0;
2444 is_midpoint_gt_even = 1;
2445 is_midpoint_lt_even = 0;
2446 } else { // MP in [ODD, EVEN]
2447 is_inexact_gt_midpoint = 0;
2448 is_inexact_lt_midpoint = 0;
2449 is_midpoint_gt_even = 0;
2450 is_midpoint_lt_even = 1;
2451 }
2452 }
2453 tmp_inexact = 1; // in all cases
2454 } else { // the result is not a midpoint
2455 // determine inexactness of the rounding of C1 (the sum C1+C2*)
2456 // if (0 < f1* - 1/2 < 10^(-1)) then
2457 // the result is exact
2458 // else (if f1* - 1/2 > T* then)
2459 // the result is inexact
2460 // ind = 0
2461 if (Q256.w[1] > 0x8000000000000000ull
2462 || (Q256.w[1] == 0x8000000000000000ull
2463 && Q256.w[0] > 0x0ull)) {
2464 // f1* > 1/2 and the result may be exact
2465 Q256.w[1] = Q256.w[1] - 0x8000000000000000ull; // f1* - 1/2
2466 if ((Q256.w[1] > ten2mk128trunc[0].w[1]
2467 || (Q256.w[1] == ten2mk128trunc[0].w[1]
2468 && Q256.w[0] > ten2mk128trunc[0].w[0]))) {
2469 is_inexact_gt_midpoint = 0;
2470 is_inexact_lt_midpoint = 1;
2471 is_midpoint_gt_even = 0;
2472 is_midpoint_lt_even = 0;
2473 // set the inexact flag
2474 tmp_inexact = 1;
2475 // *pfpsf |= INEXACT_EXCEPTION;
2476 } else { // else the result is exact for the 2nd rounding
2477 if (tmp_inexact) { // if the previous rounding was inexact
2478 if (is_midpoint_lt_even) {
2479 is_inexact_gt_midpoint = 1;
2480 is_midpoint_lt_even = 0;
2481 } else if (is_midpoint_gt_even) {
2482 is_inexact_lt_midpoint = 1;
2483 is_midpoint_gt_even = 0;
2484 } else {
2485 ; // no change
2486 }
2487 }
2488 }
2489 // rounding down, unless a midpoint in [ODD, EVEN]
2490 } else { // the result is inexact; f1* <= 1/2
2491 is_inexact_gt_midpoint = 1;
2492 is_inexact_lt_midpoint = 0;
2493 is_midpoint_gt_even = 0;
2494 is_midpoint_lt_even = 0;
2495 // set the inexact flag
2496 tmp_inexact = 1;
2497 // *pfpsf |= INEXACT_EXCEPTION;
2498 }
2499 } // end 'the result is not a midpoint'
2500 // n = C1 * 10^(e2+x1)
2501 C1.w[1] = Q256.w[3];
2502 C1.w[0] = Q256.w[2];
2503 y_exp = y_exp + ((UINT64) (x1 + 1) << 49);
2504 } else { // C1 < 10^34
2505 // C1.w[1] and C1.w[0] already set
2506 // n = C1 * 10^(e2+x1)
2507 y_exp = y_exp + ((UINT64) x1 << 49);
2508 }
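// (Encoding note: in this non-special BID128 layout the biased exponent
// occupies bits 49..62 of the high word, so adding x1 << 49 bumps the
// exponent by x1 without disturbing the coefficient bits.)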
2509 // check for overflow
2510 if (y_exp == EXP_MAX_P1
2511 && (rnd_mode == ROUNDING_TO_NEAREST
2512 || rnd_mode == ROUNDING_TIES_AWAY)) {
2513 res.w[1] = 0x7800000000000000ull | x_sign; // +/-inf
2514 res.w[0] = 0x0ull;
2515 // set the inexact flag
2516 *pfpsf |= INEXACT_EXCEPTION;
2517 // set the overflow flag
2518 *pfpsf |= OVERFLOW_EXCEPTION;
2519 BID_SWAP128 (res);
2520 BID_RETURN (res);
2521 } // else no overflow
2522 } else { // if x_sign != y_sign the result of this subtraction is exact
2523 C1.w[0] = C1.w[0] - R256.w[2];
2524 C1.w[1] = C1.w[1] - R256.w[3];
2525 if (C1.w[0] > tmp64)
2526 C1.w[1]--; // borrow
2527 if (C1.w[1] >= 0x8000000000000000ull) { // negative coefficient!
2528 C1.w[0] = ~C1.w[0];
2529 C1.w[0]++;
2530 C1.w[1] = ~C1.w[1];
2531 if (C1.w[0] == 0x0)
2532 C1.w[1]++;
2533 tmp_sign = y_sign;
2534 // the result will have the sign of y if last rnd
2535 } else {
2536 tmp_sign = x_sign;
2537 }
2538 // if the difference has P34-1 digits or less, i.e. C1 < 10^33 then
2539 // redo the calculation with x1=x1-1;
2540 // redo the calculation also if C1 = 10^33 and
2541 // (is_inexact_gt_midpoint or is_midpoint_lt_even);
2542 // (the last part should have really been
2543 // (is_inexact_lt_midpoint or is_midpoint_gt_even) from
2544 // the rounding of C2, but the position flags have been reversed)
2545 // 10^33 = 0x0000314dc6448d93 0x38c15b0a00000000
2546 if ((C1.w[1] < 0x0000314dc6448d93ull || (C1.w[1] == 0x0000314dc6448d93ull && C1.w[0] < 0x38c15b0a00000000ull)) || (C1.w[1] == 0x0000314dc6448d93ull && C1.w[0] == 0x38c15b0a00000000ull && (is_inexact_gt_midpoint || is_midpoint_lt_even))) { // C1=10^33
2547 x1 = x1 - 1; // x1 >= 0
2548 if (x1 >= 0) {
2549 // clear position flags and tmp_inexact
2550 is_midpoint_lt_even = 0;
2551 is_midpoint_gt_even = 0;
2552 is_inexact_lt_midpoint = 0;
2553 is_inexact_gt_midpoint = 0;
2554 tmp_inexact = 0;
2555 second_pass = 1;
2556 goto roundC2; // else result has less than P34 digits
2557 }
2558 }
2559 // if the coefficient of the result is 10^34 it means that this
2560 // must be the second pass, and we are done
2561 if (C1.w[1] == 0x0001ed09bead87c0ull && C1.w[0] == 0x378d8e6400000000ull) { // if C1 = 10^34
2562 C1.w[1] = 0x0000314dc6448d93ull; // C1 = 10^33
2563 C1.w[0] = 0x38c15b0a00000000ull;
2564 y_exp = y_exp + ((UINT64) 1 << 49);
2565 }
2566 x_sign = tmp_sign;
2567 if (x1 >= 1)
2568 y_exp = y_exp + ((UINT64) x1 << 49);
2569 // x1 = -1 is possible at the end of a second pass when the
2570 // first pass started with x1 = 1
2571 }
2572 C1_hi = C1.w[1];
2573 C1_lo = C1.w[0];
2574 // general correction from RN to RA, RM, RP, RZ; result uses y_exp
2575 if (rnd_mode != ROUNDING_TO_NEAREST) {
2576 if ((!x_sign
2577 && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint)
2578 || ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP)
2579 && is_midpoint_gt_even)))
2580 || (x_sign
2581 && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint)
2582 || ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN)
2583 && is_midpoint_gt_even)))) {
2595 // C1 = C1 + 1
2596 C1_lo = C1_lo + 1;
2597 if (C1_lo == 0) { // rounding overflow in the low 64 bits
2598 C1_hi = C1_hi + 1;
2599 }
2600 if (C1_hi == 0x0001ed09bead87c0ull
2601 && C1_lo == 0x378d8e6400000000ull) {
2602 // C1 = 10^34 => rounding overflow
2603 C1_hi = 0x0000314dc6448d93ull;
2604 C1_lo = 0x38c15b0a00000000ull; // 10^33
2605 y_exp = y_exp + EXP_P1;
2606 }
2607 } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint)
2608 && ((x_sign && (rnd_mode == ROUNDING_UP
2609 || rnd_mode == ROUNDING_TO_ZERO))
2610 || (!x_sign && (rnd_mode == ROUNDING_DOWN
2611 || rnd_mode == ROUNDING_TO_ZERO)))) {
2615 // C1 = C1 - 1
2616 C1_lo = C1_lo - 1;
2617 if (C1_lo == 0xffffffffffffffffull)
2618 C1_hi--;
2619 // check if we crossed into the lower decade
2620 if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
2621 C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
2622 C1_lo = 0x378d8e63ffffffffull;
2623 y_exp = y_exp - EXP_P1;
2624 // no underflow, because delta + q2 >= P34 + 1
2625 }
2626 } else {
2627 ; // exact, the result is already correct
2628 }
2629 // in all cases check for overflow (RN and RA solved already)
2630 if (y_exp == EXP_MAX_P1) { // overflow
2631 if ((rnd_mode == ROUNDING_DOWN && x_sign) || // RM and res < 0
2632 (rnd_mode == ROUNDING_UP && !x_sign)) { // RP and res > 0
2633 C1_hi = 0x7800000000000000ull; // +inf
2634 C1_lo = 0x0ull;
2635 } else { // RM and res > 0, RP and res < 0, or RZ
2636 C1_hi = 0x5fffed09bead87c0ull;
2637 C1_lo = 0x378d8e63ffffffffull;
2638 }
2639 y_exp = 0; // x_sign is preserved
2640 // set the inexact flag (even if the addition itself was exact)
2641 *pfpsf |= INEXACT_EXCEPTION;
2642 // set the overflow flag
2643 *pfpsf |= OVERFLOW_EXCEPTION;
2644 }
2645 }
2646 // assemble the result
2647 res.w[1] = x_sign | y_exp | C1_hi;
2648 res.w[0] = C1_lo;
2649 if (tmp_inexact)
2650 *pfpsf |= INEXACT_EXCEPTION;
2651 }
2652 } else { // if (-P34 + 1 <= delta <= -1) <=> 1 <= -delta <= P34 - 1
2653 // NOTE: the following, up to "} else { // if x_sign != y_sign
2654 // the result is exact" is identical to "else if (delta == P34 - q2) {"
2655 // from above; also, the code is not symmetric: a+b and b+a may take
2656 // different paths (need to unify eventually!)
2657 // calculate C' = C2 + C1 * 10^(e1-e2) directly; the result may be
2658 // inexact if it requires P34 + 1 decimal digits; in either case the
2659 // 'cutoff' point for addition is at the position of the lsb of C2
2660 // The coefficient of the result is C1 * 10^(e1-e2) + C2 and the
2661 // exponent is e2; either C1 or 10^(e1-e2) may not fit in 64 bits,
2662 // but their product fits with certainty in 128 bits (actually in 113)
2663 // Note that 0 <= e1 - e2 <= P34 - 2
2664 // -P34 + 1 <= delta <= -1 <=>
2665 // -P34 + 1 <= q1 + e1 - q2 - e2 <= -1 <=>
2666 // q2 - q1 - P34 + 1 <= e1 - e2 <= q2 - q1 - 1 <=>
2667 // 2 - 2*P34 <= e1-e2 <= P34 - 2; since e1 >= e2 in this branch,
2668 // 0 <= e1-e2 <= P34 - 2
2668 scale = delta - q1 + q2; // scale = (int)(e1 >> 49) - (int)(e2 >> 49)
2669 if (scale >= 20) { // 10^(e1-e2) does not fit in 64 bits, but C1 does
2670 __mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]);
2671 } else if (scale >= 1) {
2672 // if 1 <= scale <= 19 then 10^(e1-e2) fits in 64 bits
2673 if (q1 <= 19) { // C1 fits in 64 bits
2674 __mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]);
2675 } else { // q1 >= 20
2676 C1.w[1] = C1_hi;
2677 C1.w[0] = C1_lo;
2678 __mul_128x64_to_128 (C1, ten2k64[scale], C1);
2679 }
2680 } else { // if (scale == 0) C1 is unchanged
2681 C1.w[1] = C1_hi;
2682 C1.w[0] = C1_lo; // only the low part is necessary
2683 }
2684 C1_hi = C1.w[1];
2685 C1_lo = C1.w[0];
2686 // now add C2
2687 if (x_sign == y_sign) {
2688 // the result can overflow!
2689 C1_lo = C1_lo + C2_lo;
2690 C1_hi = C1_hi + C2_hi;
2691 if (C1_lo < C1.w[0])
2692 C1_hi++;
2693 // test for overflow, possible only when C1 >= 10^34
2694 if (C1_hi > 0x0001ed09bead87c0ull || (C1_hi == 0x0001ed09bead87c0ull && C1_lo >= 0x378d8e6400000000ull)) { // C1 >= 10^34
2695 // in this case q = P34 + 1 and x = q - P34 = 1, so multiply
2696 // C'' = C'+ 5 = C1 + 5 by k1 ~ 10^(-1) calculated for P34 + 1
2697 // decimal digits
2698 // Calculate C'' = C' + 1/2 * 10^x
2699 if (C1_lo >= 0xfffffffffffffffbull) { // low half add has carry
2700 C1_lo = C1_lo + 5;
2701 C1_hi = C1_hi + 1;
2702 } else {
2703 C1_lo = C1_lo + 5;
2704 }
2705 // the approximation of 10^(-1) was rounded up to 118 bits
2706 // 10^(-1) =~ 33333333333333333333333333333400 * 2^-129
2707 // 10^(-1) =~ 19999999999999999999999999999a00 * 2^-128
2708 C1.w[1] = C1_hi;
2709 C1.w[0] = C1_lo; // C''
2710 ten2m1.w[1] = 0x1999999999999999ull;
2711 ten2m1.w[0] = 0x9999999999999a00ull;
2712 __mul_128x128_to_256 (P256, C1, ten2m1); // P256 = C*, f*
2713 // C* is actually floor(C*) in this case
2714 // the top Ex = 128 bits of 10^(-1) are
2715 // T* = 0x00199999999999999999999999999999
2716 // if (0 < f* < 10^(-x)) then
2717 // if floor(C*) is even then C = floor(C*) - logical right
2718 // shift; C has p decimal digits, correct by Prop. 1)
2719 // else if floor(C*) is odd C = floor(C*) - 1 (logical right
2720 // shift; C has p decimal digits, correct by Pr. 1)
2721 // else
2722 // C = floor(C*) (logical right shift; C has p decimal digits,
2723 // correct by Property 1)
2724 // n = C * 10^(e2+x)
2725 if ((P256.w[1] || P256.w[0])
2726 && (P256.w[1] < 0x1999999999999999ull
2727 || (P256.w[1] == 0x1999999999999999ull
2728 && P256.w[0] <= 0x9999999999999999ull))) {
2729 // the result is a midpoint
2730 if (P256.w[2] & 0x01) {
2731 is_midpoint_gt_even = 1;
2732 // if floor(C*) is odd C = floor(C*) - 1; the result is not 0
2733 P256.w[2]--;
2734 if (P256.w[2] == 0xffffffffffffffffull)
2735 P256.w[3]--;
2736 } else {
2737 is_midpoint_lt_even = 1;
2738 }
2739 }
2740 // n = Cstar * 10^(e2+1)
2741 y_exp = y_exp + EXP_P1;
2742 // C* != 10^P34 because C* has P34 digits
2743 // check for overflow
2744 if (y_exp == EXP_MAX_P1
2745 && (rnd_mode == ROUNDING_TO_NEAREST
2746 || rnd_mode == ROUNDING_TIES_AWAY)) {
2747 // overflow for RN
2748 res.w[1] = x_sign | 0x7800000000000000ull; // +/-inf
2749 res.w[0] = 0x0ull;
2750 // set the inexact flag
2751 *pfpsf |= INEXACT_EXCEPTION;
2752 // set the overflow flag
2753 *pfpsf |= OVERFLOW_EXCEPTION;
2754 BID_SWAP128 (res);
2755 BID_RETURN (res);
2756 }
2757 // if (0 < f* - 1/2 < 10^(-x)) then
2758 // the result of the addition is exact
2759 // else
2760 // the result of the addition is inexact
2761 if (P256.w[1] > 0x8000000000000000ull || (P256.w[1] == 0x8000000000000000ull && P256.w[0] > 0x0ull)) { // the result may be exact
2762 tmp64 = P256.w[1] - 0x8000000000000000ull; // f* - 1/2
2763 if ((tmp64 > 0x1999999999999999ull
2764 || (tmp64 == 0x1999999999999999ull
2765 && P256.w[0] >= 0x9999999999999999ull))) {
2766 // set the inexact flag
2767 *pfpsf |= INEXACT_EXCEPTION;
2768 is_inexact = 1;
2769 } // else the result is exact
2770 } else { // the result is inexact
2771 // set the inexact flag
2772 *pfpsf |= INEXACT_EXCEPTION;
2773 is_inexact = 1;
2774 }
2775 C1_hi = P256.w[3];
2776 C1_lo = P256.w[2];
2777 if (!is_midpoint_gt_even && !is_midpoint_lt_even) {
2778 is_inexact_lt_midpoint = is_inexact
2779 && (P256.w[1] & 0x8000000000000000ull);
2780 is_inexact_gt_midpoint = is_inexact
2781 && !(P256.w[1] & 0x8000000000000000ull);
2782 }
2783 // general correction from RN to RA, RM, RP, RZ; result uses y_exp
2784 if (rnd_mode != ROUNDING_TO_NEAREST) {
2785 if ((!x_sign
2786 && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint)
2787 || ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP)
2788 && is_midpoint_gt_even)))
2789 || (x_sign
2790 && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint)
2791 || ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN)
2792 && is_midpoint_gt_even)))) {
2805 // C1 = C1 + 1
2806 C1_lo = C1_lo + 1;
2807 if (C1_lo == 0) { // rounding overflow in the low 64 bits
2808 C1_hi = C1_hi + 1;
2809 }
2810 if (C1_hi == 0x0001ed09bead87c0ull
2811 && C1_lo == 0x378d8e6400000000ull) {
2812 // C1 = 10^34 => rounding overflow
2813 C1_hi = 0x0000314dc6448d93ull;
2814 C1_lo = 0x38c15b0a00000000ull; // 10^33
2815 y_exp = y_exp + EXP_P1;
2816 }
2817 } else
2818 if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
2819 ((x_sign && (rnd_mode == ROUNDING_UP ||
2820 rnd_mode == ROUNDING_TO_ZERO)) ||
2821 (!x_sign && (rnd_mode == ROUNDING_DOWN ||
2822 rnd_mode == ROUNDING_TO_ZERO)))) {
2823 // C1 = C1 - 1
2824 C1_lo = C1_lo - 1;
2825 if (C1_lo == 0xffffffffffffffffull)
2826 C1_hi--;
2827 // check if we crossed into the lower decade
2828 if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
2829 C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
2830 C1_lo = 0x378d8e63ffffffffull;
2831 y_exp = y_exp - EXP_P1;
2832 // no underflow, because delta + q2 >= P34 + 1
2833 }
2834 } else {
2835 ; // exact, the result is already correct
2836 }
2837 // in all cases check for overflow (RN and RA solved already)
2838 if (y_exp == EXP_MAX_P1) { // overflow
2839 if ((rnd_mode == ROUNDING_DOWN && x_sign) || // RM and res < 0
2840 (rnd_mode == ROUNDING_UP && !x_sign)) { // RP and res > 0
2841 C1_hi = 0x7800000000000000ull; // +inf
2842 C1_lo = 0x0ull;
2843 } else { // RM and res > 0, RP and res < 0, or RZ
2844 C1_hi = 0x5fffed09bead87c0ull;
2845 C1_lo = 0x378d8e63ffffffffull;
2846 }
2847 y_exp = 0; // x_sign is preserved
2848 // set the inexact flag (even if the addition itself was exact)
2849 *pfpsf |= INEXACT_EXCEPTION;
2850 // set the overflow flag
2851 *pfpsf |= OVERFLOW_EXCEPTION;
2852 }
2853 }
2854 } // else if (C1 < 10^34) then C1 is the coeff.; the result is exact
2855 // assemble the result
2856 res.w[1] = x_sign | y_exp | C1_hi;
2857 res.w[0] = C1_lo;
2858 } else { // if x_sign != y_sign the result is exact
2859 C1_lo = C2_lo - C1_lo;
2860 C1_hi = C2_hi - C1_hi;
2861 if (C1_lo > C2_lo)
2862 C1_hi--;
2863 if (C1_hi >= 0x8000000000000000ull) { // negative coefficient!
2864 C1_lo = ~C1_lo;
2865 C1_lo++;
2866 C1_hi = ~C1_hi;
2867 if (C1_lo == 0x0)
2868 C1_hi++;
2869 x_sign = y_sign; // the result will have the sign of y
2870 }
2871 // the result can be zero, but it cannot overflow
2872 if (C1_lo == 0 && C1_hi == 0) {
2873 // assemble the result
2874 if (x_exp < y_exp)
2875 res.w[1] = x_exp;
2876 else
2877 res.w[1] = y_exp;
2878 res.w[0] = 0;
2879 if (rnd_mode == ROUNDING_DOWN) {
2880 res.w[1] |= 0x8000000000000000ull;
2881 }
2882 BID_SWAP128 (res);
2883 BID_RETURN (res);
2884 }
2885 // assemble the result
2886 res.w[1] = y_sign | y_exp | C1_hi;
2887 res.w[0] = C1_lo;
2888 }
2889 }
2890 }
2891 BID_SWAP128 (res);
2892 BID_RETURN (res);
2893 }
2898 // bid128_sub stands for bid128qq_sub
2900 /*****************************************************************************
2901 * BID128 sub
2902 ****************************************************************************/
2904 #if DECIMAL_CALL_BY_REFERENCE
2905 void
2906 bid128_sub (UINT128 * pres, UINT128 * px, UINT128 * py
2907 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
2908 _EXC_INFO_PARAM) {
2909 UINT128 x = *px, y = *py;
2910 #if !DECIMAL_GLOBAL_ROUNDING
2911 unsigned int rnd_mode = *prnd_mode;
2912 #endif
2913 #else
2914 UINT128
2915 bid128_sub (UINT128 x, UINT128 y
2916 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
2917 _EXC_INFO_PARAM) {
2918 #endif
2920 UINT128 res;
2921 UINT64 y_sign;
2923 if ((y.w[HIGH_128W] & MASK_NAN) != MASK_NAN) { // y is not NAN
2924 // change its sign
2925 y_sign = y.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
2926 if (y_sign)
2927 y.w[HIGH_128W] = y.w[HIGH_128W] & 0x7fffffffffffffffull;
2928 else
2929 y.w[HIGH_128W] = y.w[HIGH_128W] | 0x8000000000000000ull;
2930 }
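// (Subtraction is implemented as x + (-y): for any non-NaN y, including
// zeros and infinities, only the sign bit (bit 63 of the high word) is
// toggled before delegating to bid128_add; a NaN y is passed through
// unchanged so bid128_add applies the usual NaN rules.)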
2931 #if DECIMAL_CALL_BY_REFERENCE
2932 bid128_add (&res, &x, &y
2933 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
2934 _EXC_INFO_ARG);
2935 #else
2936 res = bid128_add (x, y
2937 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
2938 _EXC_INFO_ARG);
2939 #endif
2940 BID_RETURN (res);
2941 }
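// Usage sketch (illustrative only; the exact parameter lists depend on the
// DECIMAL_CALL_BY_REFERENCE, DECIMAL_GLOBAL_ROUNDING and
// DECIMAL_GLOBAL_EXCEPTION_FLAGS build settings):
//
//   UINT128 a, b, d;
//   ...
//   #if DECIMAL_CALL_BY_REFERENCE
//   bid128_sub (&d, &a, &b /* rounding/flags args per build */);
//   #else
//   d = bid128_sub (a, b /* rounding/flags args per build */);
//   #endif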