2009-01-14 Richard Guenther <rguenther@suse.de>
[official-gcc.git] / libgcc / config / libbid / bid64_div.c
blob7357f3dbdafb78d000ceca809c8137f3e4a36dd8
1 /* Copyright (C) 2007 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
8 version.
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
17 executable.)
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
22 for more details.
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA. */
29 /*****************************************************************************
30 * BID64 divide
31 *****************************************************************************
33 * Algorithm description:
35 * if(coefficient_x<coefficient_y)
36 * p = number_digits(coefficient_y) - number_digits(coefficient_x)
37 * A = coefficient_x*10^p
38 * B = coefficient_y
39 * CA= A*10^(15+j), j=0 for A>=B, 1 otherwise
40 * Q = 0
41 * else
42 * get Q=(int)(coefficient_x/coefficient_y)
43 * (based on double precision divide)
44 * check for exact divide case
45 * Let R = coefficient_x - Q*coefficient_y
46 * Let m=16-number_digits(Q)
47 * CA=R*10^m, Q=Q*10^m
48 * B = coefficient_y
49 * endif
50 * if (CA<2^64)
51 * Q += CA/B (64-bit unsigned divide)
52 * else
53 * get final Q using double precision divide, followed by 3 integer
54 * iterations
55 * if exact result, eliminate trailing zeros
56 * check for underflow
57 * round coefficient to nearest
59 ****************************************************************************/
61 #include "bid_internal.h"
62 #include "bid_div_macros.h"
63 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
64 #include <fenv.h>
66 #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT
67 #endif
69 extern UINT32 convert_table[5][128][2];
70 extern SINT8 factors[][2];
71 extern UINT8 packed_10000_zeros[];
74 #if DECIMAL_CALL_BY_REFERENCE
76 void
77 bid64_div (UINT64 * pres, UINT64 * px,
78 UINT64 *
79 py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
80 _EXC_INFO_PARAM) {
81 UINT64 x, y;
82 #else
84 UINT64
85 bid64_div (UINT64 x,
86 UINT64 y _RND_MODE_PARAM _EXC_FLAGS_PARAM
87 _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
88 #endif
89 UINT128 CA, CT;
90 UINT64 sign_x, sign_y, coefficient_x, coefficient_y, A, B, QX, PD;
91 UINT64 A2, Q, Q2, B2, B4, B5, R, T, DU, res;
92 UINT64 valid_x, valid_y;
93 SINT64 D;
94 int_double t_scale, tempq, temp_b;
95 int_float tempx, tempy;
96 double da, db, dq, da_h, da_l;
97 int exponent_x, exponent_y, bin_expon_cx;
98 int diff_expon, ed1, ed2, bin_index;
99 int rmode, amount;
100 int nzeros, i, j, k, d5;
101 UINT32 QX32, tdigit[3], digit, digit_h, digit_low;
102 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
103 fexcept_t binaryflags = 0;
104 #endif
106 #if DECIMAL_CALL_BY_REFERENCE
107 #if !DECIMAL_GLOBAL_ROUNDING
108 _IDEC_round rnd_mode = *prnd_mode;
109 #endif
110 x = *px;
111 y = *py;
112 #endif
114 valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x);
115 valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y);
117 // unpack arguments, check for NaN or Infinity
118 if (!valid_x) {
119 // x is Inf. or NaN
120 #ifdef SET_STATUS_FLAGS
121 if ((y & SNAN_MASK64) == SNAN_MASK64) // y is sNaN
122 __set_status_flags (pfpsf, INVALID_EXCEPTION);
123 #endif
125 // test if x is NaN
126 if ((x & NAN_MASK64) == NAN_MASK64) {
127 #ifdef SET_STATUS_FLAGS
128 if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN
129 __set_status_flags (pfpsf, INVALID_EXCEPTION);
130 #endif
131 BID_RETURN (coefficient_x & QUIET_MASK64);
133 // x is Infinity?
134 if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
135 // check if y is Inf or NaN
136 if ((y & INFINITY_MASK64) == INFINITY_MASK64) {
137 // y==Inf, return NaN
138 if ((y & NAN_MASK64) == INFINITY_MASK64) { // Inf/Inf
139 #ifdef SET_STATUS_FLAGS
140 __set_status_flags (pfpsf, INVALID_EXCEPTION);
141 #endif
142 BID_RETURN (NAN_MASK64);
144 } else {
145 // otherwise return +/-Inf
146 BID_RETURN (((x ^ y) & 0x8000000000000000ull) |
147 INFINITY_MASK64);
150 // x==0
151 if (((y & INFINITY_MASK64) != INFINITY_MASK64)
152 && !(coefficient_y)) {
153 // y==0 , return NaN
154 #ifdef SET_STATUS_FLAGS
155 __set_status_flags (pfpsf, INVALID_EXCEPTION);
156 #endif
157 BID_RETURN (NAN_MASK64);
159 if (((y & INFINITY_MASK64) != INFINITY_MASK64)) {
160 if ((y & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64)
161 exponent_y = ((UINT32) (y >> 51)) & 0x3ff;
162 else
163 exponent_y = ((UINT32) (y >> 53)) & 0x3ff;
164 sign_y = y & 0x8000000000000000ull;
166 exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS;
167 if (exponent_x > DECIMAL_MAX_EXPON_64)
168 exponent_x = DECIMAL_MAX_EXPON_64;
169 else if (exponent_x < 0)
170 exponent_x = 0;
171 BID_RETURN ((sign_x ^ sign_y) | (((UINT64) exponent_x) << 53));
175 if (!valid_y) {
176 // y is Inf. or NaN
178 // test if y is NaN
179 if ((y & NAN_MASK64) == NAN_MASK64) {
180 #ifdef SET_STATUS_FLAGS
181 if ((y & SNAN_MASK64) == SNAN_MASK64) // sNaN
182 __set_status_flags (pfpsf, INVALID_EXCEPTION);
183 #endif
184 BID_RETURN (coefficient_y & QUIET_MASK64);
186 // y is Infinity?
187 if ((y & INFINITY_MASK64) == INFINITY_MASK64) {
188 // return +/-0
189 BID_RETURN (((x ^ y) & 0x8000000000000000ull));
191 // y is 0
192 #ifdef SET_STATUS_FLAGS
193 __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION);
194 #endif
195 BID_RETURN ((sign_x ^ sign_y) | INFINITY_MASK64);
197 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
198 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
199 #endif
200 diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS;
202 if (coefficient_x < coefficient_y) {
203 // get number of decimal digits for c_x, c_y
205 //--- get number of bits in the coefficients of x and y ---
206 tempx.d = (float) coefficient_x;
207 tempy.d = (float) coefficient_y;
208 bin_index = (tempy.i - tempx.i) >> 23;
210 A = coefficient_x * power10_index_binexp[bin_index];
211 B = coefficient_y;
213 temp_b.d = (double) B;
215 // compare A, B
216 DU = (A - B) >> 63;
217 ed1 = 15 + (int) DU;
218 ed2 = estimate_decimal_digits[bin_index] + ed1;
219 T = power10_table_128[ed1].w[0];
220 __mul_64x64_to_128 (CA, A, T);
222 Q = 0;
223 diff_expon = diff_expon - ed2;
225 // adjust double precision db, to ensure that later A/B - (int)(da/db) > -1
226 if (coefficient_y < 0x0020000000000000ull) {
227 temp_b.i += 1;
228 db = temp_b.d;
229 } else
230 db = (double) (B + 2 + (B & 1));
232 } else {
233 // get c_x/c_y
235 // set last bit before conversion to DP
236 A2 = coefficient_x | 1;
237 da = (double) A2;
239 db = (double) coefficient_y;
241 tempq.d = da / db;
242 Q = (UINT64) tempq.d;
244 R = coefficient_x - coefficient_y * Q;
246 // will use to get number of dec. digits of Q
247 bin_expon_cx = (tempq.i >> 52) - 0x3ff;
249 // R<0 ?
250 D = ((SINT64) R) >> 63;
251 Q += D;
252 R += (coefficient_y & D);
254 // exact result ?
255 if (((SINT64) R) <= 0) {
256 // can have R==-1 for coeff_y==1
257 res =
258 get_BID64 (sign_x ^ sign_y, diff_expon, (Q + R), rnd_mode,
259 pfpsf);
260 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
261 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
262 #endif
263 BID_RETURN (res);
265 // get decimal digits of Q
266 DU = power10_index_binexp[bin_expon_cx] - Q - 1;
267 DU >>= 63;
269 ed2 = 16 - estimate_decimal_digits[bin_expon_cx] - (int) DU;
271 T = power10_table_128[ed2].w[0];
272 __mul_64x64_to_128 (CA, R, T);
273 B = coefficient_y;
275 Q *= power10_table_128[ed2].w[0];
276 diff_expon -= ed2;
280 if (!CA.w[1]) {
281 Q2 = CA.w[0] / B;
282 B2 = B + B;
283 B4 = B2 + B2;
284 R = CA.w[0] - Q2 * B;
285 Q += Q2;
286 } else {
288 // 2^64
289 t_scale.i = 0x43f0000000000000ull;
290 // convert CA to DP
291 da_h = CA.w[1];
292 da_l = CA.w[0];
293 da = da_h * t_scale.d + da_l;
295 // quotient
296 dq = da / db;
297 Q2 = (UINT64) dq;
299 // get w[0] remainder
300 R = CA.w[0] - Q2 * B;
302 // R<0 ?
303 D = ((SINT64) R) >> 63;
304 Q2 += D;
305 R += (B & D);
307 // now R<6*B
309 // quick divide
311 // 4*B
312 B2 = B + B;
313 B4 = B2 + B2;
315 R = R - B4;
316 // R<0 ?
317 D = ((SINT64) R) >> 63;
318 // restore R if negative
319 R += (B4 & D);
320 Q2 += ((~D) & 4);
322 R = R - B2;
323 // R<0 ?
324 D = ((SINT64) R) >> 63;
325 // restore R if negative
326 R += (B2 & D);
327 Q2 += ((~D) & 2);
329 R = R - B;
330 // R<0 ?
331 D = ((SINT64) R) >> 63;
332 // restore R if negative
333 R += (B & D);
334 Q2 += ((~D) & 1);
336 Q += Q2;
339 #ifdef SET_STATUS_FLAGS
340 if (R) {
341 // set status flags
342 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
344 #ifndef LEAVE_TRAILING_ZEROS
345 else
346 #endif
347 #else
348 #ifndef LEAVE_TRAILING_ZEROS
349 if (!R)
350 #endif
351 #endif
352 #ifndef LEAVE_TRAILING_ZEROS
354 // eliminate trailing zeros
356 // check whether CX, CY are short
357 if ((coefficient_x <= 1024) && (coefficient_y <= 1024)) {
358 i = (int) coefficient_y - 1;
359 j = (int) coefficient_x - 1;
360 // difference in powers of 2 factors for Y and X
361 nzeros = ed2 - factors[i][0] + factors[j][0];
362 // difference in powers of 5 factors
363 d5 = ed2 - factors[i][1] + factors[j][1];
364 if (d5 < nzeros)
365 nzeros = d5;
367 __mul_64x64_to_128 (CT, Q, reciprocals10_64[nzeros]);
369 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
370 amount = short_recip_scale[nzeros];
371 Q = CT.w[1] >> amount;
373 diff_expon += nzeros;
374 } else {
375 tdigit[0] = Q & 0x3ffffff;
376 tdigit[1] = 0;
377 QX = Q >> 26;
378 QX32 = QX;
379 nzeros = 0;
381 for (j = 0; QX32; j++, QX32 >>= 7) {
382 k = (QX32 & 127);
383 tdigit[0] += convert_table[j][k][0];
384 tdigit[1] += convert_table[j][k][1];
385 if (tdigit[0] >= 100000000) {
386 tdigit[0] -= 100000000;
387 tdigit[1]++;
391 digit = tdigit[0];
392 if (!digit && !tdigit[1])
393 nzeros += 16;
394 else {
395 if (!digit) {
396 nzeros += 8;
397 digit = tdigit[1];
399 // decompose digit
400 PD = (UINT64) digit *0x068DB8BBull;
401 digit_h = (UINT32) (PD >> 40);
402 digit_low = digit - digit_h * 10000;
404 if (!digit_low)
405 nzeros += 4;
406 else
407 digit_h = digit_low;
409 if (!(digit_h & 1))
410 nzeros +=
411 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
412 (digit_h & 7));
415 if (nzeros) {
416 __mul_64x64_to_128 (CT, Q, reciprocals10_64[nzeros]);
418 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
419 amount = short_recip_scale[nzeros];
420 Q = CT.w[1] >> amount;
422 diff_expon += nzeros;
425 if (diff_expon >= 0) {
426 res =
427 fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, Q,
428 rnd_mode, pfpsf);
429 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
430 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
431 #endif
432 BID_RETURN (res);
435 #endif
437 if (diff_expon >= 0) {
438 #ifdef IEEE_ROUND_NEAREST
439 // round to nearest code
440 // R*10
441 R += R;
442 R = (R << 2) + R;
443 B5 = B4 + B;
445 // compare 10*R to 5*B
446 R = B5 - R;
447 // correction for (R==0 && (Q&1))
448 R -= (Q & 1);
449 // R<0 ?
450 D = ((UINT64) R) >> 63;
451 Q += D;
452 #else
453 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
454 // round to nearest code
455 // R*10
456 R += R;
457 R = (R << 2) + R;
458 B5 = B4 + B;
460 // compare 10*R to 5*B
461 R = B5 - R;
462 // correction for (R==0 && (Q&1))
463 R -= (Q & 1);
464 // R<0 ?
465 D = ((UINT64) R) >> 63;
466 Q += D;
467 #else
468 rmode = rnd_mode;
469 if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2)
470 rmode = 3 - rmode;
471 switch (rmode) {
472 case 0: // round to nearest code
473 case ROUNDING_TIES_AWAY:
474 // R*10
475 R += R;
476 R = (R << 2) + R;
477 B5 = B4 + B;
478 // compare 10*R to 5*B
479 R = B5 - R;
480 // correction for (R==0 && (Q&1))
481 R -= ((Q | (rmode >> 2)) & 1);
482 // R<0 ?
483 D = ((UINT64) R) >> 63;
484 Q += D;
485 break;
486 case ROUNDING_DOWN:
487 case ROUNDING_TO_ZERO:
488 break;
489 default: // rounding up
490 Q++;
491 break;
493 #endif
494 #endif
496 res =
497 fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, Q, rnd_mode,
498 pfpsf);
499 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
500 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
501 #endif
502 BID_RETURN (res);
503 } else {
504 // UF occurs
506 #ifdef SET_STATUS_FLAGS
507 if ((diff_expon + 16 < 0)) {
508 // set status flags
509 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
511 #endif
512 rmode = rnd_mode;
513 res =
514 get_BID64_UF (sign_x ^ sign_y, diff_expon, Q, R, rmode, pfpsf);
515 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
516 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
517 #endif
518 BID_RETURN (res);
525 TYPE0_FUNCTION_ARGTYPE1_ARG128 (UINT64, bid64dq_div, UINT64, x, y)
526 UINT256 CA4 =
527 { {0x0ull, 0x0ull, 0x0ull, 0x0ull} }, CA4r, P256, QB256;
528 UINT128 CX, CY, T128, CQ, CQ2, CR, CA, TP128, Qh, Ql, Tmp;
529 UINT64 sign_x, sign_y, T, carry64, D, Q_low, QX, valid_y, PD, res;
530 int_float fx, fy, f64;
531 UINT32 QX32, tdigit[3], digit, digit_h, digit_low;
532 int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2,
533 digits_q, amount;
534 int nzeros, i, j, k, d5, done = 0;
535 unsigned rmode;
536 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
537 fexcept_t binaryflags = 0;
538 #endif
540 valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y);
542 // unpack arguments, check for NaN or Infinity
543 CX.w[1] = 0;
544 if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], (x))) {
545 #ifdef SET_STATUS_FLAGS
546 if (((y.w[1] & SNAN_MASK64) == SNAN_MASK64) || // y is sNaN
547 ((x & SNAN_MASK64) == SNAN_MASK64))
548 __set_status_flags (pfpsf, INVALID_EXCEPTION);
549 #endif
550 // test if x is NaN
551 if (((x) & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
552 res = CX.w[0];
553 BID_RETURN (res & QUIET_MASK64);
555 // x is Infinity?
556 if (((x) & 0x7800000000000000ull) == 0x7800000000000000ull) {
557 // check if y is Inf.
558 if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull))
559 // return NaN
561 #ifdef SET_STATUS_FLAGS
562 __set_status_flags (pfpsf, INVALID_EXCEPTION);
563 #endif
564 res = 0x7c00000000000000ull;
565 BID_RETURN (res);
567 if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull)) {
568 // otherwise return +/-Inf
569 res =
570 (((x) ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull;
571 BID_RETURN (res);
574 // x is 0
575 if ((y.w[1] & INFINITY_MASK64) != INFINITY_MASK64) {
576 if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) {
577 #ifdef SET_STATUS_FLAGS
578 __set_status_flags (pfpsf, INVALID_EXCEPTION);
579 #endif
580 // x=y=0, return NaN
581 res = 0x7c00000000000000ull;
582 BID_RETURN (res);
584 // return 0
585 res = ((x) ^ y.w[1]) & 0x8000000000000000ull;
586 exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128;
587 if (exponent_x > DECIMAL_MAX_EXPON_64)
588 exponent_x = DECIMAL_MAX_EXPON_64;
589 else if (exponent_x < 0)
590 exponent_x = 0;
591 res |= (((UINT64) exponent_x) << 53);
592 BID_RETURN (res);
595 exponent_x += (DECIMAL_EXPONENT_BIAS_128 - DECIMAL_EXPONENT_BIAS);
596 if (!valid_y) {
597 // y is Inf. or NaN
599 // test if y is NaN
600 if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
601 #ifdef SET_STATUS_FLAGS
602 if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN
603 __set_status_flags (pfpsf, INVALID_EXCEPTION);
604 #endif
605 Tmp.w[1] = (CY.w[1] & 0x00003fffffffffffull);
606 Tmp.w[0] = CY.w[0];
607 TP128 = reciprocals10_128[18];
608 __mul_128x128_full (Qh, Ql, Tmp, TP128);
609 amount = recip_scale[18];
610 __shr_128 (Tmp, Qh, amount);
611 res = (CY.w[1] & 0xfc00000000000000ull) | Tmp.w[0];
612 BID_RETURN (res);
614 // y is Infinity?
615 if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
616 // return +/-0
617 res = sign_x ^ sign_y;
618 BID_RETURN (res);
620 // y is 0, return +/-Inf
621 res =
622 (((x) ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull;
623 #ifdef SET_STATUS_FLAGS
624 __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION);
625 #endif
626 BID_RETURN (res);
628 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
629 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
630 #endif
631 diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS;
633 if (__unsigned_compare_gt_128 (CY, CX)) {
634 // CX < CY
636 // 2^64
637 f64.i = 0x5f800000;
639 // fx ~ CX, fy ~ CY
640 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
641 fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0];
642 // expon_cy - expon_cx
643 bin_index = (fy.i - fx.i) >> 23;
645 if (CX.w[1]) {
646 T = power10_index_binexp_128[bin_index].w[0];
647 __mul_64x128_short (CA, T, CX);
648 } else {
649 T128 = power10_index_binexp_128[bin_index];
650 __mul_64x128_short (CA, CX.w[0], T128);
653 ed2 = 15;
654 if (__unsigned_compare_gt_128 (CY, CA))
655 ed2++;
657 T128 = power10_table_128[ed2];
658 __mul_128x128_to_256 (CA4, CA, T128);
660 ed2 += estimate_decimal_digits[bin_index];
661 CQ.w[0] = CQ.w[1] = 0;
662 diff_expon = diff_expon - ed2;
664 } else {
665 // get CQ = CX/CY
666 __div_128_by_128 (&CQ, &CR, CX, CY);
668 // get number of decimal digits in CQ
669 // 2^64
670 f64.i = 0x5f800000;
671 fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0];
672 // binary expon. of CQ
673 bin_expon = (fx.i - 0x3f800000) >> 23;
675 digits_q = estimate_decimal_digits[bin_expon];
676 TP128.w[0] = power10_index_binexp_128[bin_expon].w[0];
677 TP128.w[1] = power10_index_binexp_128[bin_expon].w[1];
678 if (__unsigned_compare_ge_128 (CQ, TP128))
679 digits_q++;
681 if (digits_q <= 16) {
682 if (!CR.w[1] && !CR.w[0]) {
683 res = get_BID64 (sign_x ^ sign_y, diff_expon,
684 CQ.w[0], rnd_mode, pfpsf);
685 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
686 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
687 #endif
688 BID_RETURN (res);
691 ed2 = 16 - digits_q;
692 T128.w[0] = power10_table_128[ed2].w[0];
693 __mul_64x128_to_192 (CA4, (T128.w[0]), CR);
694 diff_expon = diff_expon - ed2;
695 CQ.w[0] *= T128.w[0];
696 } else {
697 ed2 = digits_q - 16;
698 diff_expon += ed2;
699 T128 = reciprocals10_128[ed2];
700 __mul_128x128_to_256 (P256, CQ, T128);
701 amount = recip_scale[ed2];
702 CQ.w[0] = (P256.w[2] >> amount) | (P256.w[3] << (64 - amount));
703 CQ.w[1] = 0;
705 __mul_64x64_to_128 (CQ2, CQ.w[0], (power10_table_128[ed2].w[0]));
707 __mul_64x64_to_128 (QB256, CQ2.w[0], CY.w[0]);
708 QB256.w[1] += CQ2.w[0] * CY.w[1] + CQ2.w[1] * CY.w[0];
710 CA4.w[1] = CX.w[1] - QB256.w[1];
711 CA4.w[0] = CX.w[0] - QB256.w[0];
712 if (CX.w[0] < QB256.w[0])
713 CA4.w[1]--;
714 if (CR.w[0] || CR.w[1])
715 CA4.w[0] |= 1;
716 done = 1;
721 if (!done) {
722 __div_256_by_128 (&CQ, &CA4, CY);
727 #ifdef SET_STATUS_FLAGS
728 if (CA4.w[0] || CA4.w[1]) {
729 // set status flags
730 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
732 #ifndef LEAVE_TRAILING_ZEROS
733 else
734 #endif
735 #else
736 #ifndef LEAVE_TRAILING_ZEROS
737 if (!CA4.w[0] && !CA4.w[1])
738 #endif
739 #endif
740 #ifndef LEAVE_TRAILING_ZEROS
741 // check whether result is exact
743 // check whether CX, CY are short
744 if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) {
745 i = (int) CY.w[0] - 1;
746 j = (int) CX.w[0] - 1;
747 // difference in powers of 2 factors for Y and X
748 nzeros = ed2 - factors[i][0] + factors[j][0];
749 // difference in powers of 5 factors
750 d5 = ed2 - factors[i][1] + factors[j][1];
751 if (d5 < nzeros)
752 nzeros = d5;
753 // get P*(2^M[extra_digits])/10^extra_digits
754 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
756 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
757 amount = recip_scale[nzeros];
758 __shr_128_long (CQ, Qh, amount);
760 diff_expon += nzeros;
761 } else {
762 // decompose Q as Qh*10^17 + Ql
763 Q_low = CQ.w[0];
766 tdigit[0] = Q_low & 0x3ffffff;
767 tdigit[1] = 0;
768 QX = Q_low >> 26;
769 QX32 = QX;
770 nzeros = 0;
772 for (j = 0; QX32; j++, QX32 >>= 7) {
773 k = (QX32 & 127);
774 tdigit[0] += convert_table[j][k][0];
775 tdigit[1] += convert_table[j][k][1];
776 if (tdigit[0] >= 100000000) {
777 tdigit[0] -= 100000000;
778 tdigit[1]++;
782 if (tdigit[1] >= 100000000) {
783 tdigit[1] -= 100000000;
784 if (tdigit[1] >= 100000000)
785 tdigit[1] -= 100000000;
788 digit = tdigit[0];
789 if (!digit && !tdigit[1])
790 nzeros += 16;
791 else {
792 if (!digit) {
793 nzeros += 8;
794 digit = tdigit[1];
796 // decompose digit
797 PD = (UINT64) digit *0x068DB8BBull;
798 digit_h = (UINT32) (PD >> 40);
799 digit_low = digit - digit_h * 10000;
801 if (!digit_low)
802 nzeros += 4;
803 else
804 digit_h = digit_low;
806 if (!(digit_h & 1))
807 nzeros +=
808 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
809 (digit_h & 7));
812 if (nzeros) {
813 // get P*(2^M[extra_digits])/10^extra_digits
814 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
816 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
817 amount = recip_scale[nzeros];
818 __shr_128 (CQ, Qh, amount);
820 diff_expon += nzeros;
824 if(diff_expon>=0){
825 res =
826 fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0],
827 rnd_mode, pfpsf);
828 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
829 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
830 #endif
831 BID_RETURN (res);
834 #endif
836 if (diff_expon >= 0) {
837 #ifdef IEEE_ROUND_NEAREST
838 // rounding
839 // 2*CA4 - CY
840 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
841 CA4r.w[0] = CA4.w[0] + CA4.w[0];
842 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
843 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
845 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
846 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
848 CQ.w[0] += carry64;
849 #else
850 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
851 // rounding
852 // 2*CA4 - CY
853 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
854 CA4r.w[0] = CA4.w[0] + CA4.w[0];
855 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
856 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
858 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
859 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
861 CQ.w[0] += carry64;
862 if (CQ.w[0] < carry64)
863 CQ.w[1]++;
864 #else
865 rmode = rnd_mode;
866 if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2)
867 rmode = 3 - rmode;
868 switch (rmode) {
869 case ROUNDING_TO_NEAREST: // round to nearest code
870 // rounding
871 // 2*CA4 - CY
872 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
873 CA4r.w[0] = CA4.w[0] + CA4.w[0];
874 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
875 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
876 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
877 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
878 CQ.w[0] += carry64;
879 if (CQ.w[0] < carry64)
880 CQ.w[1]++;
881 break;
882 case ROUNDING_TIES_AWAY:
883 // rounding
884 // 2*CA4 - CY
885 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
886 CA4r.w[0] = CA4.w[0] + CA4.w[0];
887 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
888 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
889 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
890 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
891 CQ.w[0] += carry64;
892 if (CQ.w[0] < carry64)
893 CQ.w[1]++;
894 break;
895 case ROUNDING_DOWN:
896 case ROUNDING_TO_ZERO:
897 break;
898 default: // rounding up
899 CQ.w[0]++;
900 if (!CQ.w[0])
901 CQ.w[1]++;
902 break;
904 #endif
905 #endif
907 res =
908 fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], rnd_mode,
909 pfpsf);
910 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
911 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
912 #endif
913 BID_RETURN (res);
914 } else {
915 // UF occurs
917 #ifdef SET_STATUS_FLAGS
918 if ((diff_expon + 16 < 0)) {
919 // set status flags
920 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
922 #endif
923 rmode = rnd_mode;
924 res =
925 get_BID64_UF (sign_x ^ sign_y, diff_expon, CQ.w[0], CA4.w[1] | CA4.w[0], rmode, pfpsf);
926 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
927 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
928 #endif
929 BID_RETURN (res);
936 //#define LEAVE_TRAILING_ZEROS
938 TYPE0_FUNCTION_ARG128_ARGTYPE2 (UINT64, bid64qd_div, x, UINT64, y)
940 UINT256 CA4 =
941 { {0x0ull, 0x0ull, 0x0ull, 0x0ull} }, CA4r, P256, QB256;
942 UINT128 CX, CY, T128, CQ, CQ2, CR, CA, TP128, Qh, Ql, Tmp;
943 UINT64 sign_x, sign_y, T, carry64, D, Q_low, QX, PD, res, valid_y;
944 int_float fx, fy, f64;
945 UINT32 QX32, tdigit[3], digit, digit_h, digit_low;
946 int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2,
947 digits_q, amount;
948 int nzeros, i, j, k, d5, done = 0;
949 unsigned rmode;
950 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
951 fexcept_t binaryflags = 0;
952 #endif
954 valid_y = unpack_BID64 (&sign_y, &exponent_y, &CY.w[0], (y));
956 // unpack arguments, check for NaN or Infinity
957 if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
958 // test if x is NaN
959 if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
960 #ifdef SET_STATUS_FLAGS
961 if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull || // sNaN
962 (y & 0x7e00000000000000ull) == 0x7e00000000000000ull)
963 __set_status_flags (pfpsf, INVALID_EXCEPTION);
964 #endif
965 Tmp.w[1] = (CX.w[1] & 0x00003fffffffffffull);
966 Tmp.w[0] = CX.w[0];
967 TP128 = reciprocals10_128[18];
968 __mul_128x128_full (Qh, Ql, Tmp, TP128);
969 amount = recip_scale[18];
970 __shr_128 (Tmp, Qh, amount);
971 res = (CX.w[1] & 0xfc00000000000000ull) | Tmp.w[0];
972 BID_RETURN (res);
974 // x is Infinity?
975 if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
976 // check if y is Inf.
977 if (((y & 0x7c00000000000000ull) == 0x7800000000000000ull))
978 // return NaN
980 #ifdef SET_STATUS_FLAGS
981 __set_status_flags (pfpsf, INVALID_EXCEPTION);
982 #endif
983 res = 0x7c00000000000000ull;
984 BID_RETURN (res);
986 if (((y & 0x7c00000000000000ull) != 0x7c00000000000000ull)) {
987 // otherwise return +/-Inf
988 res =
989 ((x.w[1] ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull;
990 BID_RETURN (res);
993 // x is 0
994 if (((y & INFINITY_MASK64) != INFINITY_MASK64) &&
995 !(CY.w[0])) {
996 #ifdef SET_STATUS_FLAGS
997 __set_status_flags (pfpsf, INVALID_EXCEPTION);
998 #endif
999 // x=y=0, return NaN
1000 res = 0x7c00000000000000ull;
1001 BID_RETURN (res);
1003 // return 0
1004 if (((y & 0x7800000000000000ull) != 0x7800000000000000ull)) {
1005 if (!CY.w[0]) {
1006 #ifdef SET_STATUS_FLAGS
1007 __set_status_flags (pfpsf, INVALID_EXCEPTION);
1008 #endif
1009 res = 0x7c00000000000000ull;
1010 BID_RETURN (res);
1012 exponent_x =
1013 exponent_x - exponent_y - DECIMAL_EXPONENT_BIAS_128 +
1014 (DECIMAL_EXPONENT_BIAS << 1);
1015 if (exponent_x > DECIMAL_MAX_EXPON_64)
1016 exponent_x = DECIMAL_MAX_EXPON_64;
1017 else if (exponent_x < 0)
1018 exponent_x = 0;
1019 res = (sign_x ^ sign_y) | (((UINT64) exponent_x) << 53);
1020 BID_RETURN (res);
1023 CY.w[1] = 0;
1024 if (!valid_y) {
1025 // y is Inf. or NaN
1027 // test if y is NaN
1028 if ((y & NAN_MASK64) == NAN_MASK64) {
1029 #ifdef SET_STATUS_FLAGS
1030 if ((y & SNAN_MASK64) == SNAN_MASK64) // sNaN
1031 __set_status_flags (pfpsf, INVALID_EXCEPTION);
1032 #endif
1033 BID_RETURN (CY.w[0] & QUIET_MASK64);
1035 // y is Infinity?
1036 if (((y) & 0x7800000000000000ull) == 0x7800000000000000ull) {
1037 // return +/-0
1038 res = sign_x ^ sign_y;
1039 BID_RETURN (res);
1041 // y is 0, return +/-Inf
1042 res =
1043 ((x.w[1] ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull;
1044 #ifdef SET_STATUS_FLAGS
1045 __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION);
1046 #endif
1047 BID_RETURN (res);
1049 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1050 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
1051 #endif
1052 diff_expon =
1053 exponent_x - exponent_y - DECIMAL_EXPONENT_BIAS_128 +
1054 (DECIMAL_EXPONENT_BIAS << 1);
1056 if (__unsigned_compare_gt_128 (CY, CX)) {
1057 // CX < CY
1059 // 2^64
1060 f64.i = 0x5f800000;
1062 // fx ~ CX, fy ~ CY
1063 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
1064 fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0];
1065 // expon_cy - expon_cx
1066 bin_index = (fy.i - fx.i) >> 23;
1068 if (CX.w[1]) {
1069 T = power10_index_binexp_128[bin_index].w[0];
1070 __mul_64x128_short (CA, T, CX);
1071 } else {
1072 T128 = power10_index_binexp_128[bin_index];
1073 __mul_64x128_short (CA, CX.w[0], T128);
1076 ed2 = 15;
1077 if (__unsigned_compare_gt_128 (CY, CA))
1078 ed2++;
1080 T128 = power10_table_128[ed2];
1081 __mul_128x128_to_256 (CA4, CA, T128);
1083 ed2 += estimate_decimal_digits[bin_index];
1084 CQ.w[0] = CQ.w[1] = 0;
1085 diff_expon = diff_expon - ed2;
1087 } else {
1088 // get CQ = CX/CY
1089 __div_128_by_128 (&CQ, &CR, CX, CY);
1091 // get number of decimal digits in CQ
1092 // 2^64
1093 f64.i = 0x5f800000;
1094 fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0];
1095 // binary expon. of CQ
1096 bin_expon = (fx.i - 0x3f800000) >> 23;
1098 digits_q = estimate_decimal_digits[bin_expon];
1099 TP128.w[0] = power10_index_binexp_128[bin_expon].w[0];
1100 TP128.w[1] = power10_index_binexp_128[bin_expon].w[1];
1101 if (__unsigned_compare_ge_128 (CQ, TP128))
1102 digits_q++;
1104 if (digits_q <= 16) {
1105 if (!CR.w[1] && !CR.w[0]) {
1106 res = get_BID64 (sign_x ^ sign_y, diff_expon,
1107 CQ.w[0], rnd_mode, pfpsf);
1108 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1109 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
1110 #endif
1111 BID_RETURN (res);
1114 ed2 = 16 - digits_q;
1115 T128.w[0] = power10_table_128[ed2].w[0];
1116 __mul_64x128_to_192 (CA4, (T128.w[0]), CR);
1117 diff_expon = diff_expon - ed2;
1118 CQ.w[0] *= T128.w[0];
1119 } else {
1120 ed2 = digits_q - 16;
1121 diff_expon += ed2;
1122 T128 = reciprocals10_128[ed2];
1123 __mul_128x128_to_256 (P256, CQ, T128);
1124 amount = recip_scale[ed2];
1125 CQ.w[0] = (P256.w[2] >> amount) | (P256.w[3] << (64 - amount));
1126 CQ.w[1] = 0;
1128 __mul_64x64_to_128 (CQ2, CQ.w[0], (power10_table_128[ed2].w[0]));
1130 __mul_64x64_to_128 (QB256, CQ2.w[0], CY.w[0]);
1131 QB256.w[1] += CQ2.w[0] * CY.w[1] + CQ2.w[1] * CY.w[0];
1133 CA4.w[1] = CX.w[1] - QB256.w[1];
1134 CA4.w[0] = CX.w[0] - QB256.w[0];
1135 if (CX.w[0] < QB256.w[0])
1136 CA4.w[1]--;
1137 if (CR.w[0] || CR.w[1])
1138 CA4.w[0] |= 1;
1139 done = 1;
1140 if(CA4.w[1]|CA4.w[0]) {
1141 __mul_64x128_low(CY, (power10_table_128[ed2].w[0]),CY);
1148 if (!done) {
1149 __div_256_by_128 (&CQ, &CA4, CY);
1152 #ifdef SET_STATUS_FLAGS
1153 if (CA4.w[0] || CA4.w[1]) {
1154 // set status flags
1155 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
1157 #ifndef LEAVE_TRAILING_ZEROS
1158 else
1159 #endif
1160 #else
1161 #ifndef LEAVE_TRAILING_ZEROS
1162 if (!CA4.w[0] && !CA4.w[1])
1163 #endif
1164 #endif
1165 #ifndef LEAVE_TRAILING_ZEROS
1166 // check whether result is exact
1168 if(!done) {
1169 // check whether CX, CY are short
1170 if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) {
1171 i = (int) CY.w[0] - 1;
1172 j = (int) CX.w[0] - 1;
1173 // difference in powers of 2 factors for Y and X
1174 nzeros = ed2 - factors[i][0] + factors[j][0];
1175 // difference in powers of 5 factors
1176 d5 = ed2 - factors[i][1] + factors[j][1];
1177 if (d5 < nzeros)
1178 nzeros = d5;
1179 // get P*(2^M[extra_digits])/10^extra_digits
1180 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
1181 //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2];
1183 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1184 amount = recip_scale[nzeros];
1185 __shr_128_long (CQ, Qh, amount);
1187 diff_expon += nzeros;
1188 } else {
1189 // decompose Q as Qh*10^17 + Ql
1190 //T128 = reciprocals10_128[17];
1191 Q_low = CQ.w[0];
1194 tdigit[0] = Q_low & 0x3ffffff;
1195 tdigit[1] = 0;
1196 QX = Q_low >> 26;
1197 QX32 = QX;
1198 nzeros = 0;
1200 for (j = 0; QX32; j++, QX32 >>= 7) {
1201 k = (QX32 & 127);
1202 tdigit[0] += convert_table[j][k][0];
1203 tdigit[1] += convert_table[j][k][1];
1204 if (tdigit[0] >= 100000000) {
1205 tdigit[0] -= 100000000;
1206 tdigit[1]++;
1210 if (tdigit[1] >= 100000000) {
1211 tdigit[1] -= 100000000;
1212 if (tdigit[1] >= 100000000)
1213 tdigit[1] -= 100000000;
1216 digit = tdigit[0];
1217 if (!digit && !tdigit[1])
1218 nzeros += 16;
1219 else {
1220 if (!digit) {
1221 nzeros += 8;
1222 digit = tdigit[1];
1224 // decompose digit
1225 PD = (UINT64) digit *0x068DB8BBull;
1226 digit_h = (UINT32) (PD >> 40);
1227 digit_low = digit - digit_h * 10000;
1229 if (!digit_low)
1230 nzeros += 4;
1231 else
1232 digit_h = digit_low;
1234 if (!(digit_h & 1))
1235 nzeros +=
1236 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
1237 (digit_h & 7));
1240 if (nzeros) {
1241 // get P*(2^M[extra_digits])/10^extra_digits
1242 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
1244 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1245 amount = recip_scale[nzeros];
1246 __shr_128 (CQ, Qh, amount);
1248 diff_expon += nzeros;
1253 if(diff_expon>=0){
1254 res =
1255 fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0],
1256 rnd_mode, pfpsf);
1257 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1258 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
1259 #endif
1260 BID_RETURN (res);
1263 #endif
1265 if (diff_expon >= 0) {
1266 #ifdef IEEE_ROUND_NEAREST
1267 // rounding
1268 // 2*CA4 - CY
1269 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
1270 CA4r.w[0] = CA4.w[0] + CA4.w[0];
1271 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
1272 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
1274 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
1275 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
1277 CQ.w[0] += carry64;
1278 //if(CQ.w[0]<carry64)
1279 //CQ.w[1] ++;
1280 #else
1281 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
1282 // rounding
1283 // 2*CA4 - CY
1284 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
1285 CA4r.w[0] = CA4.w[0] + CA4.w[0];
1286 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
1287 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
1289 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
1290 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
1292 CQ.w[0] += carry64;
1293 if (CQ.w[0] < carry64)
1294 CQ.w[1]++;
1295 #else
1296 rmode = rnd_mode;
1297 if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2)
1298 rmode = 3 - rmode;
1299 switch (rmode) {
1300 case ROUNDING_TO_NEAREST: // round to nearest code
1301 // rounding
1302 // 2*CA4 - CY
1303 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
1304 CA4r.w[0] = CA4.w[0] + CA4.w[0];
1305 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
1306 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
1307 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
1308 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
1309 CQ.w[0] += carry64;
1310 if (CQ.w[0] < carry64)
1311 CQ.w[1]++;
1312 break;
1313 case ROUNDING_TIES_AWAY:
1314 // rounding
1315 // 2*CA4 - CY
1316 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
1317 CA4r.w[0] = CA4.w[0] + CA4.w[0];
1318 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
1319 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
1320 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
1321 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
1322 CQ.w[0] += carry64;
1323 if (CQ.w[0] < carry64)
1324 CQ.w[1]++;
1325 break;
1326 case ROUNDING_DOWN:
1327 case ROUNDING_TO_ZERO:
1328 break;
1329 default: // rounding up
1330 CQ.w[0]++;
1331 if (!CQ.w[0])
1332 CQ.w[1]++;
1333 break;
1335 #endif
1336 #endif
1339 res =
1340 fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], rnd_mode,
1341 pfpsf);
1342 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1343 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
1344 #endif
1345 BID_RETURN (res);
1346 } else {
1347 // UF occurs
1349 #ifdef SET_STATUS_FLAGS
1350 if ((diff_expon + 16 < 0)) {
1351 // set status flags
1352 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
1354 #endif
1355 rmode = rnd_mode;
1356 res =
1357 get_BID64_UF (sign_x ^ sign_y, diff_expon, CQ.w[0], CA4.w[1] | CA4.w[0], rmode, pfpsf);
1358 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1359 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
1360 #endif
1361 BID_RETURN (res);
1367 //#define LEAVE_TRAILING_ZEROS
1369 extern UINT32 convert_table[5][128][2];
1370 extern SINT8 factors[][2];
1371 extern UINT8 packed_10000_zeros[];
1374 //UINT64* bid64_div128x128(UINT64 res, UINT128 *px, UINT128 *py, unsigned rnd_mode, unsigned *pfpsf)
1376 TYPE0_FUNCTION_ARG128_ARG128 (UINT64, bid64qq_div, x, y)
1377 UINT256 CA4 =
1378 { {0x0ull, 0x0ull, 0x0ull, 0x0ull} }, CA4r, P256, QB256;
1379 UINT128 CX, CY, T128, CQ, CQ2, CR, CA, TP128, Qh, Ql, Tmp;
1380 UINT64 sign_x, sign_y, T, carry64, D, Q_low, QX, valid_y, PD, res;
1381 int_float fx, fy, f64;
1382 UINT32 QX32, tdigit[3], digit, digit_h, digit_low;
1383 int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2,
1384 digits_q, amount;
1385 int nzeros, i, j, k, d5, done = 0;
1386 unsigned rmode;
1387 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1388 fexcept_t binaryflags = 0;
1389 #endif
1391 valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y);
1393 // unpack arguments, check for NaN or Infinity
1394 if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
1395 // test if x is NaN
1396 if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
1397 #ifdef SET_STATUS_FLAGS
1398 if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull || // sNaN
1399 (y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull)
1400 __set_status_flags (pfpsf, INVALID_EXCEPTION);
1401 #endif
1402 Tmp.w[1] = (CX.w[1] & 0x00003fffffffffffull);
1403 Tmp.w[0] = CX.w[0];
1404 TP128 = reciprocals10_128[18];
1405 __mul_128x128_full (Qh, Ql, Tmp, TP128);
1406 amount = recip_scale[18];
1407 __shr_128 (Tmp, Qh, amount);
1408 res = (CX.w[1] & 0xfc00000000000000ull) | Tmp.w[0];
1409 BID_RETURN (res);
1411 // x is Infinity?
1412 if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
1413 // check if y is Inf.
1414 if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull))
1415 // return NaN
1417 #ifdef SET_STATUS_FLAGS
1418 __set_status_flags (pfpsf, INVALID_EXCEPTION);
1419 #endif
1420 res = 0x7c00000000000000ull;
1421 BID_RETURN (res);
1423 if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull)) {
1424 // otherwise return +/-Inf
1425 res =
1426 ((x.w[1] ^ y.
1427 w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull;
1428 BID_RETURN (res);
1431 // x is 0
1432 if (((y.w[1] & 0x7800000000000000ull) != 0x7800000000000000ull)) {
1433 if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) {
1434 #ifdef SET_STATUS_FLAGS
1435 __set_status_flags (pfpsf, INVALID_EXCEPTION);
1436 #endif
1437 // x=y=0, return NaN
1438 res = 0x7c00000000000000ull;
1439 BID_RETURN (res);
1441 // return 0
1442 res = (x.w[1] ^ y.w[1]) & 0x8000000000000000ull;
1443 exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS;
1444 if (exponent_x > DECIMAL_MAX_EXPON_64)
1445 exponent_x = DECIMAL_MAX_EXPON_64;
1446 else if (exponent_x < 0)
1447 exponent_x = 0;
1448 res |= (((UINT64) exponent_x) << 53);
1449 BID_RETURN (res);
1452 if (!valid_y) {
1453 // y is Inf. or NaN
1455 // test if y is NaN
1456 if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
1457 #ifdef SET_STATUS_FLAGS
1458 if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN
1459 __set_status_flags (pfpsf, INVALID_EXCEPTION);
1460 #endif
1461 Tmp.w[1] = (CY.w[1] & 0x00003fffffffffffull);
1462 Tmp.w[0] = CY.w[0];
1463 TP128 = reciprocals10_128[18];
1464 __mul_128x128_full (Qh, Ql, Tmp, TP128);
1465 amount = recip_scale[18];
1466 __shr_128 (Tmp, Qh, amount);
1467 res = (CY.w[1] & 0xfc00000000000000ull) | Tmp.w[0];
1468 BID_RETURN (res);
1470 // y is Infinity?
1471 if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
1472 // return +/-0
1473 res = sign_x ^ sign_y;
1474 BID_RETURN (res);
1476 // y is 0, return +/-Inf
1477 res =
1478 ((x.w[1] ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull;
1479 #ifdef SET_STATUS_FLAGS
1480 __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION);
1481 #endif
1482 BID_RETURN (res);
1484 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1485 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
1486 #endif
1487 diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS;
1489 if (__unsigned_compare_gt_128 (CY, CX)) {
1490 // CX < CY
1492 // 2^64
1493 f64.i = 0x5f800000;
1495 // fx ~ CX, fy ~ CY
1496 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
1497 fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0];
1498 // expon_cy - expon_cx
1499 bin_index = (fy.i - fx.i) >> 23;
1501 if (CX.w[1]) {
1502 T = power10_index_binexp_128[bin_index].w[0];
1503 __mul_64x128_short (CA, T, CX);
1504 } else {
1505 T128 = power10_index_binexp_128[bin_index];
1506 __mul_64x128_short (CA, CX.w[0], T128);
1509 ed2 = 15;
1510 if (__unsigned_compare_gt_128 (CY, CA))
1511 ed2++;
1513 T128 = power10_table_128[ed2];
1514 __mul_128x128_to_256 (CA4, CA, T128);
1516 ed2 += estimate_decimal_digits[bin_index];
1517 CQ.w[0] = CQ.w[1] = 0;
1518 diff_expon = diff_expon - ed2;
1520 } else {
1521 // get CQ = CX/CY
1522 __div_128_by_128 (&CQ, &CR, CX, CY);
1524 // get number of decimal digits in CQ
1525 // 2^64
1526 f64.i = 0x5f800000;
1527 fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0];
1528 // binary expon. of CQ
1529 bin_expon = (fx.i - 0x3f800000) >> 23;
1531 digits_q = estimate_decimal_digits[bin_expon];
1532 TP128.w[0] = power10_index_binexp_128[bin_expon].w[0];
1533 TP128.w[1] = power10_index_binexp_128[bin_expon].w[1];
1534 if (__unsigned_compare_ge_128 (CQ, TP128))
1535 digits_q++;
1537 if (digits_q <= 16) {
1538 if (!CR.w[1] && !CR.w[0]) {
1539 res = get_BID64 (sign_x ^ sign_y, diff_expon,
1540 CQ.w[0], rnd_mode, pfpsf);
1541 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1542 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
1543 #endif
1544 BID_RETURN (res);
1547 ed2 = 16 - digits_q;
1548 T128.w[0] = power10_table_128[ed2].w[0];
1549 __mul_64x128_to_192 (CA4, (T128.w[0]), CR);
1550 diff_expon = diff_expon - ed2;
1551 CQ.w[0] *= T128.w[0];
1552 } else {
1553 ed2 = digits_q - 16;
1554 diff_expon += ed2;
1555 T128 = reciprocals10_128[ed2];
1556 __mul_128x128_to_256 (P256, CQ, T128);
1557 amount = recip_scale[ed2];
1558 CQ.w[0] = (P256.w[2] >> amount) | (P256.w[3] << (64 - amount));
1559 CQ.w[1] = 0;
1561 __mul_64x64_to_128 (CQ2, CQ.w[0], (power10_table_128[ed2].w[0]));
1563 __mul_64x64_to_128 (QB256, CQ2.w[0], CY.w[0]);
1564 QB256.w[1] += CQ2.w[0] * CY.w[1] + CQ2.w[1] * CY.w[0];
1566 CA4.w[1] = CX.w[1] - QB256.w[1];
1567 CA4.w[0] = CX.w[0] - QB256.w[0];
1568 if (CX.w[0] < QB256.w[0])
1569 CA4.w[1]--;
1570 if (CR.w[0] || CR.w[1])
1571 CA4.w[0] |= 1;
1572 done = 1;
1573 if(CA4.w[1]|CA4.w[0]) {
1574 __mul_64x128_low(CY, (power10_table_128[ed2].w[0]),CY);
1580 if (!done) {
1581 __div_256_by_128 (&CQ, &CA4, CY);
1586 #ifdef SET_STATUS_FLAGS
1587 if (CA4.w[0] || CA4.w[1]) {
1588 // set status flags
1589 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
1591 #ifndef LEAVE_TRAILING_ZEROS
1592 else
1593 #endif
1594 #else
1595 #ifndef LEAVE_TRAILING_ZEROS
1596 if (!CA4.w[0] && !CA4.w[1])
1597 #endif
1598 #endif
1599 #ifndef LEAVE_TRAILING_ZEROS
1600 // check whether result is exact
1602 if(!done) {
1603 // check whether CX, CY are short
1604 if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) {
1605 i = (int) CY.w[0] - 1;
1606 j = (int) CX.w[0] - 1;
1607 // difference in powers of 2 factors for Y and X
1608 nzeros = ed2 - factors[i][0] + factors[j][0];
1609 // difference in powers of 5 factors
1610 d5 = ed2 - factors[i][1] + factors[j][1];
1611 if (d5 < nzeros)
1612 nzeros = d5;
1613 // get P*(2^M[extra_digits])/10^extra_digits
1614 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
1615 //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2];
1617 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1618 amount = recip_scale[nzeros];
1619 __shr_128_long (CQ, Qh, amount);
1621 diff_expon += nzeros;
1622 } else {
1623 // decompose Q as Qh*10^17 + Ql
1624 //T128 = reciprocals10_128[17];
1625 Q_low = CQ.w[0];
1628 tdigit[0] = Q_low & 0x3ffffff;
1629 tdigit[1] = 0;
1630 QX = Q_low >> 26;
1631 QX32 = QX;
1632 nzeros = 0;
1634 for (j = 0; QX32; j++, QX32 >>= 7) {
1635 k = (QX32 & 127);
1636 tdigit[0] += convert_table[j][k][0];
1637 tdigit[1] += convert_table[j][k][1];
1638 if (tdigit[0] >= 100000000) {
1639 tdigit[0] -= 100000000;
1640 tdigit[1]++;
1644 if (tdigit[1] >= 100000000) {
1645 tdigit[1] -= 100000000;
1646 if (tdigit[1] >= 100000000)
1647 tdigit[1] -= 100000000;
1650 digit = tdigit[0];
1651 if (!digit && !tdigit[1])
1652 nzeros += 16;
1653 else {
1654 if (!digit) {
1655 nzeros += 8;
1656 digit = tdigit[1];
1658 // decompose digit
1659 PD = (UINT64) digit *0x068DB8BBull;
1660 digit_h = (UINT32) (PD >> 40);
1661 digit_low = digit - digit_h * 10000;
1663 if (!digit_low)
1664 nzeros += 4;
1665 else
1666 digit_h = digit_low;
1668 if (!(digit_h & 1))
1669 nzeros +=
1670 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
1671 (digit_h & 7));
1674 if (nzeros) {
1675 // get P*(2^M[extra_digits])/10^extra_digits
1676 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
1678 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1679 amount = recip_scale[nzeros];
1680 __shr_128 (CQ, Qh, amount);
1682 diff_expon += nzeros;
1687 if(diff_expon>=0){
1688 res =
1689 fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0],
1690 rnd_mode, pfpsf);
1691 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1692 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
1693 #endif
1694 BID_RETURN (res);
1697 #endif
1699 if(diff_expon>=0) {
1701 #ifdef IEEE_ROUND_NEAREST
1702 // rounding
1703 // 2*CA4 - CY
1704 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
1705 CA4r.w[0] = CA4.w[0] + CA4.w[0];
1706 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
1707 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
1709 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
1710 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
1712 CQ.w[0] += carry64;
1713 //if(CQ.w[0]<carry64)
1714 //CQ.w[1] ++;
1715 #else
1716 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
1717 // rounding
1718 // 2*CA4 - CY
1719 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
1720 CA4r.w[0] = CA4.w[0] + CA4.w[0];
1721 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
1722 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
1724 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
1725 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
1727 CQ.w[0] += carry64;
1728 if (CQ.w[0] < carry64)
1729 CQ.w[1]++;
1730 #else
1731 rmode = rnd_mode;
1732 if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2)
1733 rmode = 3 - rmode;
1734 switch (rmode) {
1735 case ROUNDING_TO_NEAREST: // round to nearest code
1736 // rounding
1737 // 2*CA4 - CY
1738 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
1739 CA4r.w[0] = CA4.w[0] + CA4.w[0];
1740 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
1741 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
1742 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
1743 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
1744 CQ.w[0] += carry64;
1745 if (CQ.w[0] < carry64)
1746 CQ.w[1]++;
1747 break;
1748 case ROUNDING_TIES_AWAY:
1749 // rounding
1750 // 2*CA4 - CY
1751 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
1752 CA4r.w[0] = CA4.w[0] + CA4.w[0];
1753 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
1754 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
1755 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
1756 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
1757 CQ.w[0] += carry64;
1758 if (CQ.w[0] < carry64)
1759 CQ.w[1]++;
1760 break;
1761 case ROUNDING_DOWN:
1762 case ROUNDING_TO_ZERO:
1763 break;
1764 default: // rounding up
1765 CQ.w[0]++;
1766 if (!CQ.w[0])
1767 CQ.w[1]++;
1768 break;
1770 #endif
1771 #endif
1774 res =
1775 fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], rnd_mode,
1776 pfpsf);
1777 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1778 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
1779 #endif
1780 BID_RETURN (res);
1781 } else {
1782 // UF occurs
1784 #ifdef SET_STATUS_FLAGS
1785 if ((diff_expon + 16 < 0)) {
1786 // set status flags
1787 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
1789 #endif
1790 rmode = rnd_mode;
1791 res =
1792 get_BID64_UF (sign_x ^ sign_y, diff_expon, CQ.w[0], CA4.w[1] | CA4.w[0], rmode, pfpsf);
1793 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1794 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
1795 #endif
1796 BID_RETURN (res);