libgcc/config/libbid/bid64_div.c

   1 /* Copyright (C) 2007  Free Software Foundation, Inc.
   2
   3 This file is part of GCC.
   4
   5 GCC is free software; you can redistribute it and/or modify it under
   6 the terms of the GNU General Public License as published by the Free
   7 Software Foundation; either version 2, or (at your option) any later
   8 version.
   9
  10 In addition to the permissions in the GNU General Public License, the
  11 Free Software Foundation gives you unlimited permission to link the
  12 compiled version of this file into combinations with other programs,
  13 and to distribute those combinations without any restriction coming
  14 from the use of this file.  (The General Public License restrictions
  15 do apply in other respects; for example, they cover modification of
  16 the file, and distribution when not linked into a combine
  17 executable.)
  18
  19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  21 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  22 for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GCC; see the file COPYING.  If not, write to the Free
  26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  27 02110-1301, USA.  */
  28
  29 /*****************************************************************************
  30  *    BID64 divide
  31  *****************************************************************************
  32  *
  33  *  Algorithm description:
  34  *
  35  *  if(coefficient_x<coefficient_y)
  36  *    p = number_digits(coefficient_y) - number_digits(coefficient_x)
  37  *    A = coefficient_x*10^p
  38  *    B = coefficient_y
  39  *    CA= A*10^(15+j), j=0 for A>=B, 1 otherwise
  40  *    Q = 0
  41  *  else
  42  *    get Q=(int)(coefficient_x/coefficient_y)
  43  *        (based on double precision divide)
  44  *    check for exact divide case
  45  *    Let R = coefficient_x - Q*coefficient_y
  46  *    Let m=16-number_digits(Q)
  47  *    CA=R*10^m, Q=Q*10^m
  48  *    B = coefficient_y
  49  *  endif
  50  *    if (CA<2^64)
  51  *      Q += CA/B  (64-bit unsigned divide)
  52  *    else
  53  *      get final Q using double precision divide, followed by 3 integer
  54  *          iterations
  55  *    if exact result, eliminate trailing zeros
  56  *    check for underflow
  57  *    round coefficient to nearest
  58  *
  59  ****************************************************************************/
  60
  61 #include "bid_internal.h"
  62 #include "bid_div_macros.h"
  63 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
  64 #include <fenv.h>
  65
  66 #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT
  67 #endif
  68
  69 extern UINT32 convert_table[5][128][2];
  70 extern SINT8 factors[][2];
  71 extern UINT8 packed_10000_zeros[];
  72
  73
  74 #if DECIMAL_CALL_BY_REFERENCE
  75
  76 void
  77 bid64_div (UINT64 * pres, UINT64 * px,
  78            UINT64 *
  79            py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
  80            _EXC_INFO_PARAM) {
  81   UINT64 x, y;
  82 #else
  83
  84 UINT64
  85 bid64_div (UINT64 x,
  86            UINT64 y _RND_MODE_PARAM _EXC_FLAGS_PARAM
  87            _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
  88 #endif
  89   UINT128 CA, CT;
  90   UINT64 sign_x, sign_y, coefficient_x, coefficient_y, A, B, QX, PD;
  91   UINT64 A2, Q, Q2, B2, B4, B5, R, T, DU, res;
  92   UINT64 valid_x, valid_y;
  93   SINT64 D;
  94   int_double t_scale, tempq, temp_b;
  95   int_float tempx, tempy;
  96   double da, db, dq, da_h, da_l;
  97   int exponent_x, exponent_y, bin_expon_cx;
  98   int diff_expon, ed1, ed2, bin_index;
  99   int rmode, amount;
 100   int nzeros, i, j, k, d5;
 101   UINT32 QX32, tdigit[3], digit, digit_h, digit_low;
 102 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
 103   fexcept_t binaryflags = 0;
 104 #endif
 105
 106 #if DECIMAL_CALL_BY_REFERENCE
 107 #if !DECIMAL_GLOBAL_ROUNDING
 108   _IDEC_round rnd_mode = *prnd_mode;
 109 #endif
 110   x = *px;
 111   y = *py;
 112 #endif
 113
 114   valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x);
 115   valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y);
 116
 117   // unpack arguments, check for NaN or Infinity
 118   if (!valid_x) {
 119     // x is Inf. or NaN
 120 #ifdef SET_STATUS_FLAGS
 121     if ((y & SNAN_MASK64) == SNAN_MASK64)       // y is sNaN
 122       __set_status_flags (pfpsf, INVALID_EXCEPTION);
 123 #endif
 124
 125     // test if x is NaN
 126     if ((x & NAN_MASK64) == NAN_MASK64) {
 127 #ifdef SET_STATUS_FLAGS
 128       if ((x & SNAN_MASK64) == SNAN_MASK64)     // sNaN
 129         __set_status_flags (pfpsf, INVALID_EXCEPTION);
 130 #endif
 131       BID_RETURN (coefficient_x & QUIET_MASK64);
 132     }
 133     // x is Infinity?
 134     if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
 135       // check if y is Inf or NaN
 136       if ((y & INFINITY_MASK64) == INFINITY_MASK64) {
 137         // y==Inf, return NaN
 138         if ((y & NAN_MASK64) == INFINITY_MASK64) {      // Inf/Inf
 139 #ifdef SET_STATUS_FLAGS
 140           __set_status_flags (pfpsf, INVALID_EXCEPTION);
 141 #endif
 142           BID_RETURN (NAN_MASK64);
 143         }
 144       } else {
 145         // otherwise return +/-Inf
 146         BID_RETURN (((x ^ y) & 0x8000000000000000ull) |
 147                     INFINITY_MASK64);
 148       }
 149     }
 150     // x==0
 151     if (((y & INFINITY_MASK64) != INFINITY_MASK64)
 152         && !(coefficient_y)) {
 153       // y==0 , return NaN
 154 #ifdef SET_STATUS_FLAGS
 155       __set_status_flags (pfpsf, INVALID_EXCEPTION);
 156 #endif
 157       BID_RETURN (NAN_MASK64);
 158     }
 159     if (((y & INFINITY_MASK64) != INFINITY_MASK64)) {
 160       if ((y & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64)
 161         exponent_y = ((UINT32) (y >> 51)) & 0x3ff;
 162       else
 163         exponent_y = ((UINT32) (y >> 53)) & 0x3ff;
 164       sign_y = y & 0x8000000000000000ull;
 165
 166       exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS;
 167       if (exponent_x > DECIMAL_MAX_EXPON_64)
 168         exponent_x = DECIMAL_MAX_EXPON_64;
 169       else if (exponent_x < 0)
 170         exponent_x = 0;
 171       BID_RETURN ((sign_x ^ sign_y) | (((UINT64) exponent_x) << 53));
 172     }
 173
 174   }
 175   if (!valid_y) {
 176     // y is Inf. or NaN
 177
 178     // test if y is NaN
 179     if ((y & NAN_MASK64) == NAN_MASK64) {
 180 #ifdef SET_STATUS_FLAGS
 181       if ((y & SNAN_MASK64) == SNAN_MASK64)     // sNaN
 182         __set_status_flags (pfpsf, INVALID_EXCEPTION);
 183 #endif
 184       BID_RETURN (coefficient_y & QUIET_MASK64);
 185     }
 186     // y is Infinity?
 187     if ((y & INFINITY_MASK64) == INFINITY_MASK64) {
 188       // return +/-0
 189       BID_RETURN (((x ^ y) & 0x8000000000000000ull));
 190     }
 191     // y is 0
 192 #ifdef SET_STATUS_FLAGS
 193     __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION);
 194 #endif
 195     BID_RETURN ((sign_x ^ sign_y) | INFINITY_MASK64);
 196   }
 197 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
 198   (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
 199 #endif
 200   diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS;
 201
 202   if (coefficient_x < coefficient_y) {
 203     // get number of decimal digits for c_x, c_y
 204
 205     //--- get number of bits in the coefficients of x and y ---
 206     tempx.d = (float) coefficient_x;
 207     tempy.d = (float) coefficient_y;
 208     bin_index = (tempy.i - tempx.i) >> 23;
 209
 210     A = coefficient_x * power10_index_binexp[bin_index];
 211     B = coefficient_y;
 212
 213     temp_b.d = (double) B;
 214
 215     // compare A, B
 216     DU = (A - B) >> 63;
 217     ed1 = 15 + (int) DU;
 218     ed2 = estimate_decimal_digits[bin_index] + ed1;
 219     T = power10_table_128[ed1].w[0];
 220     __mul_64x64_to_128 (CA, A, T);
 221
 222     Q = 0;
 223     diff_expon = diff_expon - ed2;
 224
 225     // adjust double precision db, to ensure that later A/B - (int)(da/db) > -1
 226     if (coefficient_y < 0x0020000000000000ull) {
 227       temp_b.i += 1;
 228       db = temp_b.d;
 229     } else
 230       db = (double) (B + 2 + (B & 1));
 231
 232   } else {
 233     // get c_x/c_y
 234
 235     //  set last bit before conversion to DP
 236     A2 = coefficient_x | 1;
 237     da = (double) A2;
 238
 239     db = (double) coefficient_y;
 240
 241     tempq.d = da / db;
 242     Q = (UINT64) tempq.d;
 243
 244     R = coefficient_x - coefficient_y * Q;
 245
 246     // will use to get number of dec. digits of Q
 247     bin_expon_cx = (tempq.i >> 52) - 0x3ff;
 248
 249     // R<0 ?
 250     D = ((SINT64) R) >> 63;
 251     Q += D;
 252     R += (coefficient_y & D);
 253
 254     // exact result ?
 255     if (((SINT64) R) <= 0) {
 256       // can have R==-1 for coeff_y==1
 257       res =
 258         get_BID64 (sign_x ^ sign_y, diff_expon, (Q + R), rnd_mode,
 259                    pfpsf);
 260 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
 261       (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
 262 #endif
 263       BID_RETURN (res);
 264     }
 265     // get decimal digits of Q
 266     DU = power10_index_binexp[bin_expon_cx] - Q - 1;
 267     DU >>= 63;
 268
 269     ed2 = 16 - estimate_decimal_digits[bin_expon_cx] - (int) DU;
 270
 271     T = power10_table_128[ed2].w[0];
 272     __mul_64x64_to_128 (CA, R, T);
 273     B = coefficient_y;
 274
 275     Q *= power10_table_128[ed2].w[0];
 276     diff_expon -= ed2;
 277
 278   }
 279
 280   if (!CA.w[1]) {
 281     Q2 = CA.w[0] / B;
 282     B2 = B + B;
 283     B4 = B2 + B2;
 284     R = CA.w[0] - Q2 * B;
 285     Q += Q2;
 286   } else {
 287
 288     // 2^64
 289     t_scale.i = 0x43f0000000000000ull;
 290     // convert CA to DP
 291     da_h = CA.w[1];
 292     da_l = CA.w[0];
 293     da = da_h * t_scale.d + da_l;
 294
 295     // quotient
 296     dq = da / db;
 297     Q2 = (UINT64) dq;
 298
 299     // get w[0] remainder
 300     R = CA.w[0] - Q2 * B;
 301
 302     // R<0 ?
 303     D = ((SINT64) R) >> 63;
 304     Q2 += D;
 305     R += (B & D);
 306
 307     // now R<6*B
 308
 309     // quick divide
 310
 311     // 4*B
 312     B2 = B + B;
 313     B4 = B2 + B2;
 314
 315     R = R - B4;
 316     // R<0 ?
 317     D = ((SINT64) R) >> 63;
 318     // restore R if negative
 319     R += (B4 & D);
 320     Q2 += ((~D) & 4);
 321
 322     R = R - B2;
 323     // R<0 ?
 324     D = ((SINT64) R) >> 63;
 325     // restore R if negative
 326     R += (B2 & D);
 327     Q2 += ((~D) & 2);
 328
 329     R = R - B;
 330     // R<0 ?
 331     D = ((SINT64) R) >> 63;
 332     // restore R if negative
 333     R += (B & D);
 334     Q2 += ((~D) & 1);
 335
 336     Q += Q2;
 337   }
 338
 339 #ifdef SET_STATUS_FLAGS
 340   if (R) {
 341     // set status flags
 342     __set_status_flags (pfpsf, INEXACT_EXCEPTION);
 343   }
 344 #ifndef LEAVE_TRAILING_ZEROS
 345   else
 346 #endif
 347 #else
 348 #ifndef LEAVE_TRAILING_ZEROS
 349   if (!R)
 350 #endif
 351 #endif
 352 #ifndef LEAVE_TRAILING_ZEROS
 353   {
 354     // eliminate trailing zeros
 355
 356     // check whether CX, CY are short
 357     if ((coefficient_x <= 1024) && (coefficient_y <= 1024)) {
 358       i = (int) coefficient_y - 1;
 359       j = (int) coefficient_x - 1;
 360       // difference in powers of 2 factors for Y and X
 361       nzeros = ed2 - factors[i][0] + factors[j][0];
 362       // difference in powers of 5 factors
 363       d5 = ed2 - factors[i][1] + factors[j][1];
 364       if (d5 < nzeros)
 365         nzeros = d5;
 366
 367       __mul_64x64_to_128 (CT, Q, reciprocals10_64[nzeros]);
 368
 369       // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
 370       amount = short_recip_scale[nzeros];
 371       Q = CT.w[1] >> amount;
 372
 373       diff_expon += nzeros;
 374     } else {
 375       tdigit[0] = Q & 0x3ffffff;
 376       tdigit[1] = 0;
 377       QX = Q >> 26;
 378       QX32 = QX;
 379       nzeros = 0;
 380
 381       for (j = 0; QX32; j++, QX32 >>= 7) {
 382         k = (QX32 & 127);
 383         tdigit[0] += convert_table[j][k][0];
 384         tdigit[1] += convert_table[j][k][1];
 385         if (tdigit[0] >= 100000000) {
 386           tdigit[0] -= 100000000;
 387           tdigit[1]++;
 388         }
 389       }
 390
 391       digit = tdigit[0];
 392       if (!digit && !tdigit[1])
 393         nzeros += 16;
 394       else {
 395         if (!digit) {
 396           nzeros += 8;
 397           digit = tdigit[1];
 398         }
 399         // decompose digit
 400         PD = (UINT64) digit *0x068DB8BBull;
 401         digit_h = (UINT32) (PD >> 40);
 402         digit_low = digit - digit_h * 10000;
 403
 404         if (!digit_low)
 405           nzeros += 4;
 406         else
 407           digit_h = digit_low;
 408
 409         if (!(digit_h & 1))
 410           nzeros +=
 411             3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
 412                           (digit_h & 7));
 413       }
 414
 415       if (nzeros) {
 416         __mul_64x64_to_128 (CT, Q, reciprocals10_64[nzeros]);
 417
 418         // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
 419         amount = short_recip_scale[nzeros];
 420         Q = CT.w[1] >> amount;
 421       }
 422       diff_expon += nzeros;
 423
 424     }
 425     if (diff_expon >= 0) {
 426       res =
 427         fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, Q,
 428                                  rnd_mode, pfpsf);
 429 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
 430       (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
 431 #endif
 432       BID_RETURN (res);
 433     }
 434   }
 435 #endif
 436
 437   if (diff_expon >= 0) {
 438 #ifdef IEEE_ROUND_NEAREST
 439     // round to nearest code
 440     // R*10
 441     R += R;
 442     R = (R << 2) + R;
 443     B5 = B4 + B;
 444
 445     // compare 10*R to 5*B
 446     R = B5 - R;
 447     // correction for (R==0 && (Q&1))
 448     R -= (Q & 1);
 449     // R<0 ?
 450     D = ((UINT64) R) >> 63;
 451     Q += D;
 452 #else
 453 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
 454     // round to nearest code
 455     // R*10
 456     R += R;
 457     R = (R << 2) + R;
 458     B5 = B4 + B;
 459
 460     // compare 10*R to 5*B
 461     R = B5 - R;
 462     // correction for (R==0 && (Q&1))
 463     R -= (Q & 1);
 464     // R<0 ?
 465     D = ((UINT64) R) >> 63;
 466     Q += D;
 467 #else
 468     rmode = rnd_mode;
 469     if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2)
 470       rmode = 3 - rmode;
 471     switch (rmode) {
 472     case 0:     // round to nearest code
 473     case ROUNDING_TIES_AWAY:
 474       // R*10
 475       R += R;
 476       R = (R << 2) + R;
 477       B5 = B4 + B;
 478       // compare 10*R to 5*B
 479       R = B5 - R;
 480       // correction for (R==0 && (Q&1))
 481       R -= ((Q | (rmode >> 2)) & 1);
 482       // R<0 ?
 483       D = ((UINT64) R) >> 63;
 484       Q += D;
 485       break;
 486     case ROUNDING_DOWN:
 487     case ROUNDING_TO_ZERO:
 488       break;
 489     default:    // rounding up
 490       Q++;
 491       break;
 492     }
 493 #endif
 494 #endif
 495
 496     res =
 497       fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, Q, rnd_mode,
 498                                pfpsf);
 499 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
 500     (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
 501 #endif
 502     BID_RETURN (res);
 503   } else {
 504     // UF occurs
 505
 506 #ifdef SET_STATUS_FLAGS
 507     if ((diff_expon + 16 < 0)) {
 508       // set status flags
 509       __set_status_flags (pfpsf, INEXACT_EXCEPTION);
 510     }
 511 #endif
 512     rmode = rnd_mode;
 513     res =
 514       get_BID64_UF (sign_x ^ sign_y, diff_expon, Q, R, rmode, pfpsf);
 515 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
 516     (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
 517 #endif
 518     BID_RETURN (res);
 519
 520   }
 521 }
 522
 523
 524
 525 TYPE0_FUNCTION_ARGTYPE1_ARG128 (UINT64, bid64dq_div, UINT64, x, y)
 526      UINT256 CA4 =
 527        { {0x0ull, 0x0ull, 0x0ull, 0x0ull} }, CA4r, P256, QB256;
 528 UINT128 CX, CY, T128, CQ, CQ2, CR, CA, TP128, Qh, Ql, Tmp;
 529 UINT64 sign_x, sign_y, T, carry64, D, Q_low, QX, valid_y, PD, res;
 530 int_float fx, fy, f64;
 531 UINT32 QX32, tdigit[3], digit, digit_h, digit_low;
 532 int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2,
 533   digits_q, amount;
 534 int nzeros, i, j, k, d5, done = 0;
 535 unsigned rmode;
 536 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
 537 fexcept_t binaryflags = 0;
 538 #endif
 539
 540 valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y);
 541
 542         // unpack arguments, check for NaN or Infinity
 543 CX.w[1] = 0;
 544 if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], (x))) {
 545 #ifdef SET_STATUS_FLAGS
 546     if (((y.w[1] & SNAN_MASK64) == SNAN_MASK64) ||      // y is sNaN
 547                 ((x & SNAN_MASK64) == SNAN_MASK64))
 548       __set_status_flags (pfpsf, INVALID_EXCEPTION);
 549 #endif
 550   // test if x is NaN
 551   if (((x) & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
 552     res = CX.w[0];
 553     BID_RETURN (res & QUIET_MASK64);
 554   }
 555   // x is Infinity?
 556   if (((x) & 0x7800000000000000ull) == 0x7800000000000000ull) {
 557     // check if y is Inf.
 558     if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull))
 559       // return NaN
 560     {
 561 #ifdef SET_STATUS_FLAGS
 562       __set_status_flags (pfpsf, INVALID_EXCEPTION);
 563 #endif
 564       res = 0x7c00000000000000ull;
 565       BID_RETURN (res);
 566     }
 567         if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull)) {
 568     // otherwise return +/-Inf
 569     res =
 570       (((x) ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull;
 571     BID_RETURN (res);
 572         }
 573   }
 574   // x is 0
 575   if ((y.w[1] & INFINITY_MASK64) != INFINITY_MASK64) {
 576     if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) {
 577 #ifdef SET_STATUS_FLAGS
 578       __set_status_flags (pfpsf, INVALID_EXCEPTION);
 579 #endif
 580       // x=y=0, return NaN
 581       res = 0x7c00000000000000ull;
 582       BID_RETURN (res);
 583     }
 584     // return 0
 585     res = ((x) ^ y.w[1]) & 0x8000000000000000ull;
 586     exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128;
 587     if (exponent_x > DECIMAL_MAX_EXPON_64)
 588       exponent_x = DECIMAL_MAX_EXPON_64;
 589     else if (exponent_x < 0)
 590       exponent_x = 0;
 591     res |= (((UINT64) exponent_x) << 53);
 592     BID_RETURN (res);
 593   }
 594 }
 595 exponent_x += (DECIMAL_EXPONENT_BIAS_128 - DECIMAL_EXPONENT_BIAS);
 596 if (!valid_y) {
 597   // y is Inf. or NaN
 598
 599   // test if y is NaN
 600   if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
 601 #ifdef SET_STATUS_FLAGS
 602     if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull)      // sNaN
 603       __set_status_flags (pfpsf, INVALID_EXCEPTION);
 604 #endif
 605     Tmp.w[1] = (CY.w[1] & 0x00003fffffffffffull);
 606     Tmp.w[0] = CY.w[0];
 607     TP128 = reciprocals10_128[18];
 608     __mul_128x128_full (Qh, Ql, Tmp, TP128);
 609     amount = recip_scale[18];
 610     __shr_128 (Tmp, Qh, amount);
 611     res = (CY.w[1] & 0xfc00000000000000ull) | Tmp.w[0];
 612     BID_RETURN (res);
 613   }
 614   // y is Infinity?
 615   if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
 616     // return +/-0
 617     res = sign_x ^ sign_y;
 618     BID_RETURN (res);
 619   }
 620   // y is 0, return +/-Inf
 621   res =
 622     (((x) ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull;
 623 #ifdef SET_STATUS_FLAGS
 624   __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION);
 625 #endif
 626   BID_RETURN (res);
 627 }
 628 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
 629 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
 630 #endif
 631 diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS;
 632
 633 if (__unsigned_compare_gt_128 (CY, CX)) {
 634   // CX < CY
 635
 636   // 2^64
 637   f64.i = 0x5f800000;
 638
 639   // fx ~ CX,   fy ~ CY
 640   fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
 641   fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0];
 642   // expon_cy - expon_cx
 643   bin_index = (fy.i - fx.i) >> 23;
 644
 645   if (CX.w[1]) {
 646     T = power10_index_binexp_128[bin_index].w[0];
 647     __mul_64x128_short (CA, T, CX);
 648   } else {
 649     T128 = power10_index_binexp_128[bin_index];
 650     __mul_64x128_short (CA, CX.w[0], T128);
 651   }
 652
 653   ed2 = 15;
 654   if (__unsigned_compare_gt_128 (CY, CA))
 655     ed2++;
 656
 657   T128 = power10_table_128[ed2];
 658   __mul_128x128_to_256 (CA4, CA, T128);
 659
 660   ed2 += estimate_decimal_digits[bin_index];
 661   CQ.w[0] = CQ.w[1] = 0;
 662   diff_expon = diff_expon - ed2;
 663
 664 } else {
 665   // get CQ = CX/CY
 666   __div_128_by_128 (&CQ, &CR, CX, CY);
 667
 668   // get number of decimal digits in CQ
 669   // 2^64
 670   f64.i = 0x5f800000;
 671   fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0];
 672   // binary expon. of CQ
 673   bin_expon = (fx.i - 0x3f800000) >> 23;
 674
 675   digits_q = estimate_decimal_digits[bin_expon];
 676   TP128.w[0] = power10_index_binexp_128[bin_expon].w[0];
 677   TP128.w[1] = power10_index_binexp_128[bin_expon].w[1];
 678   if (__unsigned_compare_ge_128 (CQ, TP128))
 679     digits_q++;
 680
 681   if (digits_q <= 16) {
 682     if (!CR.w[1] && !CR.w[0]) {
 683       res = get_BID64 (sign_x ^ sign_y, diff_expon,
 684                        CQ.w[0], rnd_mode, pfpsf);
 685 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
 686       (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
 687 #endif
 688       BID_RETURN (res);
 689     }
 690
 691     ed2 = 16 - digits_q;
 692     T128.w[0] = power10_table_128[ed2].w[0];
 693     __mul_64x128_to_192 (CA4, (T128.w[0]), CR);
 694     diff_expon = diff_expon - ed2;
 695     CQ.w[0] *= T128.w[0];
 696   } else {
 697     ed2 = digits_q - 16;
 698     diff_expon += ed2;
 699     T128 = reciprocals10_128[ed2];
 700     __mul_128x128_to_256 (P256, CQ, T128);
 701     amount = recip_scale[ed2];
 702     CQ.w[0] = (P256.w[2] >> amount) | (P256.w[3] << (64 - amount));
 703     CQ.w[1] = 0;
 704
 705     __mul_64x64_to_128 (CQ2, CQ.w[0], (power10_table_128[ed2].w[0]));
 706
 707     __mul_64x64_to_128 (QB256, CQ2.w[0], CY.w[0]);
 708     QB256.w[1] += CQ2.w[0] * CY.w[1] + CQ2.w[1] * CY.w[0];
 709
 710     CA4.w[1] = CX.w[1] - QB256.w[1];
 711     CA4.w[0] = CX.w[0] - QB256.w[0];
 712     if (CX.w[0] < QB256.w[0])
 713       CA4.w[1]--;
 714     if (CR.w[0] || CR.w[1])
 715       CA4.w[0] |= 1;
 716     done = 1;
 717
 718   }
 719
 720 }
 721 if (!done) {
 722   __div_256_by_128 (&CQ, &CA4, CY);
 723 }
 724
 725
 726
 727 #ifdef SET_STATUS_FLAGS
 728   if (CA4.w[0] || CA4.w[1]) {
 729     // set status flags
 730     __set_status_flags (pfpsf, INEXACT_EXCEPTION);
 731   }
 732 #ifndef LEAVE_TRAILING_ZEROS
 733   else
 734 #endif
 735 #else
 736 #ifndef LEAVE_TRAILING_ZEROS
 737   if (!CA4.w[0] && !CA4.w[1])
 738 #endif
 739 #endif
 740 #ifndef LEAVE_TRAILING_ZEROS
 741     // check whether result is exact
 742   {
 743     // check whether CX, CY are short
 744     if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) {
 745       i = (int) CY.w[0] - 1;
 746       j = (int) CX.w[0] - 1;
 747       // difference in powers of 2 factors for Y and X
 748       nzeros = ed2 - factors[i][0] + factors[j][0];
 749       // difference in powers of 5 factors
 750       d5 = ed2 - factors[i][1] + factors[j][1];
 751       if (d5 < nzeros)
 752         nzeros = d5;
 753       // get P*(2^M[extra_digits])/10^extra_digits
 754       __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
 755
 756       // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
 757       amount = recip_scale[nzeros];
 758       __shr_128_long (CQ, Qh, amount);
 759
 760       diff_expon += nzeros;
 761     } else {
 762       // decompose Q as Qh*10^17 + Ql
 763       Q_low = CQ.w[0];
 764
 765       {
 766         tdigit[0] = Q_low & 0x3ffffff;
 767         tdigit[1] = 0;
 768         QX = Q_low >> 26;
 769         QX32 = QX;
 770         nzeros = 0;
 771
 772         for (j = 0; QX32; j++, QX32 >>= 7) {
 773           k = (QX32 & 127);
 774           tdigit[0] += convert_table[j][k][0];
 775           tdigit[1] += convert_table[j][k][1];
 776           if (tdigit[0] >= 100000000) {
 777             tdigit[0] -= 100000000;
 778             tdigit[1]++;
 779           }
 780         }
 781
 782         if (tdigit[1] >= 100000000) {
 783           tdigit[1] -= 100000000;
 784           if (tdigit[1] >= 100000000)
 785             tdigit[1] -= 100000000;
 786         }
 787
 788         digit = tdigit[0];
 789         if (!digit && !tdigit[1])
 790           nzeros += 16;
 791         else {
 792           if (!digit) {
 793             nzeros += 8;
 794             digit = tdigit[1];
 795           }
 796           // decompose digit
 797           PD = (UINT64) digit *0x068DB8BBull;
 798           digit_h = (UINT32) (PD >> 40);
 799           digit_low = digit - digit_h * 10000;
 800
 801           if (!digit_low)
 802             nzeros += 4;
 803           else
 804             digit_h = digit_low;
 805
 806           if (!(digit_h & 1))
 807             nzeros +=
 808               3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
 809                             (digit_h & 7));
 810         }
 811
 812         if (nzeros) {
 813           // get P*(2^M[extra_digits])/10^extra_digits
 814           __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
 815
 816           // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
 817           amount = recip_scale[nzeros];
 818           __shr_128 (CQ, Qh, amount);
 819         }
 820         diff_expon += nzeros;
 821
 822       }
 823     }
 824         if(diff_expon>=0){
 825     res =
 826       fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0],
 827                                rnd_mode, pfpsf);
 828 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
 829     (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
 830 #endif
 831     BID_RETURN (res);
 832         }
 833   }
 834 #endif
 835
 836   if (diff_expon >= 0) {
 837 #ifdef IEEE_ROUND_NEAREST
 838   // rounding
 839   // 2*CA4 - CY
 840   CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
 841   CA4r.w[0] = CA4.w[0] + CA4.w[0];
 842   __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
 843   CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
 844
 845   D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
 846   carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
 847
 848   CQ.w[0] += carry64;
 849 #else
 850 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
 851   // rounding
 852   // 2*CA4 - CY
 853   CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
 854   CA4r.w[0] = CA4.w[0] + CA4.w[0];
 855   __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
 856   CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
 857
 858   D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
 859   carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
 860
 861   CQ.w[0] += carry64;
 862   if (CQ.w[0] < carry64)
 863     CQ.w[1]++;
 864 #else
 865   rmode = rnd_mode;
 866   if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2)
 867     rmode = 3 - rmode;
 868   switch (rmode) {
 869   case ROUNDING_TO_NEAREST:     // round to nearest code
 870     // rounding
 871     // 2*CA4 - CY
 872     CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
 873     CA4r.w[0] = CA4.w[0] + CA4.w[0];
 874     __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
 875     CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
 876     D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
 877     carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
 878     CQ.w[0] += carry64;
 879     if (CQ.w[0] < carry64)
 880       CQ.w[1]++;
 881     break;
 882   case ROUNDING_TIES_AWAY:
 883     // rounding
 884     // 2*CA4 - CY
 885     CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
 886     CA4r.w[0] = CA4.w[0] + CA4.w[0];
 887     __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
 888     CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
 889     D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
 890     carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
 891     CQ.w[0] += carry64;
 892     if (CQ.w[0] < carry64)
 893       CQ.w[1]++;
 894     break;
 895   case ROUNDING_DOWN:
 896   case ROUNDING_TO_ZERO:
 897     break;
 898   default:      // rounding up
 899     CQ.w[0]++;
 900     if (!CQ.w[0])
 901       CQ.w[1]++;
 902     break;
 903   }
 904 #endif
 905 #endif
 906
 907     res =
 908       fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], rnd_mode,
 909                                pfpsf);
 910 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
 911     (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
 912 #endif
 913     BID_RETURN (res);
 914   } else {
 915     // UF occurs
 916
 917 #ifdef SET_STATUS_FLAGS
 918     if ((diff_expon + 16 < 0)) {
 919       // set status flags
 920       __set_status_flags (pfpsf, INEXACT_EXCEPTION);
 921     }
 922 #endif
 923     rmode = rnd_mode;
 924     res =
 925       get_BID64_UF (sign_x ^ sign_y, diff_expon, CQ.w[0], CA4.w[1] | CA4.w[0], rmode, pfpsf);
 926 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
 927     (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
 928 #endif
 929     BID_RETURN (res);
 930
 931   }
 932
 933 }
 934
 935
 936 //#define LEAVE_TRAILING_ZEROS
 937
 938 TYPE0_FUNCTION_ARG128_ARGTYPE2 (UINT64, bid64qd_div, x, UINT64, y)
 939
 940      UINT256 CA4 =
 941        { {0x0ull, 0x0ull, 0x0ull, 0x0ull} }, CA4r, P256, QB256;
 942 UINT128 CX, CY, T128, CQ, CQ2, CR, CA, TP128, Qh, Ql, Tmp;
 943 UINT64 sign_x, sign_y, T, carry64, D, Q_low, QX, PD, res, valid_y;
 944 int_float fx, fy, f64;
 945 UINT32 QX32, tdigit[3], digit, digit_h, digit_low;
 946 int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2,
 947   digits_q, amount;
 948 int nzeros, i, j, k, d5, done = 0;
 949 unsigned rmode;
 950 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
 951 fexcept_t binaryflags = 0;
 952 #endif
 953
 954 valid_y = unpack_BID64 (&sign_y, &exponent_y, &CY.w[0], (y));
 955
 956         // unpack arguments, check for NaN or Infinity
 957 if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
 958   // test if x is NaN
 959   if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
 960 #ifdef SET_STATUS_FLAGS
 961     if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull ||    // sNaN
 962         (y & 0x7e00000000000000ull) == 0x7e00000000000000ull)
 963       __set_status_flags (pfpsf, INVALID_EXCEPTION);
 964 #endif
 965       Tmp.w[1] = (CX.w[1] & 0x00003fffffffffffull);
 966       Tmp.w[0] = CX.w[0];
 967       TP128 = reciprocals10_128[18];
 968       __mul_128x128_full (Qh, Ql, Tmp, TP128);
 969       amount = recip_scale[18];
 970       __shr_128 (Tmp, Qh, amount);
 971       res = (CX.w[1] & 0xfc00000000000000ull) | Tmp.w[0];
 972     BID_RETURN (res);
 973   }
 974   // x is Infinity?
 975   if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
 976     // check if y is Inf.
 977     if (((y & 0x7c00000000000000ull) == 0x7800000000000000ull))
 978       // return NaN
 979     {
 980 #ifdef SET_STATUS_FLAGS
 981       __set_status_flags (pfpsf, INVALID_EXCEPTION);
 982 #endif
 983       res = 0x7c00000000000000ull;
 984       BID_RETURN (res);
 985     }
 986         if (((y & 0x7c00000000000000ull) != 0x7c00000000000000ull)) {
 987     // otherwise return +/-Inf
 988     res =
 989       ((x.w[1] ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull;
 990     BID_RETURN (res);
 991         }
 992   }
 993   // x is 0
 994   if (((y & INFINITY_MASK64) != INFINITY_MASK64) &&
 995       !(CY.w[0])) {
 996 #ifdef SET_STATUS_FLAGS
 997     __set_status_flags (pfpsf, INVALID_EXCEPTION);
 998 #endif
 999     // x=y=0, return NaN
1000     res = 0x7c00000000000000ull;
1001     BID_RETURN (res);
1002   }
1003   // return 0
1004   if (((y & 0x7800000000000000ull) != 0x7800000000000000ull)) {
1005           if (!CY.w[0]) {
1006 #ifdef SET_STATUS_FLAGS
1007       __set_status_flags (pfpsf, INVALID_EXCEPTION);
1008 #endif
1009       res = 0x7c00000000000000ull;
1010       BID_RETURN (res);
1011           }
1012     exponent_x =
1013       exponent_x - exponent_y - DECIMAL_EXPONENT_BIAS_128 +
1014       (DECIMAL_EXPONENT_BIAS << 1);
1015     if (exponent_x > DECIMAL_MAX_EXPON_64)
1016       exponent_x = DECIMAL_MAX_EXPON_64;
1017     else if (exponent_x < 0)
1018       exponent_x = 0;
1019     res = (sign_x ^ sign_y) | (((UINT64) exponent_x) << 53);
1020     BID_RETURN (res);
1021   }
1022 }
1023 CY.w[1] = 0;
1024 if (!valid_y) {
1025   // y is Inf. or NaN
1026
1027   // test if y is NaN
1028   if ((y & NAN_MASK64) == NAN_MASK64) {
1029 #ifdef SET_STATUS_FLAGS
1030     if ((y & SNAN_MASK64) == SNAN_MASK64)       // sNaN
1031       __set_status_flags (pfpsf, INVALID_EXCEPTION);
1032 #endif
1033     BID_RETURN (CY.w[0] & QUIET_MASK64);
1034   }
1035   // y is Infinity?
1036   if (((y) & 0x7800000000000000ull) == 0x7800000000000000ull) {
1037     // return +/-0
1038     res = sign_x ^ sign_y;
1039     BID_RETURN (res);
1040   }
1041   // y is 0, return +/-Inf
1042   res =
1043     ((x.w[1] ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull;
1044 #ifdef SET_STATUS_FLAGS
1045   __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION);
1046 #endif
1047   BID_RETURN (res);
1048 }
1049 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1050 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
1051 #endif
1052 diff_expon =
1053   exponent_x - exponent_y - DECIMAL_EXPONENT_BIAS_128 +
1054   (DECIMAL_EXPONENT_BIAS << 1);
1055
1056 if (__unsigned_compare_gt_128 (CY, CX)) {
1057   // CX < CY
1058
1059   // 2^64
1060   f64.i = 0x5f800000;
1061
1062   // fx ~ CX,   fy ~ CY
1063   fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
1064   fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0];
1065   // expon_cy - expon_cx
1066   bin_index = (fy.i - fx.i) >> 23;
1067
1068   if (CX.w[1]) {
1069     T = power10_index_binexp_128[bin_index].w[0];
1070     __mul_64x128_short (CA, T, CX);
1071   } else {
1072     T128 = power10_index_binexp_128[bin_index];
1073     __mul_64x128_short (CA, CX.w[0], T128);
1074   }
1075
1076   ed2 = 15;
1077   if (__unsigned_compare_gt_128 (CY, CA))
1078     ed2++;
1079
1080   T128 = power10_table_128[ed2];
1081   __mul_128x128_to_256 (CA4, CA, T128);
1082
1083   ed2 += estimate_decimal_digits[bin_index];
1084   CQ.w[0] = CQ.w[1] = 0;
1085   diff_expon = diff_expon - ed2;
1086
1087 } else {
1088   // get CQ = CX/CY
1089   __div_128_by_128 (&CQ, &CR, CX, CY);
1090
1091   // get number of decimal digits in CQ
1092   // 2^64
1093   f64.i = 0x5f800000;
1094   fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0];
1095   // binary expon. of CQ
1096   bin_expon = (fx.i - 0x3f800000) >> 23;
1097
1098   digits_q = estimate_decimal_digits[bin_expon];
1099   TP128.w[0] = power10_index_binexp_128[bin_expon].w[0];
1100   TP128.w[1] = power10_index_binexp_128[bin_expon].w[1];
1101   if (__unsigned_compare_ge_128 (CQ, TP128))
1102     digits_q++;
1103
1104   if (digits_q <= 16) {
1105     if (!CR.w[1] && !CR.w[0]) {
1106       res = get_BID64 (sign_x ^ sign_y, diff_expon,
1107                        CQ.w[0], rnd_mode, pfpsf);
1108 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1109       (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
1110 #endif
1111       BID_RETURN (res);
1112     }
1113
1114     ed2 = 16 - digits_q;
1115     T128.w[0] = power10_table_128[ed2].w[0];
1116     __mul_64x128_to_192 (CA4, (T128.w[0]), CR);
1117     diff_expon = diff_expon - ed2;
1118     CQ.w[0] *= T128.w[0];
1119   } else {
1120     ed2 = digits_q - 16;
1121     diff_expon += ed2;
1122     T128 = reciprocals10_128[ed2];
1123     __mul_128x128_to_256 (P256, CQ, T128);
1124     amount = recip_scale[ed2];
1125     CQ.w[0] = (P256.w[2] >> amount) | (P256.w[3] << (64 - amount));
1126     CQ.w[1] = 0;
1127
1128     __mul_64x64_to_128 (CQ2, CQ.w[0], (power10_table_128[ed2].w[0]));
1129
1130     __mul_64x64_to_128 (QB256, CQ2.w[0], CY.w[0]);
1131     QB256.w[1] += CQ2.w[0] * CY.w[1] + CQ2.w[1] * CY.w[0];
1132
1133     CA4.w[1] = CX.w[1] - QB256.w[1];
1134     CA4.w[0] = CX.w[0] - QB256.w[0];
1135     if (CX.w[0] < QB256.w[0])
1136       CA4.w[1]--;
1137     if (CR.w[0] || CR.w[1])
1138       CA4.w[0] |= 1;
1139     done = 1;
1140         if(CA4.w[1]|CA4.w[0]) {
1141     __mul_64x128_low(CY, (power10_table_128[ed2].w[0]),CY);
1142         }
1143
1144   }
1145
1146 }
1147
1148 if (!done) {
1149   __div_256_by_128 (&CQ, &CA4, CY);
1150 }
1151
1152 #ifdef SET_STATUS_FLAGS
1153   if (CA4.w[0] || CA4.w[1]) {
1154     // set status flags
1155     __set_status_flags (pfpsf, INEXACT_EXCEPTION);
1156   }
1157 #ifndef LEAVE_TRAILING_ZEROS
1158   else
1159 #endif
1160 #else
1161 #ifndef LEAVE_TRAILING_ZEROS
1162   if (!CA4.w[0] && !CA4.w[1])
1163 #endif
1164 #endif
1165 #ifndef LEAVE_TRAILING_ZEROS
1166     // check whether result is exact
1167   {
1168           if(!done) {
1169     // check whether CX, CY are short
1170     if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) {
1171       i = (int) CY.w[0] - 1;
1172       j = (int) CX.w[0] - 1;
1173       // difference in powers of 2 factors for Y and X
1174       nzeros = ed2 - factors[i][0] + factors[j][0];
1175       // difference in powers of 5 factors
1176       d5 = ed2 - factors[i][1] + factors[j][1];
1177       if (d5 < nzeros)
1178                 nzeros = d5;
1179       // get P*(2^M[extra_digits])/10^extra_digits
1180       __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
1181       //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2];
1182
1183       // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1184       amount = recip_scale[nzeros];
1185       __shr_128_long (CQ, Qh, amount);
1186
1187       diff_expon += nzeros;
1188     } else {
1189       // decompose Q as Qh*10^17 + Ql
1190       //T128 = reciprocals10_128[17];
1191       Q_low = CQ.w[0];
1192
1193       {
1194         tdigit[0] = Q_low & 0x3ffffff;
1195         tdigit[1] = 0;
1196         QX = Q_low >> 26;
1197         QX32 = QX;
1198         nzeros = 0;
1199
1200         for (j = 0; QX32; j++, QX32 >>= 7) {
1201           k = (QX32 & 127);
1202           tdigit[0] += convert_table[j][k][0];
1203           tdigit[1] += convert_table[j][k][1];
1204           if (tdigit[0] >= 100000000) {
1205             tdigit[0] -= 100000000;
1206             tdigit[1]++;
1207           }
1208         }
1209
1210         if (tdigit[1] >= 100000000) {
1211           tdigit[1] -= 100000000;
1212           if (tdigit[1] >= 100000000)
1213             tdigit[1] -= 100000000;
1214         }
1215
1216         digit = tdigit[0];
1217         if (!digit && !tdigit[1])
1218           nzeros += 16;
1219         else {
1220           if (!digit) {
1221             nzeros += 8;
1222             digit = tdigit[1];
1223           }
1224           // decompose digit
1225           PD = (UINT64) digit *0x068DB8BBull;
1226           digit_h = (UINT32) (PD >> 40);
1227           digit_low = digit - digit_h * 10000;
1228
1229           if (!digit_low)
1230             nzeros += 4;
1231           else
1232             digit_h = digit_low;
1233
1234           if (!(digit_h & 1))
1235             nzeros +=
1236               3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
1237                             (digit_h & 7));
1238         }
1239
1240         if (nzeros) {
1241           // get P*(2^M[extra_digits])/10^extra_digits
1242           __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
1243
1244           // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1245           amount = recip_scale[nzeros];
1246           __shr_128 (CQ, Qh, amount);
1247         }
1248         diff_expon += nzeros;
1249
1250       }
1251     }
1252           }
1253         if(diff_expon>=0){
1254     res =
1255       fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0],
1256                                rnd_mode, pfpsf);
1257 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1258     (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
1259 #endif
1260     BID_RETURN (res);
1261         }
1262   }
1263 #endif
1264
1265   if (diff_expon >= 0) {
1266 #ifdef IEEE_ROUND_NEAREST
1267   // rounding
1268   // 2*CA4 - CY
1269   CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
1270   CA4r.w[0] = CA4.w[0] + CA4.w[0];
1271   __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
1272   CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
1273
1274   D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
1275   carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
1276
1277   CQ.w[0] += carry64;
1278   //if(CQ.w[0]<carry64)
1279   //CQ.w[1] ++;
1280 #else
1281 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
1282   // rounding
1283   // 2*CA4 - CY
1284   CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
1285   CA4r.w[0] = CA4.w[0] + CA4.w[0];
1286   __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
1287   CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
1288
1289   D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
1290   carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
1291
1292   CQ.w[0] += carry64;
1293   if (CQ.w[0] < carry64)
1294     CQ.w[1]++;
1295 #else
1296   rmode = rnd_mode;
1297   if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2)
1298     rmode = 3 - rmode;
1299   switch (rmode) {
1300   case ROUNDING_TO_NEAREST:     // round to nearest code
1301     // rounding
1302     // 2*CA4 - CY
1303     CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
1304     CA4r.w[0] = CA4.w[0] + CA4.w[0];
1305     __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
1306     CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
1307     D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
1308     carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
1309     CQ.w[0] += carry64;
1310     if (CQ.w[0] < carry64)
1311       CQ.w[1]++;
1312     break;
1313   case ROUNDING_TIES_AWAY:
1314     // rounding
1315     // 2*CA4 - CY
1316     CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
1317     CA4r.w[0] = CA4.w[0] + CA4.w[0];
1318     __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
1319     CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
1320     D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
1321     carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
1322     CQ.w[0] += carry64;
1323     if (CQ.w[0] < carry64)
1324       CQ.w[1]++;
1325     break;
1326   case ROUNDING_DOWN:
1327   case ROUNDING_TO_ZERO:
1328     break;
1329   default:      // rounding up
1330     CQ.w[0]++;
1331     if (!CQ.w[0])
1332       CQ.w[1]++;
1333     break;
1334   }
1335 #endif
1336 #endif
1337
1338
1339     res =
1340       fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], rnd_mode,
1341                                pfpsf);
1342 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1343     (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
1344 #endif
1345     BID_RETURN (res);
1346   } else {
1347     // UF occurs
1348
1349 #ifdef SET_STATUS_FLAGS
1350     if ((diff_expon + 16 < 0)) {
1351       // set status flags
1352       __set_status_flags (pfpsf, INEXACT_EXCEPTION);
1353     }
1354 #endif
1355     rmode = rnd_mode;
1356     res =
1357       get_BID64_UF (sign_x ^ sign_y, diff_expon, CQ.w[0], CA4.w[1] | CA4.w[0], rmode, pfpsf);
1358 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1359     (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
1360 #endif
1361     BID_RETURN (res);
1362
1363   }
1364
1365 }
1366
1367 //#define LEAVE_TRAILING_ZEROS
1368
1369 extern UINT32 convert_table[5][128][2];
1370 extern SINT8 factors[][2];
1371 extern UINT8 packed_10000_zeros[];
1372
1373
1374 //UINT64* bid64_div128x128(UINT64 res, UINT128 *px, UINT128 *py, unsigned rnd_mode, unsigned *pfpsf)
1375
1376 TYPE0_FUNCTION_ARG128_ARG128 (UINT64, bid64qq_div, x, y)
1377      UINT256 CA4 =
1378        { {0x0ull, 0x0ull, 0x0ull, 0x0ull} }, CA4r, P256, QB256;
1379 UINT128 CX, CY, T128, CQ, CQ2, CR, CA, TP128, Qh, Ql, Tmp;
1380 UINT64 sign_x, sign_y, T, carry64, D, Q_low, QX, valid_y, PD, res;
1381 int_float fx, fy, f64;
1382 UINT32 QX32, tdigit[3], digit, digit_h, digit_low;
1383 int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2,
1384   digits_q, amount;
1385 int nzeros, i, j, k, d5, done = 0;
1386 unsigned rmode;
1387 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1388 fexcept_t binaryflags = 0;
1389 #endif
1390
1391 valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y);
1392
1393         // unpack arguments, check for NaN or Infinity
1394 if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
1395   // test if x is NaN
1396   if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
1397 #ifdef SET_STATUS_FLAGS
1398     if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull ||    // sNaN
1399         (y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull)
1400       __set_status_flags (pfpsf, INVALID_EXCEPTION);
1401 #endif
1402       Tmp.w[1] = (CX.w[1] & 0x00003fffffffffffull);
1403       Tmp.w[0] = CX.w[0];
1404       TP128 = reciprocals10_128[18];
1405       __mul_128x128_full (Qh, Ql, Tmp, TP128);
1406       amount = recip_scale[18];
1407       __shr_128 (Tmp, Qh, amount);
1408       res = (CX.w[1] & 0xfc00000000000000ull) | Tmp.w[0];
1409     BID_RETURN (res);
1410   }
1411   // x is Infinity?
1412   if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
1413     // check if y is Inf.
1414     if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull))
1415       // return NaN
1416     {
1417 #ifdef SET_STATUS_FLAGS
1418       __set_status_flags (pfpsf, INVALID_EXCEPTION);
1419 #endif
1420       res = 0x7c00000000000000ull;
1421       BID_RETURN (res);
1422     }
1423         if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull)) {
1424     // otherwise return +/-Inf
1425     res =
1426       ((x.w[1] ^ y.
1427         w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull;
1428     BID_RETURN (res);
1429         }
1430   }
1431   // x is 0
1432   if (((y.w[1] & 0x7800000000000000ull) != 0x7800000000000000ull)) {
1433   if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) {
1434 #ifdef SET_STATUS_FLAGS
1435     __set_status_flags (pfpsf, INVALID_EXCEPTION);
1436 #endif
1437     // x=y=0, return NaN
1438     res = 0x7c00000000000000ull;
1439     BID_RETURN (res);
1440   }
1441   // return 0
1442   res = (x.w[1] ^ y.w[1]) & 0x8000000000000000ull;
1443   exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS;
1444   if (exponent_x > DECIMAL_MAX_EXPON_64)
1445     exponent_x = DECIMAL_MAX_EXPON_64;
1446   else if (exponent_x < 0)
1447     exponent_x = 0;
1448   res |= (((UINT64) exponent_x) << 53);
1449   BID_RETURN (res);
1450   }
1451 }
1452 if (!valid_y) {
1453   // y is Inf. or NaN
1454
1455   // test if y is NaN
1456   if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
1457 #ifdef SET_STATUS_FLAGS
1458     if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull)      // sNaN
1459       __set_status_flags (pfpsf, INVALID_EXCEPTION);
1460 #endif
1461       Tmp.w[1] = (CY.w[1] & 0x00003fffffffffffull);
1462       Tmp.w[0] = CY.w[0];
1463       TP128 = reciprocals10_128[18];
1464       __mul_128x128_full (Qh, Ql, Tmp, TP128);
1465       amount = recip_scale[18];
1466       __shr_128 (Tmp, Qh, amount);
1467       res = (CY.w[1] & 0xfc00000000000000ull) | Tmp.w[0];
1468     BID_RETURN (res);
1469   }
1470   // y is Infinity?
1471   if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
1472     // return +/-0
1473     res = sign_x ^ sign_y;
1474     BID_RETURN (res);
1475   }
1476   // y is 0, return +/-Inf
1477   res =
1478     ((x.w[1] ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull;
1479 #ifdef SET_STATUS_FLAGS
1480   __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION);
1481 #endif
1482   BID_RETURN (res);
1483 }
1484 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1485 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
1486 #endif
1487 diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS;
1488
1489 if (__unsigned_compare_gt_128 (CY, CX)) {
1490   // CX < CY
1491
1492   // 2^64
1493   f64.i = 0x5f800000;
1494
1495   // fx ~ CX,   fy ~ CY
1496   fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
1497   fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0];
1498   // expon_cy - expon_cx
1499   bin_index = (fy.i - fx.i) >> 23;
1500
1501   if (CX.w[1]) {
1502     T = power10_index_binexp_128[bin_index].w[0];
1503     __mul_64x128_short (CA, T, CX);
1504   } else {
1505     T128 = power10_index_binexp_128[bin_index];
1506     __mul_64x128_short (CA, CX.w[0], T128);
1507   }
1508
1509   ed2 = 15;
1510   if (__unsigned_compare_gt_128 (CY, CA))
1511     ed2++;
1512
1513   T128 = power10_table_128[ed2];
1514   __mul_128x128_to_256 (CA4, CA, T128);
1515
1516   ed2 += estimate_decimal_digits[bin_index];
1517   CQ.w[0] = CQ.w[1] = 0;
1518   diff_expon = diff_expon - ed2;
1519
1520 } else {
1521   // get CQ = CX/CY
1522   __div_128_by_128 (&CQ, &CR, CX, CY);
1523
1524   // get number of decimal digits in CQ
1525   // 2^64
1526   f64.i = 0x5f800000;
1527   fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0];
1528   // binary expon. of CQ
1529   bin_expon = (fx.i - 0x3f800000) >> 23;
1530
1531   digits_q = estimate_decimal_digits[bin_expon];
1532   TP128.w[0] = power10_index_binexp_128[bin_expon].w[0];
1533   TP128.w[1] = power10_index_binexp_128[bin_expon].w[1];
1534   if (__unsigned_compare_ge_128 (CQ, TP128))
1535     digits_q++;
1536
1537   if (digits_q <= 16) {
1538     if (!CR.w[1] && !CR.w[0]) {
1539       res = get_BID64 (sign_x ^ sign_y, diff_expon,
1540                        CQ.w[0], rnd_mode, pfpsf);
1541 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1542       (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
1543 #endif
1544       BID_RETURN (res);
1545     }
1546
1547     ed2 = 16 - digits_q;
1548     T128.w[0] = power10_table_128[ed2].w[0];
1549     __mul_64x128_to_192 (CA4, (T128.w[0]), CR);
1550     diff_expon = diff_expon - ed2;
1551     CQ.w[0] *= T128.w[0];
1552   } else {
1553     ed2 = digits_q - 16;
1554     diff_expon += ed2;
1555     T128 = reciprocals10_128[ed2];
1556     __mul_128x128_to_256 (P256, CQ, T128);
1557     amount = recip_scale[ed2];
1558     CQ.w[0] = (P256.w[2] >> amount) | (P256.w[3] << (64 - amount));
1559     CQ.w[1] = 0;
1560
1561     __mul_64x64_to_128 (CQ2, CQ.w[0], (power10_table_128[ed2].w[0]));
1562
1563     __mul_64x64_to_128 (QB256, CQ2.w[0], CY.w[0]);
1564     QB256.w[1] += CQ2.w[0] * CY.w[1] + CQ2.w[1] * CY.w[0];
1565
1566     CA4.w[1] = CX.w[1] - QB256.w[1];
1567     CA4.w[0] = CX.w[0] - QB256.w[0];
1568     if (CX.w[0] < QB256.w[0])
1569       CA4.w[1]--;
1570     if (CR.w[0] || CR.w[1])
1571       CA4.w[0] |= 1;
1572     done = 1;
1573         if(CA4.w[1]|CA4.w[0]) {
1574     __mul_64x128_low(CY, (power10_table_128[ed2].w[0]),CY);
1575         }
1576   }
1577
1578 }
1579
1580 if (!done) {
1581   __div_256_by_128 (&CQ, &CA4, CY);
1582 }
1583
1584
1585
1586 #ifdef SET_STATUS_FLAGS
1587   if (CA4.w[0] || CA4.w[1]) {
1588     // set status flags
1589     __set_status_flags (pfpsf, INEXACT_EXCEPTION);
1590   }
1591 #ifndef LEAVE_TRAILING_ZEROS
1592   else
1593 #endif
1594 #else
1595 #ifndef LEAVE_TRAILING_ZEROS
1596   if (!CA4.w[0] && !CA4.w[1])
1597 #endif
1598 #endif
1599 #ifndef LEAVE_TRAILING_ZEROS
1600     // check whether result is exact
1601   {
1602           if(!done) {
1603     // check whether CX, CY are short
1604     if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) {
1605       i = (int) CY.w[0] - 1;
1606       j = (int) CX.w[0] - 1;
1607       // difference in powers of 2 factors for Y and X
1608       nzeros = ed2 - factors[i][0] + factors[j][0];
1609       // difference in powers of 5 factors
1610       d5 = ed2 - factors[i][1] + factors[j][1];
1611       if (d5 < nzeros)
1612         nzeros = d5;
1613       // get P*(2^M[extra_digits])/10^extra_digits
1614       __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
1615       //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2];
1616
1617       // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1618       amount = recip_scale[nzeros];
1619       __shr_128_long (CQ, Qh, amount);
1620
1621       diff_expon += nzeros;
1622     } else {
1623       // decompose Q as Qh*10^17 + Ql
1624       //T128 = reciprocals10_128[17];
1625       Q_low = CQ.w[0];
1626
1627       {
1628         tdigit[0] = Q_low & 0x3ffffff;
1629         tdigit[1] = 0;
1630         QX = Q_low >> 26;
1631         QX32 = QX;
1632         nzeros = 0;
1633
1634         for (j = 0; QX32; j++, QX32 >>= 7) {
1635           k = (QX32 & 127);
1636           tdigit[0] += convert_table[j][k][0];
1637           tdigit[1] += convert_table[j][k][1];
1638           if (tdigit[0] >= 100000000) {
1639             tdigit[0] -= 100000000;
1640             tdigit[1]++;
1641           }
1642         }
1643
1644         if (tdigit[1] >= 100000000) {
1645           tdigit[1] -= 100000000;
1646           if (tdigit[1] >= 100000000)
1647             tdigit[1] -= 100000000;
1648         }
1649
1650         digit = tdigit[0];
1651         if (!digit && !tdigit[1])
1652           nzeros += 16;
1653         else {
1654           if (!digit) {
1655             nzeros += 8;
1656             digit = tdigit[1];
1657           }
1658           // decompose digit
1659           PD = (UINT64) digit *0x068DB8BBull;
1660           digit_h = (UINT32) (PD >> 40);
1661           digit_low = digit - digit_h * 10000;
1662
1663           if (!digit_low)
1664             nzeros += 4;
1665           else
1666             digit_h = digit_low;
1667
1668           if (!(digit_h & 1))
1669             nzeros +=
1670               3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
1671                             (digit_h & 7));
1672         }
1673
1674         if (nzeros) {
1675           // get P*(2^M[extra_digits])/10^extra_digits
1676           __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
1677
1678           // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1679           amount = recip_scale[nzeros];
1680           __shr_128 (CQ, Qh, amount);
1681         }
1682         diff_expon += nzeros;
1683
1684       }
1685     }
1686           }
1687         if(diff_expon>=0){
1688     res =
1689       fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0],
1690                                rnd_mode, pfpsf);
1691 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1692     (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
1693 #endif
1694     BID_RETURN (res);
1695         }
1696   }
1697 #endif
1698
1699   if(diff_expon>=0) {
1700
1701 #ifdef IEEE_ROUND_NEAREST
1702   // rounding
1703   // 2*CA4 - CY
1704   CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
1705   CA4r.w[0] = CA4.w[0] + CA4.w[0];
1706   __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
1707   CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
1708
1709   D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
1710   carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
1711
1712   CQ.w[0] += carry64;
1713   //if(CQ.w[0]<carry64)
1714   //CQ.w[1] ++;
1715 #else
1716 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
1717   // rounding
1718   // 2*CA4 - CY
1719   CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
1720   CA4r.w[0] = CA4.w[0] + CA4.w[0];
1721   __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
1722   CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
1723
1724   D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
1725   carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
1726
1727   CQ.w[0] += carry64;
1728   if (CQ.w[0] < carry64)
1729     CQ.w[1]++;
1730 #else
1731   rmode = rnd_mode;
1732   if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2)
1733     rmode = 3 - rmode;
1734   switch (rmode) {
1735   case ROUNDING_TO_NEAREST:     // round to nearest code
1736     // rounding
1737     // 2*CA4 - CY
1738     CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
1739     CA4r.w[0] = CA4.w[0] + CA4.w[0];
1740     __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
1741     CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
1742     D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
1743     carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
1744     CQ.w[0] += carry64;
1745     if (CQ.w[0] < carry64)
1746       CQ.w[1]++;
1747     break;
1748   case ROUNDING_TIES_AWAY:
1749     // rounding
1750     // 2*CA4 - CY
1751     CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
1752     CA4r.w[0] = CA4.w[0] + CA4.w[0];
1753     __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
1754     CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
1755     D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
1756     carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
1757     CQ.w[0] += carry64;
1758     if (CQ.w[0] < carry64)
1759       CQ.w[1]++;
1760     break;
1761   case ROUNDING_DOWN:
1762   case ROUNDING_TO_ZERO:
1763     break;
1764   default:      // rounding up
1765     CQ.w[0]++;
1766     if (!CQ.w[0])
1767       CQ.w[1]++;
1768     break;
1769   }
1770 #endif
1771 #endif
1772
1773
1774     res =
1775       fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], rnd_mode,
1776                                pfpsf);
1777 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1778     (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
1779 #endif
1780     BID_RETURN (res);
1781   } else {
1782     // UF occurs
1783
1784 #ifdef SET_STATUS_FLAGS
1785     if ((diff_expon + 16 < 0)) {
1786       // set status flags
1787       __set_status_flags (pfpsf, INEXACT_EXCEPTION);
1788     }
1789 #endif
1790     rmode = rnd_mode;
1791     res =
1792       get_BID64_UF (sign_x ^ sign_y, diff_expon, CQ.w[0], CA4.w[1] | CA4.w[0], rmode, pfpsf);
1793 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
1794     (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
1795 #endif
1796     BID_RETURN (res);
1797
1798   }
1799
1800 }