libgcc/config/libbid/bid64_fma.c

   1 /* Copyright (C) 2007  Free Software Foundation, Inc.
   2
   3 This file is part of GCC.
   4
   5 GCC is free software; you can redistribute it and/or modify it under
   6 the terms of the GNU General Public License as published by the Free
   7 Software Foundation; either version 2, or (at your option) any later
   8 version.
   9
  10 In addition to the permissions in the GNU General Public License, the
  11 Free Software Foundation gives you unlimited permission to link the
  12 compiled version of this file into combinations with other programs,
  13 and to distribute those combinations without any restriction coming
  14 from the use of this file.  (The General Public License restrictions
  15 do apply in other respects; for example, they cover modification of
  16 the file, and distribution when not linked into a combine
  17 executable.)
  18
  19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  21 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  22 for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GCC; see the file COPYING.  If not, write to the Free
  26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  27 02110-1301, USA.  */
  28
  29 /*****************************************************************************
  30  *    BID64 fma
  31  *****************************************************************************
  32  *
  33  *  Algorithm description:
  34  *
  35  *  if multiplication is guranteed exact (short coefficients)
  36  *     call the unpacked arg. equivalent of bid64_add(x*y, z)
  37  *  else
  38  *     get full coefficient_x*coefficient_y product
  39  *     call subroutine to perform addition of 64-bit argument
  40  *                                         to 128-bit product
  41  *
  42  ****************************************************************************/
  43
  44 #include "bid_inline_add.h"
  45
  46 #if DECIMAL_CALL_BY_REFERENCE
  47 extern void bid64_mul (UINT64 * pres, UINT64 * px,
  48                        UINT64 *
  49                        py _RND_MODE_PARAM _EXC_FLAGS_PARAM
  50                        _EXC_MASKS_PARAM _EXC_INFO_PARAM);
  51 #else
  52
  53 extern UINT64 bid64_mul (UINT64 x,
  54                          UINT64 y _RND_MODE_PARAM
  55                          _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
  56                          _EXC_INFO_PARAM);
  57 #endif
  58
  59 #if DECIMAL_CALL_BY_REFERENCE
  60
  61 void
  62 bid64_fma (UINT64 * pres, UINT64 * px, UINT64 * py,
  63            UINT64 *
  64            pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
  65            _EXC_INFO_PARAM) {
  66   UINT64 x, y, z;
  67 #else
  68
  69 UINT64
  70 bid64_fma (UINT64 x, UINT64 y,
  71            UINT64 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
  72            _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
  73 #endif
  74   UINT128 P, PU, CT, CZ;
  75   UINT64 sign_x, sign_y, coefficient_x, coefficient_y, sign_z,
  76     coefficient_z;
  77   UINT64 C64, remainder_y, res;
  78   UINT64 CYh, CY0L, T, valid_x, valid_y, valid_z;
  79   int_double tempx, tempy;
  80   int extra_digits, exponent_x, exponent_y, bin_expon_cx, bin_expon_cy,
  81     bin_expon_product, rmode;
  82   int digits_p, bp, final_exponent, exponent_z, digits_z, ez, ey,
  83     scale_z, uf_status;
  84
  85 #if DECIMAL_CALL_BY_REFERENCE
  86 #if !DECIMAL_GLOBAL_ROUNDING
  87   _IDEC_round rnd_mode = *prnd_mode;
  88 #endif
  89   x = *px;
  90   y = *py;
  91   z = *pz;
  92 #endif
  93
  94   valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x);
  95   valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y);
  96   valid_z = unpack_BID64 (&sign_z, &exponent_z, &coefficient_z, z);
  97
  98   // unpack arguments, check for NaN, Infinity, or 0
  99   if (!valid_x || !valid_y || !valid_z) {
 100
 101     if ((y & MASK_NAN) == MASK_NAN) {   // y is NAN
 102       // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
 103       // check first for non-canonical NaN payload
 104       y = y & 0xfe03ffffffffffffull;    // clear G6-G12
 105       if ((y & 0x0003ffffffffffffull) > 999999999999999ull) {
 106         y = y & 0xfe00000000000000ull;  // clear G6-G12 and the payload bits
 107       }
 108       if ((y & MASK_SNAN) == MASK_SNAN) {       // y is SNAN
 109         // set invalid flag
 110         *pfpsf |= INVALID_EXCEPTION;
 111         // return quiet (y)
 112         res = y & 0xfdffffffffffffffull;
 113       } else {  // y is QNaN
 114         // return y
 115         res = y;
 116         // if z = SNaN or x = SNaN signal invalid exception
 117         if ((z & MASK_SNAN) == MASK_SNAN
 118             || (x & MASK_SNAN) == MASK_SNAN) {
 119           // set invalid flag
 120           *pfpsf |= INVALID_EXCEPTION;
 121         }
 122       }
 123       BID_RETURN (res)
 124     } else if ((z & MASK_NAN) == MASK_NAN) {    // z is NAN
 125       // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
 126       // check first for non-canonical NaN payload
 127       z = z & 0xfe03ffffffffffffull;    // clear G6-G12
 128       if ((z & 0x0003ffffffffffffull) > 999999999999999ull) {
 129         z = z & 0xfe00000000000000ull;  // clear G6-G12 and the payload bits
 130       }
 131       if ((z & MASK_SNAN) == MASK_SNAN) {       // z is SNAN
 132         // set invalid flag
 133         *pfpsf |= INVALID_EXCEPTION;
 134         // return quiet (z)
 135         res = z & 0xfdffffffffffffffull;
 136       } else {  // z is QNaN
 137         // return z
 138         res = z;
 139         // if x = SNaN signal invalid exception
 140         if ((x & MASK_SNAN) == MASK_SNAN) {
 141           // set invalid flag
 142           *pfpsf |= INVALID_EXCEPTION;
 143         }
 144       }
 145       BID_RETURN (res)
 146     } else if ((x & MASK_NAN) == MASK_NAN) {    // x is NAN
 147       // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
 148       // check first for non-canonical NaN payload
 149       x = x & 0xfe03ffffffffffffull;    // clear G6-G12
 150       if ((x & 0x0003ffffffffffffull) > 999999999999999ull) {
 151         x = x & 0xfe00000000000000ull;  // clear G6-G12 and the payload bits
 152       }
 153       if ((x & MASK_SNAN) == MASK_SNAN) {       // x is SNAN
 154         // set invalid flag
 155         *pfpsf |= INVALID_EXCEPTION;
 156         // return quiet (x)
 157         res = x & 0xfdffffffffffffffull;
 158       } else {  // x is QNaN
 159         // return x
 160         res = x;        // clear out G[6]-G[16]
 161       }
 162       BID_RETURN (res)
 163     }
 164
 165     if (!valid_x) {
 166       // x is Inf. or 0
 167
 168       // x is Infinity?
 169       if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) {
 170         // check if y is 0
 171         if (!coefficient_y) {
 172           // y==0, return NaN
 173 #ifdef SET_STATUS_FLAGS
 174           if ((z & 0x7e00000000000000ull) != 0x7c00000000000000ull)
 175             __set_status_flags (pfpsf, INVALID_EXCEPTION);
 176 #endif
 177           BID_RETURN (0x7c00000000000000ull);
 178         }
 179         // test if z is Inf of oposite sign
 180         if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull)
 181             && (((x ^ y) ^ z) & 0x8000000000000000ull)) {
 182           // return NaN
 183 #ifdef SET_STATUS_FLAGS
 184           __set_status_flags (pfpsf, INVALID_EXCEPTION);
 185 #endif
 186           BID_RETURN (0x7c00000000000000ull);
 187         }
 188         // otherwise return +/-Inf
 189         BID_RETURN (((x ^ y) & 0x8000000000000000ull) |
 190                     0x7800000000000000ull);
 191       }
 192       // x is 0
 193       if (((y & 0x7800000000000000ull) != 0x7800000000000000ull)
 194           && ((z & 0x7800000000000000ull) != 0x7800000000000000ull)) {
 195
 196         if (coefficient_z) {
 197           exponent_y = exponent_x - DECIMAL_EXPONENT_BIAS + exponent_y;
 198
 199           sign_z = z & 0x8000000000000000ull;
 200
 201           if (exponent_y >= exponent_z)
 202             BID_RETURN (z);
 203           res =
 204             add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z,
 205                         &rnd_mode, pfpsf);
 206           BID_RETURN (res);
 207         }
 208       }
 209     }
 210     if (!valid_y) {
 211       // y is Inf. or 0
 212
 213       // y is Infinity?
 214       if ((y & 0x7800000000000000ull) == 0x7800000000000000ull) {
 215         // check if x is 0
 216         if (!coefficient_x) {
 217           // y==0, return NaN
 218 #ifdef SET_STATUS_FLAGS
 219           __set_status_flags (pfpsf, INVALID_EXCEPTION);
 220 #endif
 221           BID_RETURN (0x7c00000000000000ull);
 222         }
 223         // test if z is Inf of oposite sign
 224         if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull)
 225             && (((x ^ y) ^ z) & 0x8000000000000000ull)) {
 226 #ifdef SET_STATUS_FLAGS
 227           __set_status_flags (pfpsf, INVALID_EXCEPTION);
 228 #endif
 229           // return NaN
 230           BID_RETURN (0x7c00000000000000ull);
 231         }
 232         // otherwise return +/-Inf
 233         BID_RETURN (((x ^ y) & 0x8000000000000000ull) |
 234                     0x7800000000000000ull);
 235       }
 236       // y is 0
 237       if (((z & 0x7800000000000000ull) != 0x7800000000000000ull)) {
 238
 239         if (coefficient_z) {
 240           exponent_y += exponent_x - DECIMAL_EXPONENT_BIAS;
 241
 242           sign_z = z & 0x8000000000000000ull;
 243
 244           if (exponent_y >= exponent_z)
 245             BID_RETURN (z);
 246           res =
 247             add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z,
 248                         &rnd_mode, pfpsf);
 249           BID_RETURN (res);
 250         }
 251       }
 252     }
 253
 254     if (!valid_z) {
 255       // y is Inf. or 0
 256
 257       // test if y is NaN/Inf
 258       if ((z & 0x7800000000000000ull) == 0x7800000000000000ull) {
 259         BID_RETURN (coefficient_z & QUIET_MASK64);
 260       }
 261       // z is 0, return x*y
 262       if ((!coefficient_x) || (!coefficient_y)) {
 263         //0+/-0
 264         exponent_x += exponent_y - DECIMAL_EXPONENT_BIAS;
 265         if (exponent_x > DECIMAL_MAX_EXPON_64)
 266           exponent_x = DECIMAL_MAX_EXPON_64;
 267         else if (exponent_x < 0)
 268           exponent_x = 0;
 269         if (exponent_x <= exponent_z)
 270           res = ((UINT64) exponent_x) << 53;
 271         else
 272           res = ((UINT64) exponent_z) << 53;
 273         if ((sign_x ^ sign_y) == sign_z)
 274           res |= sign_z;
 275 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
 276 #ifndef IEEE_ROUND_NEAREST
 277         else if (rnd_mode == ROUNDING_DOWN)
 278           res |= 0x8000000000000000ull;
 279 #endif
 280 #endif
 281         BID_RETURN (res);
 282       }
 283     }
 284   }
 285
 286   /* get binary coefficients of x and y */
 287
 288   //--- get number of bits in the coefficients of x and y ---
 289   // version 2 (original)
 290   tempx.d = (double) coefficient_x;
 291   bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52);
 292
 293   tempy.d = (double) coefficient_y;
 294   bin_expon_cy = ((tempy.i & MASK_BINARY_EXPONENT) >> 52);
 295
 296   // magnitude estimate for coefficient_x*coefficient_y is
 297   //        2^(unbiased_bin_expon_cx + unbiased_bin_expon_cx)
 298   bin_expon_product = bin_expon_cx + bin_expon_cy;
 299
 300   // check if coefficient_x*coefficient_y<2^(10*k+3)
 301   // equivalent to unbiased_bin_expon_cx + unbiased_bin_expon_cx < 10*k+1
 302   if (bin_expon_product < UPPER_EXPON_LIMIT + 2 * BINARY_EXPONENT_BIAS) {
 303     //  easy multiply
 304     C64 = coefficient_x * coefficient_y;
 305     final_exponent = exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS;
 306     if ((final_exponent > 0) || (!coefficient_z)) {
 307       res =
 308         get_add64 (sign_x ^ sign_y,
 309                    final_exponent, C64, sign_z, exponent_z, coefficient_z, rnd_mode, pfpsf);
 310       BID_RETURN (res);
 311     } else {
 312       P.w[0] = C64;
 313       P.w[1] = 0;
 314       extra_digits = 0;
 315     }
 316   } else {
 317     if (!coefficient_z) {
 318 #if DECIMAL_CALL_BY_REFERENCE
 319       bid64_mul (&res, px,
 320                  py _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
 321                  _EXC_INFO_ARG);
 322 #else
 323       res =
 324         bid64_mul (x,
 325                    y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
 326                    _EXC_INFO_ARG);
 327 #endif
 328       BID_RETURN (res);
 329     }
 330     // get 128-bit product: coefficient_x*coefficient_y
 331     __mul_64x64_to_128 (P, coefficient_x, coefficient_y);
 332
 333     // tighten binary range of P:  leading bit is 2^bp
 334     // unbiased_bin_expon_product <= bp <= unbiased_bin_expon_product+1
 335     bin_expon_product -= 2 * BINARY_EXPONENT_BIAS;
 336     __tight_bin_range_128 (bp, P, bin_expon_product);
 337
 338     // get number of decimal digits in the product
 339     digits_p = estimate_decimal_digits[bp];
 340     if (!(__unsigned_compare_gt_128 (power10_table_128[digits_p], P)))
 341       digits_p++;       // if power10_table_128[digits_p] <= P
 342
 343     // determine number of decimal digits to be rounded out
 344     extra_digits = digits_p - MAX_FORMAT_DIGITS;
 345     final_exponent =
 346       exponent_x + exponent_y + extra_digits - DECIMAL_EXPONENT_BIAS;
 347   }
 348
 349   if (((unsigned) final_exponent) >= 3 * 256) {
 350     if (final_exponent < 0) {
 351       //--- get number of bits in the coefficients of z  ---
 352       tempx.d = (double) coefficient_z;
 353       bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
 354       // get number of decimal digits in the coeff_x
 355       digits_z = estimate_decimal_digits[bin_expon_cx];
 356       if (coefficient_z >= power10_table_128[digits_z].w[0])
 357         digits_z++;
 358       // underflow
 359       if ((final_exponent + 16 < 0)
 360           || (exponent_z + digits_z > 33 + final_exponent)) {
 361         res =
 362           BID_normalize (sign_z, exponent_z, coefficient_z,
 363                          sign_x ^ sign_y, 1, rnd_mode, pfpsf);
 364         BID_RETURN (res);
 365       }
 366
 367       ez = exponent_z + digits_z - 16;
 368       if (ez < 0)
 369         ez = 0;
 370       scale_z = exponent_z - ez;
 371       coefficient_z *= power10_table_128[scale_z].w[0];
 372       ey = final_exponent - extra_digits;
 373       extra_digits = ez - ey;
 374       if (extra_digits > 33) {
 375         res =
 376           BID_normalize (sign_z, exponent_z, coefficient_z,
 377                          sign_x ^ sign_y, 1, rnd_mode, pfpsf);
 378         BID_RETURN (res);
 379       }
 380       //else  // extra_digits<=32
 381
 382       if (extra_digits > 17) {
 383         CYh = __truncate (P, 16);
 384         // get remainder
 385         T = power10_table_128[16].w[0];
 386         __mul_64x64_to_64 (CY0L, CYh, T);
 387         remainder_y = P.w[0] - CY0L;
 388
 389         extra_digits -= 16;
 390         P.w[0] = CYh;
 391         P.w[1] = 0;
 392       } else
 393         remainder_y = 0;
 394
 395       // align coeff_x, CYh
 396       __mul_64x64_to_128 (CZ, coefficient_z,
 397                           power10_table_128[extra_digits].w[0]);
 398
 399       if (sign_z == (sign_y ^ sign_x)) {
 400         __add_128_128 (CT, CZ, P);
 401         if (__unsigned_compare_ge_128
 402             (CT, power10_table_128[16 + extra_digits])) {
 403           extra_digits++;
 404           ez++;
 405         }
 406       } else {
 407         if (remainder_y && (__unsigned_compare_ge_128 (CZ, P))) {
 408           P.w[0]++;
 409           if (!P.w[0])
 410             P.w[1]++;
 411         }
 412         __sub_128_128 (CT, CZ, P);
 413         if (((SINT64) CT.w[1]) < 0) {
 414           sign_z = sign_y ^ sign_x;
 415           CT.w[0] = 0 - CT.w[0];
 416           CT.w[1] = 0 - CT.w[1];
 417           if (CT.w[0])
 418             CT.w[1]--;
 419         } else if(!(CT.w[1]|CT.w[0]))
 420                 sign_z = (rnd_mode!=ROUNDING_DOWN)? 0: 0x8000000000000000ull;
 421         if (ez
 422             &&
 423             (__unsigned_compare_gt_128
 424              (power10_table_128[15 + extra_digits], CT))) {
 425           extra_digits--;
 426           ez--;
 427         }
 428       }
 429
 430 #ifdef SET_STATUS_FLAGS
 431       uf_status = 0;
 432       if ((!ez)
 433           &&
 434           __unsigned_compare_gt_128 (power10_table_128
 435                                      [extra_digits + 15], CT)) {
 436         rmode = rnd_mode;
 437         if (sign_z && (unsigned) (rmode - 1) < 2)
 438           rmode = 3 - rmode;
 439         //__add_128_64(PU, CT, round_const_table[rmode][extra_digits]);
 440         PU = power10_table_128[extra_digits + 15];
 441         PU.w[0]--;
 442         if (__unsigned_compare_gt_128 (PU, CT)
 443             || (rmode == ROUNDING_DOWN)
 444             || (rmode == ROUNDING_TO_ZERO))
 445           uf_status = UNDERFLOW_EXCEPTION;
 446         else if (extra_digits < 2) {
 447           if ((rmode == ROUNDING_UP)) {
 448             if (!extra_digits)
 449               uf_status = UNDERFLOW_EXCEPTION;
 450             else {
 451               if (remainder_y && (sign_z != (sign_y ^ sign_x)))
 452                 remainder_y = power10_table_128[16].w[0] - remainder_y;
 453
 454               if (power10_table_128[15].w[0] > remainder_y)
 455                 uf_status = UNDERFLOW_EXCEPTION;
 456             }
 457           } else        // RN or RN_away
 458           {
 459             if (remainder_y && (sign_z != (sign_y ^ sign_x)))
 460               remainder_y = power10_table_128[16].w[0] - remainder_y;
 461
 462             if (!extra_digits) {
 463               remainder_y += round_const_table[rmode][15];
 464               if (remainder_y < power10_table_128[16].w[0])
 465                 uf_status = UNDERFLOW_EXCEPTION;
 466             } else {
 467               if (remainder_y < round_const_table[rmode][16])
 468                 uf_status = UNDERFLOW_EXCEPTION;
 469             }
 470           }
 471           //__set_status_flags (pfpsf, uf_status);
 472         }
 473       }
 474 #endif
 475       res =
 476         __bid_full_round64_remainder (sign_z, ez - extra_digits, CT,
 477                                       extra_digits, remainder_y,
 478                                       rnd_mode, pfpsf, uf_status);
 479       BID_RETURN (res);
 480
 481     } else {
 482       if ((sign_z == (sign_x ^ sign_y))
 483           || (final_exponent > 3 * 256 + 15)) {
 484         res =
 485           fast_get_BID64_check_OF (sign_x ^ sign_y, final_exponent,
 486                                    1000000000000000ull, rnd_mode,
 487                                    pfpsf);
 488         BID_RETURN (res);
 489       }
 490     }
 491   }
 492
 493
 494   if (extra_digits > 0) {
 495     res =
 496       get_add128 (sign_z, exponent_z, coefficient_z, sign_x ^ sign_y,
 497                   final_exponent, P, extra_digits, rnd_mode, pfpsf);
 498     BID_RETURN (res);
 499   }
 500   // go to convert_format and exit
 501   else {
 502     C64 = __low_64 (P);
 503
 504     res =
 505       get_add64 (sign_x ^ sign_y,
 506                  exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS, C64,
 507                  sign_z, exponent_z, coefficient_z,
 508                  rnd_mode, pfpsf);
 509     BID_RETURN (res);
 510   }
 511 }