lib/fma.c

   1 /* Fused multiply-add.
   2    Copyright (C) 2007, 2009, 2011-2018 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2011.  */
  18
  19 #if ! defined USE_LONG_DOUBLE
  20 # include <config.h>
  21 #endif
  22
  23 /* Specification.  */
  24 #include <math.h>
  25
  26 #include <stdbool.h>
  27 #include <stdlib.h>
  28
  29 #if HAVE_FEGETROUND
  30 # include <fenv.h>
  31 #endif
  32
  33 #include "float+.h"
  34 #include "integer_length.h"
  35 #include "verify.h"
  36
  37 #ifdef USE_LONG_DOUBLE
  38 # define FUNC fmal
  39 # define DOUBLE long double
  40 # define FREXP frexpl
  41 # define LDEXP ldexpl
  42 # define MIN_EXP LDBL_MIN_EXP
  43 # define MANT_BIT LDBL_MANT_BIT
  44 # define L_(literal) literal##L
  45 #elif ! defined USE_FLOAT
  46 # define FUNC fma
  47 # define DOUBLE double
  48 # define FREXP frexp
  49 # define LDEXP ldexp
  50 # define MIN_EXP DBL_MIN_EXP
  51 # define MANT_BIT DBL_MANT_BIT
  52 # define L_(literal) literal
  53 #else /* defined USE_FLOAT */
  54 # define FUNC fmaf
  55 # define DOUBLE float
  56 # define FREXP frexpf
  57 # define LDEXP ldexpf
  58 # define MIN_EXP FLT_MIN_EXP
  59 # define MANT_BIT FLT_MANT_BIT
  60 # define L_(literal) literal##f
  61 #endif
  62
  63 #undef MAX
  64 #define MAX(a,b) ((a) > (b) ? (a) : (b))
  65
  66 #undef MIN
  67 #define MIN(a,b) ((a) < (b) ? (a) : (b))
  68
  69 /* MSVC with option -fp:strict refuses to compile constant initializers that
  70    contain floating-point operations.  Pacify this compiler.  */
  71 #ifdef _MSC_VER
  72 # pragma fenv_access (off)
  73 #endif
  74
  75 /* Work around GCC 4.2.1 bug on OpenBSD 5.1/SPARC64.  */
  76 #if defined __GNUC__ && defined __sparc__
  77 # define VOLATILE volatile
  78 #else
  79 # define VOLATILE
  80 #endif
  81
  82 /* It is possible to write an implementation of fused multiply-add with
  83    floating-point operations alone.  See
  84      Sylvie Boldo, Guillaume Melquiond:
  85      Emulation of FMA and correctly-rounded sums: proved algorithms using
  86      rounding to odd.
  87      <https://www.lri.fr/~melquion/doc/08-tc.pdf>
  88    But is it complicated.
  89    Here we take the simpler (and probably slower) approach of doing
  90    multi-precision arithmetic.  */
  91
  92 /* We use the naming conventions of GNU gmp, but vastly simpler (and slower)
  93    algorithms.  */
  94
  95 typedef unsigned int mp_limb_t;
  96 #define GMP_LIMB_BITS 32
  97 verify (sizeof (mp_limb_t) * CHAR_BIT == GMP_LIMB_BITS);
  98
  99 typedef unsigned long long mp_twolimb_t;
 100 #define GMP_TWOLIMB_BITS 64
 101 verify (sizeof (mp_twolimb_t) * CHAR_BIT == GMP_TWOLIMB_BITS);
 102
 103 /* Number of limbs needed for a single DOUBLE.  */
 104 #define NLIMBS1 ((MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS)
 105
 106 /* Number of limbs needed for the accumulator.  */
 107 #define NLIMBS3 (3 * NLIMBS1 + 1)
 108
 109 /* Assuming 0.5 <= x < 1.0:
 110    Convert the mantissa (x * 2^DBL_MANT_BIT) to a sequence of limbs.  */
 111 static void
 112 decode (DOUBLE x, mp_limb_t limbs[NLIMBS1])
 113 {
 114   /* I'm not sure whether it's safe to cast a 'double' value between
 115      2^31 and 2^32 to 'unsigned int', therefore play safe and cast only
 116      'double' values between 0 and 2^31 (to 'unsigned int' or 'int',
 117      doesn't matter).
 118      So, we split the MANT_BIT bits of x into a number of chunks of
 119      at most 31 bits.  */
 120   enum { chunk_count = (MANT_BIT - 1) / 31 + 1 };
 121   /* Variables used for storing the bits limb after limb.  */
 122   mp_limb_t *p = limbs + NLIMBS1 - 1;
 123   mp_limb_t accu = 0;
 124   unsigned int bits_needed = MANT_BIT - (NLIMBS1 - 1) * GMP_LIMB_BITS;
 125   /* The bits bits_needed-1...0 need to be ORed into the accu.
 126      1 <= bits_needed <= GMP_LIMB_BITS.  */
 127   /* Unroll the first 4 loop rounds.  */
 128   if (chunk_count > 0)
 129     {
 130       /* Here we still have MANT_BIT-0*31 bits to extract from x.  */
 131       enum { chunk_bits = MIN (31, MANT_BIT - 0 * 31) }; /* > 0, <= 31 */
 132       mp_limb_t d;
 133       x *= (mp_limb_t) 1 << chunk_bits;
 134       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 135       x -= d;
 136       if (!(x >= L_(0.0) && x < L_(1.0)))
 137         abort ();
 138       if (bits_needed < chunk_bits)
 139         {
 140           /* store bits_needed bits */
 141           accu |= d >> (chunk_bits - bits_needed);
 142           *p = accu;
 143           if (p == limbs)
 144             goto done;
 145           p--;
 146           /* hold (chunk_bits - bits_needed) bits */
 147           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 148           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 149         }
 150       else
 151         {
 152           /* store chunk_bits bits */
 153           accu |= d << (bits_needed - chunk_bits);
 154           bits_needed -= chunk_bits;
 155           if (bits_needed == 0)
 156             {
 157               *p = accu;
 158               if (p == limbs)
 159                 goto done;
 160               p--;
 161               accu = 0;
 162               bits_needed = GMP_LIMB_BITS;
 163             }
 164         }
 165     }
 166   if (chunk_count > 1)
 167     {
 168       /* Here we still have MANT_BIT-1*31 bits to extract from x.  */
 169       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 1 * 31, 0)) }; /* > 0, <= 31 */
 170       mp_limb_t d;
 171       x *= (mp_limb_t) 1 << chunk_bits;
 172       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 173       x -= d;
 174       if (!(x >= L_(0.0) && x < L_(1.0)))
 175         abort ();
 176       if (bits_needed < chunk_bits)
 177         {
 178           /* store bits_needed bits */
 179           accu |= d >> (chunk_bits - bits_needed);
 180           *p = accu;
 181           if (p == limbs)
 182             goto done;
 183           p--;
 184           /* hold (chunk_bits - bits_needed) bits */
 185           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 186           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 187         }
 188       else
 189         {
 190           /* store chunk_bits bits */
 191           accu |= d << (bits_needed - chunk_bits);
 192           bits_needed -= chunk_bits;
 193           if (bits_needed == 0)
 194             {
 195               *p = accu;
 196               if (p == limbs)
 197                 goto done;
 198               p--;
 199               accu = 0;
 200               bits_needed = GMP_LIMB_BITS;
 201             }
 202         }
 203     }
 204   if (chunk_count > 2)
 205     {
 206       /* Here we still have MANT_BIT-2*31 bits to extract from x.  */
 207       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 2 * 31, 0)) }; /* > 0, <= 31 */
 208       mp_limb_t d;
 209       x *= (mp_limb_t) 1 << chunk_bits;
 210       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 211       x -= d;
 212       if (!(x >= L_(0.0) && x < L_(1.0)))
 213         abort ();
 214       if (bits_needed < chunk_bits)
 215         {
 216           /* store bits_needed bits */
 217           accu |= d >> (chunk_bits - bits_needed);
 218           *p = accu;
 219           if (p == limbs)
 220             goto done;
 221           p--;
 222           /* hold (chunk_bits - bits_needed) bits */
 223           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 224           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 225         }
 226       else
 227         {
 228           /* store chunk_bits bits */
 229           accu |= d << (bits_needed - chunk_bits);
 230           bits_needed -= chunk_bits;
 231           if (bits_needed == 0)
 232             {
 233               *p = accu;
 234               if (p == limbs)
 235                 goto done;
 236               p--;
 237               accu = 0;
 238               bits_needed = GMP_LIMB_BITS;
 239             }
 240         }
 241     }
 242   if (chunk_count > 3)
 243     {
 244       /* Here we still have MANT_BIT-3*31 bits to extract from x.  */
 245       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 3 * 31, 0)) }; /* > 0, <= 31 */
 246       mp_limb_t d;
 247       x *= (mp_limb_t) 1 << chunk_bits;
 248       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 249       x -= d;
 250       if (!(x >= L_(0.0) && x < L_(1.0)))
 251         abort ();
 252       if (bits_needed < chunk_bits)
 253         {
 254           /* store bits_needed bits */
 255           accu |= d >> (chunk_bits - bits_needed);
 256           *p = accu;
 257           if (p == limbs)
 258             goto done;
 259           p--;
 260           /* hold (chunk_bits - bits_needed) bits */
 261           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 262           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 263         }
 264       else
 265         {
 266           /* store chunk_bits bits */
 267           accu |= d << (bits_needed - chunk_bits);
 268           bits_needed -= chunk_bits;
 269           if (bits_needed == 0)
 270             {
 271               *p = accu;
 272               if (p == limbs)
 273                 goto done;
 274               p--;
 275               accu = 0;
 276               bits_needed = GMP_LIMB_BITS;
 277             }
 278         }
 279     }
 280   if (chunk_count > 4)
 281     {
 282       /* Here we still have MANT_BIT-4*31 bits to extract from x.  */
 283       /* Generic loop.  */
 284       size_t k;
 285       for (k = 4; k < chunk_count; k++)
 286         {
 287           size_t chunk_bits = MIN (31, MANT_BIT - k * 31); /* > 0, <= 31 */
 288           mp_limb_t d;
 289           x *= (mp_limb_t) 1 << chunk_bits;
 290           d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 291           x -= d;
 292           if (!(x >= L_(0.0) && x < L_(1.0)))
 293             abort ();
 294           if (bits_needed < chunk_bits)
 295             {
 296               /* store bits_needed bits */
 297               accu |= d >> (chunk_bits - bits_needed);
 298               *p = accu;
 299               if (p == limbs)
 300                 goto done;
 301               p--;
 302               /* hold (chunk_bits - bits_needed) bits */
 303               accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 304               bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 305             }
 306           else
 307             {
 308               /* store chunk_bits bits */
 309               accu |= d << (bits_needed - chunk_bits);
 310               bits_needed -= chunk_bits;
 311               if (bits_needed == 0)
 312                 {
 313                   *p = accu;
 314                   if (p == limbs)
 315                     goto done;
 316                   p--;
 317                   accu = 0;
 318                   bits_needed = GMP_LIMB_BITS;
 319                 }
 320             }
 321         }
 322     }
 323   /* We shouldn't get here.  */
 324   abort ();
 325
 326  done: ;
 327 #ifndef USE_LONG_DOUBLE /* On FreeBSD 6.1/x86, 'long double' numbers sometimes
 328                            have excess precision.  */
 329   if (!(x == L_(0.0)))
 330     abort ();
 331 #endif
 332 }
 333
 334 /* Multiply two sequences of limbs.  */
 335 static void
 336 multiply (mp_limb_t xlimbs[NLIMBS1], mp_limb_t ylimbs[NLIMBS1],
 337           mp_limb_t prod_limbs[2 * NLIMBS1])
 338 {
 339   size_t k, i, j;
 340   enum { len1 = NLIMBS1 };
 341   enum { len2 = NLIMBS1 };
 342
 343   for (k = len2; k > 0; )
 344     prod_limbs[--k] = 0;
 345   for (i = 0; i < len1; i++)
 346     {
 347       mp_limb_t digit1 = xlimbs[i];
 348       mp_twolimb_t carry = 0;
 349       for (j = 0; j < len2; j++)
 350         {
 351           mp_limb_t digit2 = ylimbs[j];
 352           carry += (mp_twolimb_t) digit1 * (mp_twolimb_t) digit2;
 353           carry += prod_limbs[i + j];
 354           prod_limbs[i + j] = (mp_limb_t) carry;
 355           carry = carry >> GMP_LIMB_BITS;
 356         }
 357       prod_limbs[i + len2] = (mp_limb_t) carry;
 358     }
 359 }
 360
 361 DOUBLE
 362 FUNC (DOUBLE x, DOUBLE y, DOUBLE z)
 363 {
 364   if (isfinite (x) && isfinite (y))
 365     {
 366       if (isfinite (z))
 367         {
 368           /* x, y, z are all finite.  */
 369           if (x == L_(0.0) || y == L_(0.0))
 370             return z;
 371           if (z == L_(0.0))
 372             return x * y;
 373           /* x, y, z are all non-zero.
 374              The result is x * y + z.  */
 375           {
 376             int e;                  /* exponent of x * y + z */
 377             int sign;
 378             mp_limb_t sum[NLIMBS3];
 379             size_t sum_len;
 380
 381             {
 382               int xys;                /* sign of x * y */
 383               int zs;                 /* sign of z */
 384               int xye;                /* sum of exponents of x and y */
 385               int ze;                 /* exponent of z */
 386               mp_limb_t summand1[NLIMBS3];
 387               size_t summand1_len;
 388               mp_limb_t summand2[NLIMBS3];
 389               size_t summand2_len;
 390
 391               {
 392                 mp_limb_t zlimbs[NLIMBS1];
 393                 mp_limb_t xylimbs[2 * NLIMBS1];
 394
 395                 {
 396                   DOUBLE xn;              /* normalized part of x */
 397                   DOUBLE yn;              /* normalized part of y */
 398                   DOUBLE zn;              /* normalized part of z */
 399                   int xe;                 /* exponent of x */
 400                   int ye;                 /* exponent of y */
 401                   mp_limb_t xlimbs[NLIMBS1];
 402                   mp_limb_t ylimbs[NLIMBS1];
 403
 404                   xys = 0;
 405                   xn = x;
 406                   if (x < 0)
 407                     {
 408                       xys = 1;
 409                       xn = - x;
 410                     }
 411                   yn = y;
 412                   if (y < 0)
 413                     {
 414                       xys = 1 - xys;
 415                       yn = - y;
 416                     }
 417
 418                   zs = 0;
 419                   zn = z;
 420                   if (z < 0)
 421                     {
 422                       zs = 1;
 423                       zn = - z;
 424                     }
 425
 426                   /* xn, yn, zn are all positive.
 427                      The result is (-1)^xys * xn * yn + (-1)^zs * zn.  */
 428                   xn = FREXP (xn, &xe);
 429                   yn = FREXP (yn, &ye);
 430                   zn = FREXP (zn, &ze);
 431                   xye = xe + ye;
 432                   /* xn, yn, zn are all < 1.0 and >= 0.5.
 433                      The result is
 434                        (-1)^xys * 2^xye * xn * yn + (-1)^zs * 2^ze * zn.  */
 435                   if (xye < ze - MANT_BIT)
 436                     {
 437                       /* 2^xye * xn * yn < 2^xye <= 2^(ze-MANT_BIT-1)  */
 438                       return z;
 439                     }
 440                   if (xye - 2 * MANT_BIT > ze)
 441                     {
 442                       /* 2^ze * zn < 2^ze <= 2^(xye-2*MANT_BIT-1).
 443                          We cannot simply do
 444                            return x * y;
 445                          because it would round differently: A round-to-even
 446                          in the multiplication can be a round-up or round-down
 447                          here, due to z.  So replace z with a value that doesn't
 448                          require the use of long bignums but that rounds the
 449                          same way.  */
 450                       zn = L_(0.5);
 451                       ze = xye - 2 * MANT_BIT - 1;
 452                     }
 453                   /* Convert mantissas of xn, yn, zn to limb sequences:
 454                      xlimbs = 2^MANT_BIT * xn
 455                      ylimbs = 2^MANT_BIT * yn
 456                      zlimbs = 2^MANT_BIT * zn  */
 457                   decode (xn, xlimbs);
 458                   decode (yn, ylimbs);
 459                   decode (zn, zlimbs);
 460                   /* Multiply the mantissas of xn and yn:
 461                      xylimbs = xlimbs * ylimbs  */
 462                   multiply (xlimbs, ylimbs, xylimbs);
 463                 }
 464                 /* The result is
 465                      (-1)^xys * 2^(xye-2*MANT_BIT) * xylimbs
 466                      + (-1)^zs * 2^(ze-MANT_BIT) * zlimbs.
 467                    Compute
 468                      e = min (xye-2*MANT_BIT, ze-MANT_BIT)
 469                    then
 470                      summand1 = 2^(xye-2*MANT_BIT-e) * xylimbs
 471                      summand2 = 2^(ze-MANT_BIT-e) * zlimbs  */
 472                 e = MIN (xye - 2 * MANT_BIT, ze - MANT_BIT);
 473                 if (e == xye - 2 * MANT_BIT)
 474                   {
 475                     /* Simply copy the limbs of xylimbs.  */
 476                     size_t i;
 477                     for (i = 0; i < 2 * NLIMBS1; i++)
 478                       summand1[i] = xylimbs[i];
 479                     summand1_len = 2 * NLIMBS1;
 480                   }
 481                 else
 482                   {
 483                     size_t ediff = xye - 2 * MANT_BIT - e;
 484                     /* Left shift the limbs of xylimbs by ediff bits.  */
 485                     size_t ldiff = ediff / GMP_LIMB_BITS;
 486                     size_t shift = ediff % GMP_LIMB_BITS;
 487                     size_t i;
 488                     for (i = 0; i < ldiff; i++)
 489                       summand1[i] = 0;
 490                     if (shift > 0)
 491                       {
 492                         mp_limb_t carry = 0;
 493                         for (i = 0; i < 2 * NLIMBS1; i++)
 494                           {
 495                             summand1[ldiff + i] = (xylimbs[i] << shift) | carry;
 496                             carry = xylimbs[i] >> (GMP_LIMB_BITS - shift);
 497                           }
 498                         summand1[ldiff + 2 * NLIMBS1] = carry;
 499                         summand1_len = ldiff + 2 * NLIMBS1 + 1;
 500                       }
 501                     else
 502                       {
 503                         for (i = 0; i < 2 * NLIMBS1; i++)
 504                           summand1[ldiff + i] = xylimbs[i];
 505                         summand1_len = ldiff + 2 * NLIMBS1;
 506                       }
 507                     /* Estimation of needed array size:
 508                        ediff = (xye - 2 * MANT_BIT) - (ze - MANT_BIT) <= MANT_BIT + 1
 509                        therefore
 510                        summand1_len
 511                          = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + 2 * NLIMBS1
 512                          <= (MANT_BIT + GMP_LIMB_BITS) / GMP_LIMB_BITS + 2 * NLIMBS1
 513                          <= 3 * NLIMBS1 + 1
 514                          = NLIMBS3  */
 515                     if (!(summand1_len <= NLIMBS3))
 516                       abort ();
 517                   }
 518                 if (e == ze - MANT_BIT)
 519                   {
 520                     /* Simply copy the limbs of zlimbs.  */
 521                     size_t i;
 522                     for (i = 0; i < NLIMBS1; i++)
 523                       summand2[i] = zlimbs[i];
 524                     summand2_len = NLIMBS1;
 525                   }
 526                 else
 527                   {
 528                     size_t ediff = ze - MANT_BIT - e;
 529                     /* Left shift the limbs of zlimbs by ediff bits.  */
 530                     size_t ldiff = ediff / GMP_LIMB_BITS;
 531                     size_t shift = ediff % GMP_LIMB_BITS;
 532                     size_t i;
 533                     for (i = 0; i < ldiff; i++)
 534                       summand2[i] = 0;
 535                     if (shift > 0)
 536                       {
 537                         mp_limb_t carry = 0;
 538                         for (i = 0; i < NLIMBS1; i++)
 539                           {
 540                             summand2[ldiff + i] = (zlimbs[i] << shift) | carry;
 541                             carry = zlimbs[i] >> (GMP_LIMB_BITS - shift);
 542                           }
 543                         summand2[ldiff + NLIMBS1] = carry;
 544                         summand2_len = ldiff + NLIMBS1 + 1;
 545                       }
 546                     else
 547                       {
 548                         for (i = 0; i < NLIMBS1; i++)
 549                           summand2[ldiff + i] = zlimbs[i];
 550                         summand2_len = ldiff + NLIMBS1;
 551                       }
 552                     /* Estimation of needed array size:
 553                        ediff = (ze - MANT_BIT) - (xye - 2 * MANT_BIT) <= 2 * MANT_BIT
 554                        therefore
 555                        summand2_len
 556                          = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1
 557                          <= (2 * MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1
 558                          <= 3 * NLIMBS1 + 1
 559                          = NLIMBS3  */
 560                     if (!(summand2_len <= NLIMBS3))
 561                       abort ();
 562                   }
 563               }
 564               /* The result is
 565                    (-1)^xys * 2^e * summand1 + (-1)^zs * 2^e * summand2.  */
 566               if (xys == zs)
 567                 {
 568                   /* Perform an addition.  */
 569                   size_t i;
 570                   mp_limb_t carry;
 571
 572                   sign = xys;
 573                   carry = 0;
 574                   for (i = 0; i < MIN (summand1_len, summand2_len); i++)
 575                     {
 576                       mp_limb_t digit1 = summand1[i];
 577                       mp_limb_t digit2 = summand2[i];
 578                       sum[i] = digit1 + digit2 + carry;
 579                       carry = (carry
 580                                ? digit1 >= (mp_limb_t)-1 - digit2
 581                                : digit1 > (mp_limb_t)-1 - digit2);
 582                     }
 583                   if (summand1_len > summand2_len)
 584                     for (; i < summand1_len; i++)
 585                       {
 586                         mp_limb_t digit1 = summand1[i];
 587                         sum[i] = carry + digit1;
 588                         carry = carry && digit1 == (mp_limb_t)-1;
 589                       }
 590                   else
 591                     for (; i < summand2_len; i++)
 592                       {
 593                         mp_limb_t digit2 = summand2[i];
 594                         sum[i] = carry + digit2;
 595                         carry = carry && digit2 == (mp_limb_t)-1;
 596                       }
 597                   if (carry)
 598                     sum[i++] = carry;
 599                   sum_len = i;
 600                 }
 601               else
 602                 {
 603                   /* Perform a subtraction.  */
 604                   /* Compare summand1 and summand2 by magnitude.  */
 605                   while (summand1[summand1_len - 1] == 0)
 606                     summand1_len--;
 607                   while (summand2[summand2_len - 1] == 0)
 608                     summand2_len--;
 609                   if (summand1_len > summand2_len)
 610                     sign = xys;
 611                   else if (summand1_len < summand2_len)
 612                     sign = zs;
 613                   else
 614                     {
 615                       size_t i = summand1_len;
 616                       for (;;)
 617                         {
 618                           --i;
 619                           if (summand1[i] > summand2[i])
 620                             {
 621                               sign = xys;
 622                               break;
 623                             }
 624                           if (summand1[i] < summand2[i])
 625                             {
 626                               sign = zs;
 627                               break;
 628                             }
 629                           if (i == 0)
 630                             /* summand1 and summand2 are equal.  */
 631                             return L_(0.0);
 632                         }
 633                     }
 634                   if (sign == xys)
 635                     {
 636                       /* Compute summand1 - summand2.  */
 637                       size_t i;
 638                       mp_limb_t carry;
 639
 640                       carry = 0;
 641                       for (i = 0; i < summand2_len; i++)
 642                         {
 643                           mp_limb_t digit1 = summand1[i];
 644                           mp_limb_t digit2 = summand2[i];
 645                           sum[i] = digit1 - digit2 - carry;
 646                           carry = (carry ? digit1 <= digit2 : digit1 < digit2);
 647                         }
 648                       for (; i < summand1_len; i++)
 649                         {
 650                           mp_limb_t digit1 = summand1[i];
 651                           sum[i] = digit1 - carry;
 652                           carry = carry && digit1 == 0;
 653                         }
 654                       if (carry)
 655                         abort ();
 656                       sum_len = summand1_len;
 657                     }
 658                   else
 659                     {
 660                       /* Compute summand2 - summand1.  */
 661                       size_t i;
 662                       mp_limb_t carry;
 663
 664                       carry = 0;
 665                       for (i = 0; i < summand1_len; i++)
 666                         {
 667                           mp_limb_t digit1 = summand1[i];
 668                           mp_limb_t digit2 = summand2[i];
 669                           sum[i] = digit2 - digit1 - carry;
 670                           carry = (carry ? digit2 <= digit1 : digit2 < digit1);
 671                         }
 672                       for (; i < summand2_len; i++)
 673                         {
 674                           mp_limb_t digit2 = summand2[i];
 675                           sum[i] = digit2 - carry;
 676                           carry = carry && digit2 == 0;
 677                         }
 678                       if (carry)
 679                         abort ();
 680                       sum_len = summand2_len;
 681                     }
 682                 }
 683             }
 684             /* The result is
 685                  (-1)^sign * 2^e * sum.  */
 686             /* Now perform the rounding to MANT_BIT mantissa bits.  */
 687             while (sum[sum_len - 1] == 0)
 688               sum_len--;
 689             /* Here we know that the most significant limb, sum[sum_len - 1], is
 690                non-zero.  */
 691             {
 692               /* How many bits the sum has.  */
 693               unsigned int sum_bits =
 694                 integer_length (sum[sum_len - 1]) + (sum_len - 1) * GMP_LIMB_BITS;
 695               /* How many bits to keep when rounding.  */
 696               unsigned int keep_bits;
 697               /* How many bits to round off.  */
 698               unsigned int roundoff_bits;
 699               if (e + (int) sum_bits >= MIN_EXP)
 700                 /* 2^e * sum >= 2^(MIN_EXP-1).
 701                    result will be a normalized number.  */
 702                 keep_bits = MANT_BIT;
 703               else if (e + (int) sum_bits >= MIN_EXP - MANT_BIT)
 704                 /* 2^e * sum >= 2^(MIN_EXP-MANT_BIT-1).
 705                    result will be a denormalized number or rounded to zero.  */
 706                 keep_bits = e + (int) sum_bits - (MIN_EXP + MANT_BIT);
 707               else
 708                 /* 2^e * sum < 2^(MIN_EXP-MANT_BIT-1).  Round to zero.  */
 709                 return L_(0.0);
 710               /* Note: 0 <= keep_bits <= MANT_BIT.  */
 711               if (sum_bits <= keep_bits)
 712                 {
 713                   /* Nothing to do.  */
 714                   roundoff_bits = 0;
 715                   keep_bits = sum_bits;
 716                 }
 717               else
 718                 {
 719                   int round_up;
 720                   roundoff_bits = sum_bits - keep_bits; /* > 0, <= sum_bits */
 721                   {
 722 #if HAVE_FEGETROUND && defined FE_TOWARDZERO
 723                     /* Cf. <http://pubs.opengroup.org/onlinepubs/9699919799/functions/fegetround.html> */
 724                     int rounding_mode = fegetround ();
 725                     if (rounding_mode == FE_TOWARDZERO)
 726                       round_up = 0;
 727                     else if (rounding_mode == FE_DOWNWARD)
 728                       round_up = sign;
 729                     else if (rounding_mode == FE_UPWARD)
 730                       round_up = !sign;
 731 #else
 732                     /* Cf. <http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/float.h.html> */
 733                     int rounding_mode = FLT_ROUNDS;
 734                     if (rounding_mode == 0) /* toward zero */
 735                       round_up = 0;
 736                     else if (rounding_mode == 3) /* toward negative infinity */
 737                       round_up = sign;
 738                     else if (rounding_mode == 2) /* toward positive infinity */
 739                       round_up = !sign;
 740 #endif
 741                     else
 742                       {
 743                         /* Round to nearest.  */
 744                         round_up = 0;
 745                         /* Test bit (roundoff_bits-1).  */
 746                         if ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS]
 747                              >> ((roundoff_bits - 1) % GMP_LIMB_BITS)) & 1)
 748                           {
 749                             /* Test bits roundoff_bits-1 .. 0.  */
 750                             bool halfway =
 751                               ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS]
 752                                 & (((mp_limb_t) 1 << ((roundoff_bits - 1) % GMP_LIMB_BITS)) - 1))
 753                                == 0);
 754                             if (halfway)
 755                               {
 756                                 int i;
 757                                 for (i = (roundoff_bits - 1) / GMP_LIMB_BITS - 1; i >= 0; i--)
 758                                   if (sum[i] != 0)
 759                                     {
 760                                       halfway = false;
 761                                       break;
 762                                     }
 763                               }
 764                             if (halfway)
 765                               /* Round to even.  Test bit roundoff_bits.  */
 766                               round_up = ((sum[roundoff_bits / GMP_LIMB_BITS]
 767                                           >> (roundoff_bits % GMP_LIMB_BITS)) & 1);
 768                             else
 769                               /* Round up.  */
 770                               round_up = 1;
 771                           }
 772                       }
 773                   }
 774                   /* Perform the rounding.  */
 775                   {
 776                     size_t i = roundoff_bits / GMP_LIMB_BITS;
 777                     {
 778                       size_t j = i;
 779                       while (j > 0)
 780                         sum[--j] = 0;
 781                     }
 782                     if (round_up)
 783                       {
 784                         /* Round up.  */
 785                         sum[i] =
 786                           (sum[i]
 787                            | (((mp_limb_t) 1 << (roundoff_bits % GMP_LIMB_BITS)) - 1))
 788                           + 1;
 789                         if (sum[i] == 0)
 790                           {
 791                             /* Propagate carry.  */
 792                             while (i < sum_len - 1)
 793                               {
 794                                 ++i;
 795                                 sum[i] += 1;
 796                                 if (sum[i] != 0)
 797                                   break;
 798                               }
 799                           }
 800                         /* sum[i] is the most significant limb that was
 801                            incremented.  */
 802                         if (i == sum_len - 1 && (sum[i] & (sum[i] - 1)) == 0)
 803                           {
 804                             /* Through the carry, one more bit is needed.  */
 805                             if (sum[i] != 0)
 806                               sum_bits += 1;
 807                             else
 808                               {
 809                                 /* Instead of requiring one more limb of memory,
 810                                    perform a shift by one bit, and adjust the
 811                                    exponent.  */
 812                                 sum[i] = (mp_limb_t) 1 << (GMP_LIMB_BITS - 1);
 813                                 e += 1;
 814                               }
 815                             /* The bit sequence has the form 1000...000.  */
 816                             keep_bits = 1;
 817                           }
 818                       }
 819                     else
 820                       {
 821                         /* Round down.  */
 822                         sum[i] &= ((mp_limb_t) -1 << (roundoff_bits % GMP_LIMB_BITS));
 823                         if (i == sum_len - 1 && sum[i] == 0)
 824                           /* The entire sum has become zero.  */
 825                           return L_(0.0);
 826                       }
 827                   }
 828                 }
 829               /* The result is
 830                    (-1)^sign * 2^e * sum
 831                  and here we know that
 832                    2^(sum_bits-1) <= sum < 2^sum_bits,
 833                  and sum is a multiple of 2^(sum_bits-keep_bits), where
 834                    0 < keep_bits <= MANT_BIT  and  keep_bits <= sum_bits.
 835                  (If keep_bits was initially 0, the rounding either returned zero
 836                  or produced a bit sequence of the form 1000...000, setting
 837                  keep_bits to 1.)  */
 838               {
 839                 /* Split the keep_bits bits into chunks of at most 32 bits.  */
 840                 unsigned int chunk_count = (keep_bits - 1) / GMP_LIMB_BITS + 1;
 841                 /* 1 <= chunk_count <= ceil (sum_bits / GMP_LIMB_BITS) = sum_len. */
 842                 static const DOUBLE chunk_multiplier = /* 2^-GMP_LIMB_BITS */
 843                   L_(1.0) / ((DOUBLE) (1 << (GMP_LIMB_BITS / 2))
 844                              * (DOUBLE) (1 << ((GMP_LIMB_BITS + 1) / 2)));
 845                 unsigned int shift = sum_bits % GMP_LIMB_BITS;
 846                 DOUBLE fsum;
 847                 if (MANT_BIT <= GMP_LIMB_BITS)
 848                   {
 849                     /* Since keep_bits <= MANT_BIT <= GMP_LIMB_BITS,
 850                        chunk_count is 1.  No need for a loop.  */
 851                     if (shift == 0)
 852                       fsum = (DOUBLE) sum[sum_len - 1];
 853                     else
 854                       fsum = (DOUBLE)
 855                              ((sum[sum_len - 1] << (GMP_LIMB_BITS - shift))
 856                               | (sum_len >= 2 ? sum[sum_len - 2] >> shift : 0));
 857                   }
 858                 else
 859                   {
 860                     int k;
 861                     k = chunk_count - 1;
 862                     if (shift == 0)
 863                       {
 864                         /* First loop round.  */
 865                         fsum = (DOUBLE) sum[sum_len - k - 1];
 866                         /* General loop.  */
 867                         while (--k >= 0)
 868                           {
 869                             fsum *= chunk_multiplier;
 870                             fsum += (DOUBLE) sum[sum_len - k - 1];
 871                           }
 872                       }
 873                     else
 874                       {
 875                         /* First loop round.  */
 876                         {
 877                           VOLATILE mp_limb_t chunk =
 878                             (sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
 879                             | (sum_len >= k + 2 ? sum[sum_len - k - 2] >> shift : 0);
 880                           fsum = (DOUBLE) chunk;
 881                         }
 882                         /* General loop.  */
 883                         while (--k >= 0)
 884                           {
 885                             fsum *= chunk_multiplier;
 886                             {
 887                               VOLATILE mp_limb_t chunk =
 888                                 (sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
 889                                 | (sum[sum_len - k - 2] >> shift);
 890                               fsum += (DOUBLE) chunk;
 891                             }
 892                           }
 893                       }
 894                   }
 895                 fsum = LDEXP (fsum, e + (int) sum_bits - GMP_LIMB_BITS);
 896                 return (sign ? - fsum : fsum);
 897               }
 898             }
 899           }
 900         }
 901       else
 902         return z;
 903     }
 904   else
 905     return (x * y) + z;
 906 }