lib/fma.c

   1 /* Fused multiply-add.
   2    Copyright (C) 2007, 2009, 2011-2020 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2011.  */
  18
  19 #if ! defined USE_LONG_DOUBLE
  20 # include <config.h>
  21 #endif
  22
  23 /* Specification.  */
  24 #include <math.h>
  25
  26 #include <limits.h>
  27 #include <stdbool.h>
  28 #include <stdlib.h>
  29
  30 #if HAVE_FEGETROUND
  31 # include <fenv.h>
  32 #endif
  33
  34 #include "float+.h"
  35 #include "integer_length.h"
  36 #include "verify.h"
  37
  38 #ifdef USE_LONG_DOUBLE
  39 # define FUNC fmal
  40 # define DOUBLE long double
  41 # define FREXP frexpl
  42 # define LDEXP ldexpl
  43 # define MIN_EXP LDBL_MIN_EXP
  44 # define MANT_BIT LDBL_MANT_BIT
  45 # define L_(literal) literal##L
  46 #elif ! defined USE_FLOAT
  47 # define FUNC fma
  48 # define DOUBLE double
  49 # define FREXP frexp
  50 # define LDEXP ldexp
  51 # define MIN_EXP DBL_MIN_EXP
  52 # define MANT_BIT DBL_MANT_BIT
  53 # define L_(literal) literal
  54 #else /* defined USE_FLOAT */
  55 # define FUNC fmaf
  56 # define DOUBLE float
  57 # define FREXP frexpf
  58 # define LDEXP ldexpf
  59 # define MIN_EXP FLT_MIN_EXP
  60 # define MANT_BIT FLT_MANT_BIT
  61 # define L_(literal) literal##f
  62 #endif
  63
  64 #undef MAX
  65 #define MAX(a,b) ((a) > (b) ? (a) : (b))
  66
  67 #undef MIN
  68 #define MIN(a,b) ((a) < (b) ? (a) : (b))
  69
  70 /* MSVC with option -fp:strict refuses to compile constant initializers that
  71    contain floating-point operations.  Pacify this compiler.  */
  72 #if defined _MSC_VER && !defined __clang__
  73 # pragma fenv_access (off)
  74 #endif
  75
  76 /* Work around GCC 4.2.1 bug on OpenBSD 5.1/SPARC64.  */
  77 #if defined __GNUC__ && defined __sparc__
  78 # define VOLATILE volatile
  79 #else
  80 # define VOLATILE
  81 #endif
  82
  83 /* It is possible to write an implementation of fused multiply-add with
  84    floating-point operations alone.  See
  85      Sylvie Boldo, Guillaume Melquiond:
  86      Emulation of FMA and correctly-rounded sums: proved algorithms using
  87      rounding to odd.
  88      <https://www.lri.fr/~melquion/doc/08-tc.pdf>
  89    But is it complicated.
  90    Here we take the simpler (and probably slower) approach of doing
  91    multi-precision arithmetic.  */
  92
  93 /* We use the naming conventions of GNU gmp, but vastly simpler (and slower)
  94    algorithms.  */
  95
  96 typedef unsigned int mp_limb_t;
  97 #define GMP_LIMB_BITS 32
  98 verify (sizeof (mp_limb_t) * CHAR_BIT == GMP_LIMB_BITS);
  99
 100 typedef unsigned long long mp_twolimb_t;
 101 #define GMP_TWOLIMB_BITS 64
 102 verify (sizeof (mp_twolimb_t) * CHAR_BIT == GMP_TWOLIMB_BITS);
 103
 104 /* Number of limbs needed for a single DOUBLE.  */
 105 #define NLIMBS1 ((MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS)
 106
 107 /* Number of limbs needed for the accumulator.  */
 108 #define NLIMBS3 (3 * NLIMBS1 + 1)
 109
 110 /* Assuming 0.5 <= x < 1.0:
 111    Convert the mantissa (x * 2^DBL_MANT_BIT) to a sequence of limbs.  */
 112 static void
 113 decode (DOUBLE x, mp_limb_t limbs[NLIMBS1])
 114 {
 115   /* I'm not sure whether it's safe to cast a 'double' value between
 116      2^31 and 2^32 to 'unsigned int', therefore play safe and cast only
 117      'double' values between 0 and 2^31 (to 'unsigned int' or 'int',
 118      doesn't matter).
 119      So, we split the MANT_BIT bits of x into a number of chunks of
 120      at most 31 bits.  */
 121   enum { chunk_count = (MANT_BIT - 1) / 31 + 1 };
 122   /* Variables used for storing the bits limb after limb.  */
 123   mp_limb_t *p = limbs + NLIMBS1 - 1;
 124   mp_limb_t accu = 0;
 125   unsigned int bits_needed = MANT_BIT - (NLIMBS1 - 1) * GMP_LIMB_BITS;
 126   /* The bits bits_needed-1...0 need to be ORed into the accu.
 127      1 <= bits_needed <= GMP_LIMB_BITS.  */
 128   /* Unroll the first 4 loop rounds.  */
 129   if (chunk_count > 0)
 130     {
 131       /* Here we still have MANT_BIT-0*31 bits to extract from x.  */
 132       enum { chunk_bits = MIN (31, MANT_BIT - 0 * 31) }; /* > 0, <= 31 */
 133       mp_limb_t d;
 134       x *= (mp_limb_t) 1 << chunk_bits;
 135       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 136       x -= d;
 137       if (!(x >= L_(0.0) && x < L_(1.0)))
 138         abort ();
 139       if (bits_needed < chunk_bits)
 140         {
 141           /* store bits_needed bits */
 142           accu |= d >> (chunk_bits - bits_needed);
 143           *p = accu;
 144           if (p == limbs)
 145             goto done;
 146           p--;
 147           /* hold (chunk_bits - bits_needed) bits */
 148           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 149           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 150         }
 151       else
 152         {
 153           /* store chunk_bits bits */
 154           accu |= d << (bits_needed - chunk_bits);
 155           bits_needed -= chunk_bits;
 156           if (bits_needed == 0)
 157             {
 158               *p = accu;
 159               if (p == limbs)
 160                 goto done;
 161               p--;
 162               accu = 0;
 163               bits_needed = GMP_LIMB_BITS;
 164             }
 165         }
 166     }
 167   if (chunk_count > 1)
 168     {
 169       /* Here we still have MANT_BIT-1*31 bits to extract from x.  */
 170       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 1 * 31, 0)) }; /* > 0, <= 31 */
 171       mp_limb_t d;
 172       x *= (mp_limb_t) 1 << chunk_bits;
 173       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 174       x -= d;
 175       if (!(x >= L_(0.0) && x < L_(1.0)))
 176         abort ();
 177       if (bits_needed < chunk_bits)
 178         {
 179           /* store bits_needed bits */
 180           accu |= d >> (chunk_bits - bits_needed);
 181           *p = accu;
 182           if (p == limbs)
 183             goto done;
 184           p--;
 185           /* hold (chunk_bits - bits_needed) bits */
 186           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 187           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 188         }
 189       else
 190         {
 191           /* store chunk_bits bits */
 192           accu |= d << (bits_needed - chunk_bits);
 193           bits_needed -= chunk_bits;
 194           if (bits_needed == 0)
 195             {
 196               *p = accu;
 197               if (p == limbs)
 198                 goto done;
 199               p--;
 200               accu = 0;
 201               bits_needed = GMP_LIMB_BITS;
 202             }
 203         }
 204     }
 205   if (chunk_count > 2)
 206     {
 207       /* Here we still have MANT_BIT-2*31 bits to extract from x.  */
 208       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 2 * 31, 0)) }; /* > 0, <= 31 */
 209       mp_limb_t d;
 210       x *= (mp_limb_t) 1 << chunk_bits;
 211       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 212       x -= d;
 213       if (!(x >= L_(0.0) && x < L_(1.0)))
 214         abort ();
 215       if (bits_needed < chunk_bits)
 216         {
 217           /* store bits_needed bits */
 218           accu |= d >> (chunk_bits - bits_needed);
 219           *p = accu;
 220           if (p == limbs)
 221             goto done;
 222           p--;
 223           /* hold (chunk_bits - bits_needed) bits */
 224           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 225           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 226         }
 227       else
 228         {
 229           /* store chunk_bits bits */
 230           accu |= d << (bits_needed - chunk_bits);
 231           bits_needed -= chunk_bits;
 232           if (bits_needed == 0)
 233             {
 234               *p = accu;
 235               if (p == limbs)
 236                 goto done;
 237               p--;
 238               accu = 0;
 239               bits_needed = GMP_LIMB_BITS;
 240             }
 241         }
 242     }
 243   if (chunk_count > 3)
 244     {
 245       /* Here we still have MANT_BIT-3*31 bits to extract from x.  */
 246       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 3 * 31, 0)) }; /* > 0, <= 31 */
 247       mp_limb_t d;
 248       x *= (mp_limb_t) 1 << chunk_bits;
 249       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 250       x -= d;
 251       if (!(x >= L_(0.0) && x < L_(1.0)))
 252         abort ();
 253       if (bits_needed < chunk_bits)
 254         {
 255           /* store bits_needed bits */
 256           accu |= d >> (chunk_bits - bits_needed);
 257           *p = accu;
 258           if (p == limbs)
 259             goto done;
 260           p--;
 261           /* hold (chunk_bits - bits_needed) bits */
 262           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 263           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 264         }
 265       else
 266         {
 267           /* store chunk_bits bits */
 268           accu |= d << (bits_needed - chunk_bits);
 269           bits_needed -= chunk_bits;
 270           if (bits_needed == 0)
 271             {
 272               *p = accu;
 273               if (p == limbs)
 274                 goto done;
 275               p--;
 276               accu = 0;
 277               bits_needed = GMP_LIMB_BITS;
 278             }
 279         }
 280     }
 281   if (chunk_count > 4)
 282     {
 283       /* Here we still have MANT_BIT-4*31 bits to extract from x.  */
 284       /* Generic loop.  */
 285       size_t k;
 286       for (k = 4; k < chunk_count; k++)
 287         {
 288           size_t chunk_bits = MIN (31, MANT_BIT - k * 31); /* > 0, <= 31 */
 289           mp_limb_t d;
 290           x *= (mp_limb_t) 1 << chunk_bits;
 291           d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 292           x -= d;
 293           if (!(x >= L_(0.0) && x < L_(1.0)))
 294             abort ();
 295           if (bits_needed < chunk_bits)
 296             {
 297               /* store bits_needed bits */
 298               accu |= d >> (chunk_bits - bits_needed);
 299               *p = accu;
 300               if (p == limbs)
 301                 goto done;
 302               p--;
 303               /* hold (chunk_bits - bits_needed) bits */
 304               accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 305               bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 306             }
 307           else
 308             {
 309               /* store chunk_bits bits */
 310               accu |= d << (bits_needed - chunk_bits);
 311               bits_needed -= chunk_bits;
 312               if (bits_needed == 0)
 313                 {
 314                   *p = accu;
 315                   if (p == limbs)
 316                     goto done;
 317                   p--;
 318                   accu = 0;
 319                   bits_needed = GMP_LIMB_BITS;
 320                 }
 321             }
 322         }
 323     }
 324   /* We shouldn't get here.  */
 325   abort ();
 326
 327  done: ;
 328 #ifndef USE_LONG_DOUBLE /* On FreeBSD 6.1/x86, 'long double' numbers sometimes
 329                            have excess precision.  */
 330   if (!(x == L_(0.0)))
 331     abort ();
 332 #endif
 333 }
 334
 335 /* Multiply two sequences of limbs.  */
 336 static void
 337 multiply (mp_limb_t xlimbs[NLIMBS1], mp_limb_t ylimbs[NLIMBS1],
 338           mp_limb_t prod_limbs[2 * NLIMBS1])
 339 {
 340   size_t k, i, j;
 341   enum { len1 = NLIMBS1 };
 342   enum { len2 = NLIMBS1 };
 343
 344   for (k = len2; k > 0; )
 345     prod_limbs[--k] = 0;
 346   for (i = 0; i < len1; i++)
 347     {
 348       mp_limb_t digit1 = xlimbs[i];
 349       mp_twolimb_t carry = 0;
 350       for (j = 0; j < len2; j++)
 351         {
 352           mp_limb_t digit2 = ylimbs[j];
 353           carry += (mp_twolimb_t) digit1 * (mp_twolimb_t) digit2;
 354           carry += prod_limbs[i + j];
 355           prod_limbs[i + j] = (mp_limb_t) carry;
 356           carry = carry >> GMP_LIMB_BITS;
 357         }
 358       prod_limbs[i + len2] = (mp_limb_t) carry;
 359     }
 360 }
 361
 362 DOUBLE
 363 FUNC (DOUBLE x, DOUBLE y, DOUBLE z)
 364 {
 365   if (isfinite (x) && isfinite (y))
 366     {
 367       if (isfinite (z))
 368         {
 369           /* x, y, z are all finite.  */
 370           if (x == L_(0.0) || y == L_(0.0))
 371             return z;
 372           if (z == L_(0.0))
 373             return x * y;
 374           /* x, y, z are all non-zero.
 375              The result is x * y + z.  */
 376           {
 377             int e;                  /* exponent of x * y + z */
 378             int sign;
 379             mp_limb_t sum[NLIMBS3];
 380             size_t sum_len;
 381
 382             {
 383               int xys;                /* sign of x * y */
 384               int zs;                 /* sign of z */
 385               int xye;                /* sum of exponents of x and y */
 386               int ze;                 /* exponent of z */
 387               mp_limb_t summand1[NLIMBS3];
 388               size_t summand1_len;
 389               mp_limb_t summand2[NLIMBS3];
 390               size_t summand2_len;
 391
 392               {
 393                 mp_limb_t zlimbs[NLIMBS1];
 394                 mp_limb_t xylimbs[2 * NLIMBS1];
 395
 396                 {
 397                   DOUBLE xn;              /* normalized part of x */
 398                   DOUBLE yn;              /* normalized part of y */
 399                   DOUBLE zn;              /* normalized part of z */
 400                   int xe;                 /* exponent of x */
 401                   int ye;                 /* exponent of y */
 402                   mp_limb_t xlimbs[NLIMBS1];
 403                   mp_limb_t ylimbs[NLIMBS1];
 404
 405                   xys = 0;
 406                   xn = x;
 407                   if (x < 0)
 408                     {
 409                       xys = 1;
 410                       xn = - x;
 411                     }
 412                   yn = y;
 413                   if (y < 0)
 414                     {
 415                       xys = 1 - xys;
 416                       yn = - y;
 417                     }
 418
 419                   zs = 0;
 420                   zn = z;
 421                   if (z < 0)
 422                     {
 423                       zs = 1;
 424                       zn = - z;
 425                     }
 426
 427                   /* xn, yn, zn are all positive.
 428                      The result is (-1)^xys * xn * yn + (-1)^zs * zn.  */
 429                   xn = FREXP (xn, &xe);
 430                   yn = FREXP (yn, &ye);
 431                   zn = FREXP (zn, &ze);
 432                   xye = xe + ye;
 433                   /* xn, yn, zn are all < 1.0 and >= 0.5.
 434                      The result is
 435                        (-1)^xys * 2^xye * xn * yn + (-1)^zs * 2^ze * zn.  */
 436                   if (xye < ze - MANT_BIT)
 437                     {
 438                       /* 2^xye * xn * yn < 2^xye <= 2^(ze-MANT_BIT-1)  */
 439                       return z;
 440                     }
 441                   if (xye - 2 * MANT_BIT > ze)
 442                     {
 443                       /* 2^ze * zn < 2^ze <= 2^(xye-2*MANT_BIT-1).
 444                          We cannot simply do
 445                            return x * y;
 446                          because it would round differently: A round-to-even
 447                          in the multiplication can be a round-up or round-down
 448                          here, due to z.  So replace z with a value that doesn't
 449                          require the use of long bignums but that rounds the
 450                          same way.  */
 451                       zn = L_(0.5);
 452                       ze = xye - 2 * MANT_BIT - 1;
 453                     }
 454                   /* Convert mantissas of xn, yn, zn to limb sequences:
 455                      xlimbs = 2^MANT_BIT * xn
 456                      ylimbs = 2^MANT_BIT * yn
 457                      zlimbs = 2^MANT_BIT * zn  */
 458                   decode (xn, xlimbs);
 459                   decode (yn, ylimbs);
 460                   decode (zn, zlimbs);
 461                   /* Multiply the mantissas of xn and yn:
 462                      xylimbs = xlimbs * ylimbs  */
 463                   multiply (xlimbs, ylimbs, xylimbs);
 464                 }
 465                 /* The result is
 466                      (-1)^xys * 2^(xye-2*MANT_BIT) * xylimbs
 467                      + (-1)^zs * 2^(ze-MANT_BIT) * zlimbs.
 468                    Compute
 469                      e = min (xye-2*MANT_BIT, ze-MANT_BIT)
 470                    then
 471                      summand1 = 2^(xye-2*MANT_BIT-e) * xylimbs
 472                      summand2 = 2^(ze-MANT_BIT-e) * zlimbs  */
 473                 e = MIN (xye - 2 * MANT_BIT, ze - MANT_BIT);
 474                 if (e == xye - 2 * MANT_BIT)
 475                   {
 476                     /* Simply copy the limbs of xylimbs.  */
 477                     size_t i;
 478                     for (i = 0; i < 2 * NLIMBS1; i++)
 479                       summand1[i] = xylimbs[i];
 480                     summand1_len = 2 * NLIMBS1;
 481                   }
 482                 else
 483                   {
 484                     size_t ediff = xye - 2 * MANT_BIT - e;
 485                     /* Left shift the limbs of xylimbs by ediff bits.  */
 486                     size_t ldiff = ediff / GMP_LIMB_BITS;
 487                     size_t shift = ediff % GMP_LIMB_BITS;
 488                     size_t i;
 489                     for (i = 0; i < ldiff; i++)
 490                       summand1[i] = 0;
 491                     if (shift > 0)
 492                       {
 493                         mp_limb_t carry = 0;
 494                         for (i = 0; i < 2 * NLIMBS1; i++)
 495                           {
 496                             summand1[ldiff + i] = (xylimbs[i] << shift) | carry;
 497                             carry = xylimbs[i] >> (GMP_LIMB_BITS - shift);
 498                           }
 499                         summand1[ldiff + 2 * NLIMBS1] = carry;
 500                         summand1_len = ldiff + 2 * NLIMBS1 + 1;
 501                       }
 502                     else
 503                       {
 504                         for (i = 0; i < 2 * NLIMBS1; i++)
 505                           summand1[ldiff + i] = xylimbs[i];
 506                         summand1_len = ldiff + 2 * NLIMBS1;
 507                       }
 508                     /* Estimation of needed array size:
 509                        ediff = (xye - 2 * MANT_BIT) - (ze - MANT_BIT) <= MANT_BIT + 1
 510                        therefore
 511                        summand1_len
 512                          = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + 2 * NLIMBS1
 513                          <= (MANT_BIT + GMP_LIMB_BITS) / GMP_LIMB_BITS + 2 * NLIMBS1
 514                          <= 3 * NLIMBS1 + 1
 515                          = NLIMBS3  */
 516                     if (!(summand1_len <= NLIMBS3))
 517                       abort ();
 518                   }
 519                 if (e == ze - MANT_BIT)
 520                   {
 521                     /* Simply copy the limbs of zlimbs.  */
 522                     size_t i;
 523                     for (i = 0; i < NLIMBS1; i++)
 524                       summand2[i] = zlimbs[i];
 525                     summand2_len = NLIMBS1;
 526                   }
 527                 else
 528                   {
 529                     size_t ediff = ze - MANT_BIT - e;
 530                     /* Left shift the limbs of zlimbs by ediff bits.  */
 531                     size_t ldiff = ediff / GMP_LIMB_BITS;
 532                     size_t shift = ediff % GMP_LIMB_BITS;
 533                     size_t i;
 534                     for (i = 0; i < ldiff; i++)
 535                       summand2[i] = 0;
 536                     if (shift > 0)
 537                       {
 538                         mp_limb_t carry = 0;
 539                         for (i = 0; i < NLIMBS1; i++)
 540                           {
 541                             summand2[ldiff + i] = (zlimbs[i] << shift) | carry;
 542                             carry = zlimbs[i] >> (GMP_LIMB_BITS - shift);
 543                           }
 544                         summand2[ldiff + NLIMBS1] = carry;
 545                         summand2_len = ldiff + NLIMBS1 + 1;
 546                       }
 547                     else
 548                       {
 549                         for (i = 0; i < NLIMBS1; i++)
 550                           summand2[ldiff + i] = zlimbs[i];
 551                         summand2_len = ldiff + NLIMBS1;
 552                       }
 553                     /* Estimation of needed array size:
 554                        ediff = (ze - MANT_BIT) - (xye - 2 * MANT_BIT) <= 2 * MANT_BIT
 555                        therefore
 556                        summand2_len
 557                          = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1
 558                          <= (2 * MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1
 559                          <= 3 * NLIMBS1 + 1
 560                          = NLIMBS3  */
 561                     if (!(summand2_len <= NLIMBS3))
 562                       abort ();
 563                   }
 564               }
 565               /* The result is
 566                    (-1)^xys * 2^e * summand1 + (-1)^zs * 2^e * summand2.  */
 567               if (xys == zs)
 568                 {
 569                   /* Perform an addition.  */
 570                   size_t i;
 571                   mp_limb_t carry;
 572
 573                   sign = xys;
 574                   carry = 0;
 575                   for (i = 0; i < MIN (summand1_len, summand2_len); i++)
 576                     {
 577                       mp_limb_t digit1 = summand1[i];
 578                       mp_limb_t digit2 = summand2[i];
 579                       sum[i] = digit1 + digit2 + carry;
 580                       carry = (carry
 581                                ? digit1 >= (mp_limb_t)-1 - digit2
 582                                : digit1 > (mp_limb_t)-1 - digit2);
 583                     }
 584                   if (summand1_len > summand2_len)
 585                     for (; i < summand1_len; i++)
 586                       {
 587                         mp_limb_t digit1 = summand1[i];
 588                         sum[i] = carry + digit1;
 589                         carry = carry && digit1 == (mp_limb_t)-1;
 590                       }
 591                   else
 592                     for (; i < summand2_len; i++)
 593                       {
 594                         mp_limb_t digit2 = summand2[i];
 595                         sum[i] = carry + digit2;
 596                         carry = carry && digit2 == (mp_limb_t)-1;
 597                       }
 598                   if (carry)
 599                     sum[i++] = carry;
 600                   sum_len = i;
 601                 }
 602               else
 603                 {
 604                   /* Perform a subtraction.  */
 605                   /* Compare summand1 and summand2 by magnitude.  */
 606                   while (summand1[summand1_len - 1] == 0)
 607                     summand1_len--;
 608                   while (summand2[summand2_len - 1] == 0)
 609                     summand2_len--;
 610                   if (summand1_len > summand2_len)
 611                     sign = xys;
 612                   else if (summand1_len < summand2_len)
 613                     sign = zs;
 614                   else
 615                     {
 616                       size_t i = summand1_len;
 617                       for (;;)
 618                         {
 619                           --i;
 620                           if (summand1[i] > summand2[i])
 621                             {
 622                               sign = xys;
 623                               break;
 624                             }
 625                           if (summand1[i] < summand2[i])
 626                             {
 627                               sign = zs;
 628                               break;
 629                             }
 630                           if (i == 0)
 631                             /* summand1 and summand2 are equal.  */
 632                             return L_(0.0);
 633                         }
 634                     }
 635                   if (sign == xys)
 636                     {
 637                       /* Compute summand1 - summand2.  */
 638                       size_t i;
 639                       mp_limb_t carry;
 640
 641                       carry = 0;
 642                       for (i = 0; i < summand2_len; i++)
 643                         {
 644                           mp_limb_t digit1 = summand1[i];
 645                           mp_limb_t digit2 = summand2[i];
 646                           sum[i] = digit1 - digit2 - carry;
 647                           carry = (carry ? digit1 <= digit2 : digit1 < digit2);
 648                         }
 649                       for (; i < summand1_len; i++)
 650                         {
 651                           mp_limb_t digit1 = summand1[i];
 652                           sum[i] = digit1 - carry;
 653                           carry = carry && digit1 == 0;
 654                         }
 655                       if (carry)
 656                         abort ();
 657                       sum_len = summand1_len;
 658                     }
 659                   else
 660                     {
 661                       /* Compute summand2 - summand1.  */
 662                       size_t i;
 663                       mp_limb_t carry;
 664
 665                       carry = 0;
 666                       for (i = 0; i < summand1_len; i++)
 667                         {
 668                           mp_limb_t digit1 = summand1[i];
 669                           mp_limb_t digit2 = summand2[i];
 670                           sum[i] = digit2 - digit1 - carry;
 671                           carry = (carry ? digit2 <= digit1 : digit2 < digit1);
 672                         }
 673                       for (; i < summand2_len; i++)
 674                         {
 675                           mp_limb_t digit2 = summand2[i];
 676                           sum[i] = digit2 - carry;
 677                           carry = carry && digit2 == 0;
 678                         }
 679                       if (carry)
 680                         abort ();
 681                       sum_len = summand2_len;
 682                     }
 683                 }
 684             }
 685             /* The result is
 686                  (-1)^sign * 2^e * sum.  */
 687             /* Now perform the rounding to MANT_BIT mantissa bits.  */
 688             while (sum[sum_len - 1] == 0)
 689               sum_len--;
 690             /* Here we know that the most significant limb, sum[sum_len - 1], is
 691                non-zero.  */
 692             {
 693               /* How many bits the sum has.  */
 694               unsigned int sum_bits =
 695                 integer_length (sum[sum_len - 1]) + (sum_len - 1) * GMP_LIMB_BITS;
 696               /* How many bits to keep when rounding.  */
 697               unsigned int keep_bits;
 698               /* How many bits to round off.  */
 699               unsigned int roundoff_bits;
 700               if (e + (int) sum_bits >= MIN_EXP)
 701                 /* 2^e * sum >= 2^(MIN_EXP-1).
 702                    result will be a normalized number.  */
 703                 keep_bits = MANT_BIT;
 704               else if (e + (int) sum_bits >= MIN_EXP - MANT_BIT)
 705                 /* 2^e * sum >= 2^(MIN_EXP-MANT_BIT-1).
 706                    result will be a denormalized number or rounded to zero.  */
 707                 keep_bits = e + (int) sum_bits - (MIN_EXP + MANT_BIT);
 708               else
 709                 /* 2^e * sum < 2^(MIN_EXP-MANT_BIT-1).  Round to zero.  */
 710                 return L_(0.0);
 711               /* Note: 0 <= keep_bits <= MANT_BIT.  */
 712               if (sum_bits <= keep_bits)
 713                 {
 714                   /* Nothing to do.  */
 715                   roundoff_bits = 0;
 716                   keep_bits = sum_bits;
 717                 }
 718               else
 719                 {
 720                   int round_up;
 721                   roundoff_bits = sum_bits - keep_bits; /* > 0, <= sum_bits */
 722                   {
 723 #if HAVE_FEGETROUND && defined FE_TOWARDZERO
 724                     /* Cf. <https://pubs.opengroup.org/onlinepubs/9699919799/functions/fegetround.html> */
 725                     int rounding_mode = fegetround ();
 726                     if (rounding_mode == FE_TOWARDZERO)
 727                       round_up = 0;
 728                     else if (rounding_mode == FE_DOWNWARD)
 729                       round_up = sign;
 730                     else if (rounding_mode == FE_UPWARD)
 731                       round_up = !sign;
 732 #else
 733                     /* Cf. <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/float.h.html> */
 734                     int rounding_mode = FLT_ROUNDS;
 735                     if (rounding_mode == 0) /* toward zero */
 736                       round_up = 0;
 737                     else if (rounding_mode == 3) /* toward negative infinity */
 738                       round_up = sign;
 739                     else if (rounding_mode == 2) /* toward positive infinity */
 740                       round_up = !sign;
 741 #endif
 742                     else
 743                       {
 744                         /* Round to nearest.  */
 745                         round_up = 0;
 746                         /* Test bit (roundoff_bits-1).  */
 747                         if ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS]
 748                              >> ((roundoff_bits - 1) % GMP_LIMB_BITS)) & 1)
 749                           {
 750                             /* Test bits roundoff_bits-1 .. 0.  */
 751                             bool halfway =
 752                               ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS]
 753                                 & (((mp_limb_t) 1 << ((roundoff_bits - 1) % GMP_LIMB_BITS)) - 1))
 754                                == 0);
 755                             if (halfway)
 756                               {
 757                                 int i;
 758                                 for (i = (roundoff_bits - 1) / GMP_LIMB_BITS - 1; i >= 0; i--)
 759                                   if (sum[i] != 0)
 760                                     {
 761                                       halfway = false;
 762                                       break;
 763                                     }
 764                               }
 765                             if (halfway)
 766                               /* Round to even.  Test bit roundoff_bits.  */
 767                               round_up = ((sum[roundoff_bits / GMP_LIMB_BITS]
 768                                           >> (roundoff_bits % GMP_LIMB_BITS)) & 1);
 769                             else
 770                               /* Round up.  */
 771                               round_up = 1;
 772                           }
 773                       }
 774                   }
 775                   /* Perform the rounding.  */
 776                   {
 777                     size_t i = roundoff_bits / GMP_LIMB_BITS;
 778                     {
 779                       size_t j = i;
 780                       while (j > 0)
 781                         sum[--j] = 0;
 782                     }
 783                     if (round_up)
 784                       {
 785                         /* Round up.  */
 786                         sum[i] =
 787                           (sum[i]
 788                            | (((mp_limb_t) 1 << (roundoff_bits % GMP_LIMB_BITS)) - 1))
 789                           + 1;
 790                         if (sum[i] == 0)
 791                           {
 792                             /* Propagate carry.  */
 793                             while (i < sum_len - 1)
 794                               {
 795                                 ++i;
 796                                 sum[i] += 1;
 797                                 if (sum[i] != 0)
 798                                   break;
 799                               }
 800                           }
 801                         /* sum[i] is the most significant limb that was
 802                            incremented.  */
 803                         if (i == sum_len - 1 && (sum[i] & (sum[i] - 1)) == 0)
 804                           {
 805                             /* Through the carry, one more bit is needed.  */
 806                             if (sum[i] != 0)
 807                               sum_bits += 1;
 808                             else
 809                               {
 810                                 /* Instead of requiring one more limb of memory,
 811                                    perform a shift by one bit, and adjust the
 812                                    exponent.  */
 813                                 sum[i] = (mp_limb_t) 1 << (GMP_LIMB_BITS - 1);
 814                                 e += 1;
 815                               }
 816                             /* The bit sequence has the form 1000...000.  */
 817                             keep_bits = 1;
 818                           }
 819                       }
 820                     else
 821                       {
 822                         /* Round down.  */
 823                         sum[i] &= ((mp_limb_t) -1 << (roundoff_bits % GMP_LIMB_BITS));
 824                         if (i == sum_len - 1 && sum[i] == 0)
 825                           /* The entire sum has become zero.  */
 826                           return L_(0.0);
 827                       }
 828                   }
 829                 }
 830               /* The result is
 831                    (-1)^sign * 2^e * sum
 832                  and here we know that
 833                    2^(sum_bits-1) <= sum < 2^sum_bits,
 834                  and sum is a multiple of 2^(sum_bits-keep_bits), where
 835                    0 < keep_bits <= MANT_BIT  and  keep_bits <= sum_bits.
 836                  (If keep_bits was initially 0, the rounding either returned zero
 837                  or produced a bit sequence of the form 1000...000, setting
 838                  keep_bits to 1.)  */
 839               {
 840                 /* Split the keep_bits bits into chunks of at most 32 bits.  */
 841                 unsigned int chunk_count = (keep_bits - 1) / GMP_LIMB_BITS + 1;
 842                 /* 1 <= chunk_count <= ceil (sum_bits / GMP_LIMB_BITS) = sum_len. */
 843                 static const DOUBLE chunk_multiplier = /* 2^-GMP_LIMB_BITS */
 844                   L_(1.0) / ((DOUBLE) (1 << (GMP_LIMB_BITS / 2))
 845                              * (DOUBLE) (1 << ((GMP_LIMB_BITS + 1) / 2)));
 846                 unsigned int shift = sum_bits % GMP_LIMB_BITS;
 847                 DOUBLE fsum;
 848                 if (MANT_BIT <= GMP_LIMB_BITS)
 849                   {
 850                     /* Since keep_bits <= MANT_BIT <= GMP_LIMB_BITS,
 851                        chunk_count is 1.  No need for a loop.  */
 852                     if (shift == 0)
 853                       fsum = (DOUBLE) sum[sum_len - 1];
 854                     else
 855                       fsum = (DOUBLE)
 856                              ((sum[sum_len - 1] << (GMP_LIMB_BITS - shift))
 857                               | (sum_len >= 2 ? sum[sum_len - 2] >> shift : 0));
 858                   }
 859                 else
 860                   {
 861                     int k;
 862                     k = chunk_count - 1;
 863                     if (shift == 0)
 864                       {
 865                         /* First loop round.  */
 866                         fsum = (DOUBLE) sum[sum_len - k - 1];
 867                         /* General loop.  */
 868                         while (--k >= 0)
 869                           {
 870                             fsum *= chunk_multiplier;
 871                             fsum += (DOUBLE) sum[sum_len - k - 1];
 872                           }
 873                       }
 874                     else
 875                       {
 876                         /* First loop round.  */
 877                         {
 878                           VOLATILE mp_limb_t chunk =
 879                             (sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
 880                             | (sum_len >= k + 2 ? sum[sum_len - k - 2] >> shift : 0);
 881                           fsum = (DOUBLE) chunk;
 882                         }
 883                         /* General loop.  */
 884                         while (--k >= 0)
 885                           {
 886                             fsum *= chunk_multiplier;
 887                             {
 888                               VOLATILE mp_limb_t chunk =
 889                                 (sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
 890                                 | (sum[sum_len - k - 2] >> shift);
 891                               fsum += (DOUBLE) chunk;
 892                             }
 893                           }
 894                       }
 895                   }
 896                 fsum = LDEXP (fsum, e + (int) sum_bits - GMP_LIMB_BITS);
 897                 return (sign ? - fsum : fsum);
 898               }
 899             }
 900           }
 901         }
 902       else
 903         return z;
 904     }
 905   else
 906     return (x * y) + z;
 907 }