lib/fma.c

   1 /* Fused multiply-add.
   2    Copyright (C) 2007, 2009, 2011-2024 Free Software Foundation, Inc.
   3
   4    This file is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU Lesser General Public License as
   6    published by the Free Software Foundation, either version 3 of the
   7    License, or (at your option) any later version.
   8
   9    This file is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2011.  */
  18
  19 #if ! defined USE_LONG_DOUBLE
  20 # include <config.h>
  21 #endif
  22
  23 /* Specification.  */
  24 #include <math.h>
  25
  26 #include <limits.h>
  27 #include <stdlib.h>
  28
  29 #if HAVE_FEGETROUND
  30 # include <fenv.h>
  31 #endif
  32
  33 #include "float+.h"
  34 #include "integer_length.h"
  35
  36 #ifdef USE_LONG_DOUBLE
  37 # define FUNC fmal
  38 # define DOUBLE long double
  39 # define FREXP frexpl
  40 # define LDEXP ldexpl
  41 # define MIN_EXP LDBL_MIN_EXP
  42 # define MANT_BIT LDBL_MANT_BIT
  43 # define L_(literal) literal##L
  44 #elif ! defined USE_FLOAT
  45 # define FUNC fma
  46 # define DOUBLE double
  47 # define FREXP frexp
  48 # define LDEXP ldexp
  49 # define MIN_EXP DBL_MIN_EXP
  50 # define MANT_BIT DBL_MANT_BIT
  51 # define L_(literal) literal
  52 #else /* defined USE_FLOAT */
  53 # define FUNC fmaf
  54 # define DOUBLE float
  55 # define FREXP frexpf
  56 # define LDEXP ldexpf
  57 # define MIN_EXP FLT_MIN_EXP
  58 # define MANT_BIT FLT_MANT_BIT
  59 # define L_(literal) literal##f
  60 #endif
  61
  62 #undef MAX
  63 #define MAX(a,b) ((a) > (b) ? (a) : (b))
  64
  65 #undef MIN
  66 #define MIN(a,b) ((a) < (b) ? (a) : (b))
  67
  68 /* MSVC with option -fp:strict refuses to compile constant initializers that
  69    contain floating-point operations.  Pacify this compiler.  */
  70 #if defined _MSC_VER && !defined __clang__
  71 # pragma fenv_access (off)
  72 #endif
  73
  74 /* Work around GCC 4.2.1 bug on OpenBSD 5.1/SPARC64.  */
  75 #if defined __GNUC__ && defined __sparc__
  76 # define VOLATILE volatile
  77 #else
  78 # define VOLATILE
  79 #endif
  80
  81 /* It is possible to write an implementation of fused multiply-add with
  82    floating-point operations alone.  See
  83      Sylvie Boldo, Guillaume Melquiond:
  84      Emulation of FMA and correctly-rounded sums: proved algorithms using
  85      rounding to odd.
  86      <https://www.lri.fr/~melquion/doc/08-tc.pdf>
  87    But is it complicated.
  88    Here we take the simpler (and probably slower) approach of doing
  89    multi-precision arithmetic.  */
  90
  91 /* We use the naming conventions of GNU gmp, but vastly simpler (and slower)
  92    algorithms.  */
  93
  94 typedef unsigned int mp_limb_t;
  95 #define GMP_LIMB_BITS 32
  96 static_assert (sizeof (mp_limb_t) * CHAR_BIT == GMP_LIMB_BITS);
  97
  98 typedef unsigned long long mp_twolimb_t;
  99 #define GMP_TWOLIMB_BITS 64
 100 static_assert (sizeof (mp_twolimb_t) * CHAR_BIT == GMP_TWOLIMB_BITS);
 101
 102 /* Number of limbs needed for a single DOUBLE.  */
 103 #define NLIMBS1 ((MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS)
 104
 105 /* Number of limbs needed for the accumulator.  */
 106 #define NLIMBS3 (3 * NLIMBS1 + 1)
 107
 108 /* Assuming 0.5 <= x < 1.0:
 109    Convert the mantissa (x * 2^DBL_MANT_BIT) to a sequence of limbs.  */
 110 static void
 111 decode (DOUBLE x, mp_limb_t limbs[NLIMBS1])
 112 {
 113   /* I'm not sure whether it's safe to cast a 'double' value between
 114      2^31 and 2^32 to 'unsigned int', therefore play safe and cast only
 115      'double' values between 0 and 2^31 (to 'unsigned int' or 'int',
 116      doesn't matter).
 117      So, we split the MANT_BIT bits of x into a number of chunks of
 118      at most 31 bits.  */
 119   enum { chunk_count = (MANT_BIT - 1) / 31 + 1 };
 120   /* Variables used for storing the bits limb after limb.  */
 121   mp_limb_t *p = limbs + NLIMBS1 - 1;
 122   mp_limb_t accu = 0;
 123   unsigned int bits_needed = MANT_BIT - (NLIMBS1 - 1) * GMP_LIMB_BITS;
 124   /* The bits bits_needed-1...0 need to be ORed into the accu.
 125      1 <= bits_needed <= GMP_LIMB_BITS.  */
 126   /* Unroll the first 4 loop rounds.  */
 127   if (chunk_count > 0)
 128     {
 129       /* Here we still have MANT_BIT-0*31 bits to extract from x.  */
 130       enum { chunk_bits = MIN (31, MANT_BIT - 0 * 31) }; /* > 0, <= 31 */
 131       mp_limb_t d;
 132       x *= (mp_limb_t) 1 << chunk_bits;
 133       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 134       x -= d;
 135       if (!(x >= L_(0.0) && x < L_(1.0)))
 136         abort ();
 137       if (bits_needed < chunk_bits)
 138         {
 139           /* store bits_needed bits */
 140           accu |= d >> (chunk_bits - bits_needed);
 141           *p = accu;
 142           if (p == limbs)
 143             goto done;
 144           p--;
 145           /* hold (chunk_bits - bits_needed) bits */
 146           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 147           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 148         }
 149       else
 150         {
 151           /* store chunk_bits bits */
 152           accu |= d << (bits_needed - chunk_bits);
 153           bits_needed -= chunk_bits;
 154           if (bits_needed == 0)
 155             {
 156               *p = accu;
 157               if (p == limbs)
 158                 goto done;
 159               p--;
 160               accu = 0;
 161               bits_needed = GMP_LIMB_BITS;
 162             }
 163         }
 164     }
 165   if (chunk_count > 1)
 166     {
 167       /* Here we still have MANT_BIT-1*31 bits to extract from x.  */
 168       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 1 * 31, 0)) }; /* > 0, <= 31 */
 169       mp_limb_t d;
 170       x *= (mp_limb_t) 1 << chunk_bits;
 171       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 172       x -= d;
 173       if (!(x >= L_(0.0) && x < L_(1.0)))
 174         abort ();
 175       if (bits_needed < chunk_bits)
 176         {
 177           /* store bits_needed bits */
 178           accu |= d >> (chunk_bits - bits_needed);
 179           *p = accu;
 180           if (p == limbs)
 181             goto done;
 182           p--;
 183           /* hold (chunk_bits - bits_needed) bits */
 184           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 185           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 186         }
 187       else
 188         {
 189           /* store chunk_bits bits */
 190           accu |= d << (bits_needed - chunk_bits);
 191           bits_needed -= chunk_bits;
 192           if (bits_needed == 0)
 193             {
 194               *p = accu;
 195               if (p == limbs)
 196                 goto done;
 197               p--;
 198               accu = 0;
 199               bits_needed = GMP_LIMB_BITS;
 200             }
 201         }
 202     }
 203   if (chunk_count > 2)
 204     {
 205       /* Here we still have MANT_BIT-2*31 bits to extract from x.  */
 206       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 2 * 31, 0)) }; /* > 0, <= 31 */
 207       mp_limb_t d;
 208       x *= (mp_limb_t) 1 << chunk_bits;
 209       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 210       x -= d;
 211       if (!(x >= L_(0.0) && x < L_(1.0)))
 212         abort ();
 213       if (bits_needed < chunk_bits)
 214         {
 215           /* store bits_needed bits */
 216           accu |= d >> (chunk_bits - bits_needed);
 217           *p = accu;
 218           if (p == limbs)
 219             goto done;
 220           p--;
 221           /* hold (chunk_bits - bits_needed) bits */
 222           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 223           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 224         }
 225       else
 226         {
 227           /* store chunk_bits bits */
 228           accu |= d << (bits_needed - chunk_bits);
 229           bits_needed -= chunk_bits;
 230           if (bits_needed == 0)
 231             {
 232               *p = accu;
 233               if (p == limbs)
 234                 goto done;
 235               p--;
 236               accu = 0;
 237               bits_needed = GMP_LIMB_BITS;
 238             }
 239         }
 240     }
 241   if (chunk_count > 3)
 242     {
 243       /* Here we still have MANT_BIT-3*31 bits to extract from x.  */
 244       enum { chunk_bits = MIN (31, MAX (MANT_BIT - 3 * 31, 0)) }; /* > 0, <= 31 */
 245       mp_limb_t d;
 246       x *= (mp_limb_t) 1 << chunk_bits;
 247       d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 248       x -= d;
 249       if (!(x >= L_(0.0) && x < L_(1.0)))
 250         abort ();
 251       if (bits_needed < chunk_bits)
 252         {
 253           /* store bits_needed bits */
 254           accu |= d >> (chunk_bits - bits_needed);
 255           *p = accu;
 256           if (p == limbs)
 257             goto done;
 258           p--;
 259           /* hold (chunk_bits - bits_needed) bits */
 260           accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 261           bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 262         }
 263       else
 264         {
 265           /* store chunk_bits bits */
 266           accu |= d << (bits_needed - chunk_bits);
 267           bits_needed -= chunk_bits;
 268           if (bits_needed == 0)
 269             {
 270               *p = accu;
 271               if (p == limbs)
 272                 goto done;
 273               p--;
 274               accu = 0;
 275               bits_needed = GMP_LIMB_BITS;
 276             }
 277         }
 278     }
 279   if (chunk_count > 4)
 280     {
 281       /* Here we still have MANT_BIT-4*31 bits to extract from x.  */
 282       /* Generic loop.  */
 283       size_t k;
 284       for (k = 4; k < chunk_count; k++)
 285         {
 286           size_t chunk_bits = MIN (31, MANT_BIT - k * 31); /* > 0, <= 31 */
 287           mp_limb_t d;
 288           x *= (mp_limb_t) 1 << chunk_bits;
 289           d = (int) x; /* 0 <= d < 2^chunk_bits.  */
 290           x -= d;
 291           if (!(x >= L_(0.0) && x < L_(1.0)))
 292             abort ();
 293           if (bits_needed < chunk_bits)
 294             {
 295               /* store bits_needed bits */
 296               accu |= d >> (chunk_bits - bits_needed);
 297               *p = accu;
 298               if (p == limbs)
 299                 goto done;
 300               p--;
 301               /* hold (chunk_bits - bits_needed) bits */
 302               accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed));
 303               bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed);
 304             }
 305           else
 306             {
 307               /* store chunk_bits bits */
 308               accu |= d << (bits_needed - chunk_bits);
 309               bits_needed -= chunk_bits;
 310               if (bits_needed == 0)
 311                 {
 312                   *p = accu;
 313                   if (p == limbs)
 314                     goto done;
 315                   p--;
 316                   accu = 0;
 317                   bits_needed = GMP_LIMB_BITS;
 318                 }
 319             }
 320         }
 321     }
 322   /* We shouldn't get here.  */
 323   abort ();
 324
 325  done: ;
 326 #ifndef USE_LONG_DOUBLE /* On FreeBSD 6.1/x86, 'long double' numbers sometimes
 327                            have excess precision.  */
 328   if (!(x == L_(0.0)))
 329     abort ();
 330 #endif
 331 }
 332
 333 /* Multiply two sequences of limbs.  */
 334 static void
 335 multiply (mp_limb_t xlimbs[NLIMBS1], mp_limb_t ylimbs[NLIMBS1],
 336           mp_limb_t prod_limbs[2 * NLIMBS1])
 337 {
 338   size_t k, i, j;
 339   enum { len1 = NLIMBS1 };
 340   enum { len2 = NLIMBS1 };
 341
 342   for (k = len2; k > 0; )
 343     prod_limbs[--k] = 0;
 344   for (i = 0; i < len1; i++)
 345     {
 346       mp_limb_t digit1 = xlimbs[i];
 347       mp_twolimb_t carry = 0;
 348       for (j = 0; j < len2; j++)
 349         {
 350           mp_limb_t digit2 = ylimbs[j];
 351           carry += (mp_twolimb_t) digit1 * (mp_twolimb_t) digit2;
 352           carry += prod_limbs[i + j];
 353           prod_limbs[i + j] = (mp_limb_t) carry;
 354           carry = carry >> GMP_LIMB_BITS;
 355         }
 356       prod_limbs[i + len2] = (mp_limb_t) carry;
 357     }
 358 }
 359
 360 DOUBLE
 361 FUNC (DOUBLE x, DOUBLE y, DOUBLE z)
 362 {
 363   if (isfinite (x) && isfinite (y))
 364     {
 365       if (isfinite (z))
 366         {
 367           /* x, y, z are all finite.  */
 368           if (x == L_(0.0) || y == L_(0.0))
 369             return z;
 370           if (z == L_(0.0))
 371             return x * y;
 372           /* x, y, z are all non-zero.
 373              The result is x * y + z.  */
 374           {
 375             int e;                  /* exponent of x * y + z */
 376             int sign;
 377             mp_limb_t sum[NLIMBS3];
 378             size_t sum_len;
 379
 380             {
 381               int xys;                /* sign of x * y */
 382               int zs;                 /* sign of z */
 383               int xye;                /* sum of exponents of x and y */
 384               int ze;                 /* exponent of z */
 385               mp_limb_t summand1[NLIMBS3];
 386               size_t summand1_len;
 387               mp_limb_t summand2[NLIMBS3];
 388               size_t summand2_len;
 389
 390               {
 391                 mp_limb_t zlimbs[NLIMBS1];
 392                 mp_limb_t xylimbs[2 * NLIMBS1];
 393
 394                 {
 395                   DOUBLE xn;              /* normalized part of x */
 396                   DOUBLE yn;              /* normalized part of y */
 397                   DOUBLE zn;              /* normalized part of z */
 398                   int xe;                 /* exponent of x */
 399                   int ye;                 /* exponent of y */
 400                   mp_limb_t xlimbs[NLIMBS1];
 401                   mp_limb_t ylimbs[NLIMBS1];
 402
 403                   xys = 0;
 404                   xn = x;
 405                   if (x < 0)
 406                     {
 407                       xys = 1;
 408                       xn = - x;
 409                     }
 410                   yn = y;
 411                   if (y < 0)
 412                     {
 413                       xys = 1 - xys;
 414                       yn = - y;
 415                     }
 416
 417                   zs = 0;
 418                   zn = z;
 419                   if (z < 0)
 420                     {
 421                       zs = 1;
 422                       zn = - z;
 423                     }
 424
 425                   /* xn, yn, zn are all positive.
 426                      The result is (-1)^xys * xn * yn + (-1)^zs * zn.  */
 427                   xn = FREXP (xn, &xe);
 428                   yn = FREXP (yn, &ye);
 429                   zn = FREXP (zn, &ze);
 430                   xye = xe + ye;
 431                   /* xn, yn, zn are all < 1.0 and >= 0.5.
 432                      The result is
 433                        (-1)^xys * 2^xye * xn * yn + (-1)^zs * 2^ze * zn.  */
 434                   if (xye < ze - MANT_BIT)
 435                     {
 436                       /* 2^xye * xn * yn < 2^xye <= 2^(ze-MANT_BIT-1)  */
 437                       return z;
 438                     }
 439                   if (xye - 2 * MANT_BIT > ze)
 440                     {
 441                       /* 2^ze * zn < 2^ze <= 2^(xye-2*MANT_BIT-1).
 442                          We cannot simply do
 443                            return x * y;
 444                          because it would round differently: A round-to-even
 445                          in the multiplication can be a round-up or round-down
 446                          here, due to z.  So replace z with a value that doesn't
 447                          require the use of long bignums but that rounds the
 448                          same way.  */
 449                       zn = L_(0.5);
 450                       ze = xye - 2 * MANT_BIT - 1;
 451                     }
 452                   /* Convert mantissas of xn, yn, zn to limb sequences:
 453                      xlimbs = 2^MANT_BIT * xn
 454                      ylimbs = 2^MANT_BIT * yn
 455                      zlimbs = 2^MANT_BIT * zn  */
 456                   decode (xn, xlimbs);
 457                   decode (yn, ylimbs);
 458                   decode (zn, zlimbs);
 459                   /* Multiply the mantissas of xn and yn:
 460                      xylimbs = xlimbs * ylimbs  */
 461                   multiply (xlimbs, ylimbs, xylimbs);
 462                 }
 463                 /* The result is
 464                      (-1)^xys * 2^(xye-2*MANT_BIT) * xylimbs
 465                      + (-1)^zs * 2^(ze-MANT_BIT) * zlimbs.
 466                    Compute
 467                      e = min (xye-2*MANT_BIT, ze-MANT_BIT)
 468                    then
 469                      summand1 = 2^(xye-2*MANT_BIT-e) * xylimbs
 470                      summand2 = 2^(ze-MANT_BIT-e) * zlimbs  */
 471                 e = MIN (xye - 2 * MANT_BIT, ze - MANT_BIT);
 472                 if (e == xye - 2 * MANT_BIT)
 473                   {
 474                     /* Simply copy the limbs of xylimbs.  */
 475                     size_t i;
 476                     for (i = 0; i < 2 * NLIMBS1; i++)
 477                       summand1[i] = xylimbs[i];
 478                     summand1_len = 2 * NLIMBS1;
 479                   }
 480                 else
 481                   {
 482                     size_t ediff = xye - 2 * MANT_BIT - e;
 483                     /* Left shift the limbs of xylimbs by ediff bits.  */
 484                     size_t ldiff = ediff / GMP_LIMB_BITS;
 485                     size_t shift = ediff % GMP_LIMB_BITS;
 486                     size_t i;
 487                     for (i = 0; i < ldiff; i++)
 488                       summand1[i] = 0;
 489                     if (shift > 0)
 490                       {
 491                         mp_limb_t carry = 0;
 492                         for (i = 0; i < 2 * NLIMBS1; i++)
 493                           {
 494                             summand1[ldiff + i] = (xylimbs[i] << shift) | carry;
 495                             carry = xylimbs[i] >> (GMP_LIMB_BITS - shift);
 496                           }
 497                         summand1[ldiff + 2 * NLIMBS1] = carry;
 498                         summand1_len = ldiff + 2 * NLIMBS1 + 1;
 499                       }
 500                     else
 501                       {
 502                         for (i = 0; i < 2 * NLIMBS1; i++)
 503                           summand1[ldiff + i] = xylimbs[i];
 504                         summand1_len = ldiff + 2 * NLIMBS1;
 505                       }
 506                     /* Estimation of needed array size:
 507                        ediff = (xye - 2 * MANT_BIT) - (ze - MANT_BIT) <= MANT_BIT + 1
 508                        therefore
 509                        summand1_len
 510                          = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + 2 * NLIMBS1
 511                          <= (MANT_BIT + GMP_LIMB_BITS) / GMP_LIMB_BITS + 2 * NLIMBS1
 512                          <= 3 * NLIMBS1 + 1
 513                          = NLIMBS3  */
 514                     if (!(summand1_len <= NLIMBS3))
 515                       abort ();
 516                   }
 517                 if (e == ze - MANT_BIT)
 518                   {
 519                     /* Simply copy the limbs of zlimbs.  */
 520                     size_t i;
 521                     for (i = 0; i < NLIMBS1; i++)
 522                       summand2[i] = zlimbs[i];
 523                     summand2_len = NLIMBS1;
 524                   }
 525                 else
 526                   {
 527                     size_t ediff = ze - MANT_BIT - e;
 528                     /* Left shift the limbs of zlimbs by ediff bits.  */
 529                     size_t ldiff = ediff / GMP_LIMB_BITS;
 530                     size_t shift = ediff % GMP_LIMB_BITS;
 531                     size_t i;
 532                     for (i = 0; i < ldiff; i++)
 533                       summand2[i] = 0;
 534                     if (shift > 0)
 535                       {
 536                         mp_limb_t carry = 0;
 537                         for (i = 0; i < NLIMBS1; i++)
 538                           {
 539                             summand2[ldiff + i] = (zlimbs[i] << shift) | carry;
 540                             carry = zlimbs[i] >> (GMP_LIMB_BITS - shift);
 541                           }
 542                         summand2[ldiff + NLIMBS1] = carry;
 543                         summand2_len = ldiff + NLIMBS1 + 1;
 544                       }
 545                     else
 546                       {
 547                         for (i = 0; i < NLIMBS1; i++)
 548                           summand2[ldiff + i] = zlimbs[i];
 549                         summand2_len = ldiff + NLIMBS1;
 550                       }
 551                     /* Estimation of needed array size:
 552                        ediff = (ze - MANT_BIT) - (xye - 2 * MANT_BIT) <= 2 * MANT_BIT
 553                        therefore
 554                        summand2_len
 555                          = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1
 556                          <= (2 * MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1
 557                          <= 3 * NLIMBS1 + 1
 558                          = NLIMBS3  */
 559                     if (!(summand2_len <= NLIMBS3))
 560                       abort ();
 561                   }
 562               }
 563               /* The result is
 564                    (-1)^xys * 2^e * summand1 + (-1)^zs * 2^e * summand2.  */
 565               if (xys == zs)
 566                 {
 567                   /* Perform an addition.  */
 568                   size_t i;
 569                   mp_limb_t carry;
 570
 571                   sign = xys;
 572                   carry = 0;
 573                   for (i = 0; i < MIN (summand1_len, summand2_len); i++)
 574                     {
 575                       mp_limb_t digit1 = summand1[i];
 576                       mp_limb_t digit2 = summand2[i];
 577                       sum[i] = digit1 + digit2 + carry;
 578                       carry = (carry
 579                                ? digit1 >= (mp_limb_t)-1 - digit2
 580                                : digit1 > (mp_limb_t)-1 - digit2);
 581                     }
 582                   if (summand1_len > summand2_len)
 583                     for (; i < summand1_len; i++)
 584                       {
 585                         mp_limb_t digit1 = summand1[i];
 586                         sum[i] = carry + digit1;
 587                         carry = carry && digit1 == (mp_limb_t)-1;
 588                       }
 589                   else
 590                     for (; i < summand2_len; i++)
 591                       {
 592                         mp_limb_t digit2 = summand2[i];
 593                         sum[i] = carry + digit2;
 594                         carry = carry && digit2 == (mp_limb_t)-1;
 595                       }
 596                   if (carry)
 597                     sum[i++] = carry;
 598                   sum_len = i;
 599                 }
 600               else
 601                 {
 602                   /* Perform a subtraction.  */
 603                   /* Compare summand1 and summand2 by magnitude.  */
 604                   while (summand1[summand1_len - 1] == 0)
 605                     summand1_len--;
 606                   while (summand2[summand2_len - 1] == 0)
 607                     summand2_len--;
 608                   if (summand1_len > summand2_len)
 609                     sign = xys;
 610                   else if (summand1_len < summand2_len)
 611                     sign = zs;
 612                   else
 613                     {
 614                       size_t i = summand1_len;
 615                       for (;;)
 616                         {
 617                           --i;
 618                           if (summand1[i] > summand2[i])
 619                             {
 620                               sign = xys;
 621                               break;
 622                             }
 623                           if (summand1[i] < summand2[i])
 624                             {
 625                               sign = zs;
 626                               break;
 627                             }
 628                           if (i == 0)
 629                             /* summand1 and summand2 are equal.  */
 630                             return L_(0.0);
 631                         }
 632                     }
 633                   if (sign == xys)
 634                     {
 635                       /* Compute summand1 - summand2.  */
 636                       size_t i;
 637                       mp_limb_t carry;
 638
 639                       carry = 0;
 640                       for (i = 0; i < summand2_len; i++)
 641                         {
 642                           mp_limb_t digit1 = summand1[i];
 643                           mp_limb_t digit2 = summand2[i];
 644                           sum[i] = digit1 - digit2 - carry;
 645                           carry = (carry ? digit1 <= digit2 : digit1 < digit2);
 646                         }
 647                       for (; i < summand1_len; i++)
 648                         {
 649                           mp_limb_t digit1 = summand1[i];
 650                           sum[i] = digit1 - carry;
 651                           carry = carry && digit1 == 0;
 652                         }
 653                       if (carry)
 654                         abort ();
 655                       sum_len = summand1_len;
 656                     }
 657                   else
 658                     {
 659                       /* Compute summand2 - summand1.  */
 660                       size_t i;
 661                       mp_limb_t carry;
 662
 663                       carry = 0;
 664                       for (i = 0; i < summand1_len; i++)
 665                         {
 666                           mp_limb_t digit1 = summand1[i];
 667                           mp_limb_t digit2 = summand2[i];
 668                           sum[i] = digit2 - digit1 - carry;
 669                           carry = (carry ? digit2 <= digit1 : digit2 < digit1);
 670                         }
 671                       for (; i < summand2_len; i++)
 672                         {
 673                           mp_limb_t digit2 = summand2[i];
 674                           sum[i] = digit2 - carry;
 675                           carry = carry && digit2 == 0;
 676                         }
 677                       if (carry)
 678                         abort ();
 679                       sum_len = summand2_len;
 680                     }
 681                 }
 682             }
 683             /* The result is
 684                  (-1)^sign * 2^e * sum.  */
 685             /* Now perform the rounding to MANT_BIT mantissa bits.  */
 686             while (sum[sum_len - 1] == 0)
 687               sum_len--;
 688             /* Here we know that the most significant limb, sum[sum_len - 1], is
 689                non-zero.  */
 690             {
 691               /* How many bits the sum has.  */
 692               unsigned int sum_bits =
 693                 integer_length (sum[sum_len - 1]) + (sum_len - 1) * GMP_LIMB_BITS;
 694               /* How many bits to keep when rounding.  */
 695               unsigned int keep_bits;
 696               /* How many bits to round off.  */
 697               unsigned int roundoff_bits;
 698               if (e + (int) sum_bits >= MIN_EXP)
 699                 /* 2^e * sum >= 2^(MIN_EXP-1).
 700                    result will be a normalized number.  */
 701                 keep_bits = MANT_BIT;
 702               else if (e + (int) sum_bits >= MIN_EXP - MANT_BIT)
 703                 /* 2^e * sum >= 2^(MIN_EXP-MANT_BIT-1).
 704                    result will be a denormalized number or rounded to zero.  */
 705                 keep_bits = e + (int) sum_bits - (MIN_EXP + MANT_BIT);
 706               else
 707                 /* 2^e * sum < 2^(MIN_EXP-MANT_BIT-1).  Round to zero.  */
 708                 return L_(0.0);
 709               /* Note: 0 <= keep_bits <= MANT_BIT.  */
 710               if (sum_bits <= keep_bits)
 711                 {
 712                   /* Nothing to do.  */
 713                   roundoff_bits = 0;
 714                   keep_bits = sum_bits;
 715                 }
 716               else
 717                 {
 718                   int round_up;
 719                   roundoff_bits = sum_bits - keep_bits; /* > 0, <= sum_bits */
 720                   {
 721 #if HAVE_FEGETROUND && defined FE_TOWARDZERO
 722                     /* Cf. <https://pubs.opengroup.org/onlinepubs/9699919799/functions/fegetround.html> */
 723                     int rounding_mode = fegetround ();
 724                     if (rounding_mode == FE_TOWARDZERO)
 725                       round_up = 0;
 726 # if defined FE_DOWNWARD /* not defined on sh4 */
 727                     else if (rounding_mode == FE_DOWNWARD)
 728                       round_up = sign;
 729 # endif
 730 # if defined FE_UPWARD /* not defined on sh4 */
 731                     else if (rounding_mode == FE_UPWARD)
 732                       round_up = !sign;
 733 # endif
 734 #else
 735                     /* Cf. <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/float.h.html> */
 736                     int rounding_mode = FLT_ROUNDS;
 737                     if (rounding_mode == 0) /* toward zero */
 738                       round_up = 0;
 739                     else if (rounding_mode == 3) /* toward negative infinity */
 740                       round_up = sign;
 741                     else if (rounding_mode == 2) /* toward positive infinity */
 742                       round_up = !sign;
 743 #endif
 744                     else
 745                       {
 746                         /* Round to nearest.  */
 747                         round_up = 0;
 748                         /* Test bit (roundoff_bits-1).  */
 749                         if ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS]
 750                              >> ((roundoff_bits - 1) % GMP_LIMB_BITS)) & 1)
 751                           {
 752                             /* Test bits roundoff_bits-1 .. 0.  */
 753                             bool halfway =
 754                               ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS]
 755                                 & (((mp_limb_t) 1 << ((roundoff_bits - 1) % GMP_LIMB_BITS)) - 1))
 756                                == 0);
 757                             if (halfway)
 758                               {
 759                                 int i;
 760                                 for (i = (roundoff_bits - 1) / GMP_LIMB_BITS - 1; i >= 0; i--)
 761                                   if (sum[i] != 0)
 762                                     {
 763                                       halfway = false;
 764                                       break;
 765                                     }
 766                               }
 767                             if (halfway)
 768                               /* Round to even.  Test bit roundoff_bits.  */
 769                               round_up = ((sum[roundoff_bits / GMP_LIMB_BITS]
 770                                           >> (roundoff_bits % GMP_LIMB_BITS)) & 1);
 771                             else
 772                               /* Round up.  */
 773                               round_up = 1;
 774                           }
 775                       }
 776                   }
 777                   /* Perform the rounding.  */
 778                   {
 779                     size_t i = roundoff_bits / GMP_LIMB_BITS;
 780                     {
 781                       size_t j = i;
 782                       while (j > 0)
 783                         sum[--j] = 0;
 784                     }
 785                     if (round_up)
 786                       {
 787                         /* Round up.  */
 788                         sum[i] =
 789                           (sum[i]
 790                            | (((mp_limb_t) 1 << (roundoff_bits % GMP_LIMB_BITS)) - 1))
 791                           + 1;
 792                         if (sum[i] == 0)
 793                           {
 794                             /* Propagate carry.  */
 795                             while (i < sum_len - 1)
 796                               {
 797                                 ++i;
 798                                 sum[i] += 1;
 799                                 if (sum[i] != 0)
 800                                   break;
 801                               }
 802                           }
 803                         /* sum[i] is the most significant limb that was
 804                            incremented.  */
 805                         if (i == sum_len - 1 && (sum[i] & (sum[i] - 1)) == 0)
 806                           {
 807                             /* Through the carry, one more bit is needed.  */
 808                             if (sum[i] != 0)
 809                               sum_bits += 1;
 810                             else
 811                               {
 812                                 /* Instead of requiring one more limb of memory,
 813                                    perform a shift by one bit, and adjust the
 814                                    exponent.  */
 815                                 sum[i] = (mp_limb_t) 1 << (GMP_LIMB_BITS - 1);
 816                                 e += 1;
 817                               }
 818                             /* The bit sequence has the form 1000...000.  */
 819                             keep_bits = 1;
 820                           }
 821                       }
 822                     else
 823                       {
 824                         /* Round down.  */
 825                         sum[i] &= ((mp_limb_t) -1 << (roundoff_bits % GMP_LIMB_BITS));
 826                         if (i == sum_len - 1 && sum[i] == 0)
 827                           /* The entire sum has become zero.  */
 828                           return L_(0.0);
 829                       }
 830                   }
 831                 }
 832               /* The result is
 833                    (-1)^sign * 2^e * sum
 834                  and here we know that
 835                    2^(sum_bits-1) <= sum < 2^sum_bits,
 836                  and sum is a multiple of 2^(sum_bits-keep_bits), where
 837                    0 < keep_bits <= MANT_BIT  and  keep_bits <= sum_bits.
 838                  (If keep_bits was initially 0, the rounding either returned zero
 839                  or produced a bit sequence of the form 1000...000, setting
 840                  keep_bits to 1.)  */
 841               {
 842                 /* Split the keep_bits bits into chunks of at most 32 bits.  */
 843                 unsigned int chunk_count = (keep_bits - 1) / GMP_LIMB_BITS + 1;
 844                 /* 1 <= chunk_count <= ceil (sum_bits / GMP_LIMB_BITS) = sum_len. */
 845                 static const DOUBLE chunk_multiplier = /* 2^-GMP_LIMB_BITS */
 846                   L_(1.0) / ((DOUBLE) (1 << (GMP_LIMB_BITS / 2))
 847                              * (DOUBLE) (1 << ((GMP_LIMB_BITS + 1) / 2)));
 848                 unsigned int shift = sum_bits % GMP_LIMB_BITS;
 849                 DOUBLE fsum;
 850                 if (MANT_BIT <= GMP_LIMB_BITS)
 851                   {
 852                     /* Since keep_bits <= MANT_BIT <= GMP_LIMB_BITS,
 853                        chunk_count is 1.  No need for a loop.  */
 854                     if (shift == 0)
 855                       fsum = (DOUBLE) sum[sum_len - 1];
 856                     else
 857                       fsum = (DOUBLE)
 858                              ((sum[sum_len - 1] << (GMP_LIMB_BITS - shift))
 859                               | (sum_len >= 2 ? sum[sum_len - 2] >> shift : 0));
 860                   }
 861                 else
 862                   {
 863                     int k;
 864                     k = chunk_count - 1;
 865                     if (shift == 0)
 866                       {
 867                         /* First loop round.  */
 868                         fsum = (DOUBLE) sum[sum_len - k - 1];
 869                         /* General loop.  */
 870                         while (--k >= 0)
 871                           {
 872                             fsum *= chunk_multiplier;
 873                             fsum += (DOUBLE) sum[sum_len - k - 1];
 874                           }
 875                       }
 876                     else
 877                       {
 878                         /* First loop round.  */
 879                         {
 880                           VOLATILE mp_limb_t chunk =
 881                             (sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
 882                             | (sum_len >= k + 2 ? sum[sum_len - k - 2] >> shift : 0);
 883                           fsum = (DOUBLE) chunk;
 884                         }
 885                         /* General loop.  */
 886                         while (--k >= 0)
 887                           {
 888                             fsum *= chunk_multiplier;
 889                             {
 890                               VOLATILE mp_limb_t chunk =
 891                                 (sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift))
 892                                 | (sum[sum_len - k - 2] >> shift);
 893                               fsum += (DOUBLE) chunk;
 894                             }
 895                           }
 896                       }
 897                   }
 898                 fsum = LDEXP (fsum, e + (int) sum_bits - GMP_LIMB_BITS);
 899                 return (sign ? - fsum : fsum);
 900               }
 901             }
 902           }
 903         }
 904       else
 905         return z;
 906     }
 907   else
 908     return (x * y) + z;
 909 }