math/mul_split.h

   1 /* Compute full X * Y for double type.
   2    Copyright (C) 2013-2024 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4
   5    The GNU C Library is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU Lesser General Public
   7    License as published by the Free Software Foundation; either
   8    version 2.1 of the License, or (at your option) any later version.
   9
  10    The GNU C Library is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public
  16    License along with the GNU C Library; if not, see
  17    <https://www.gnu.org/licenses/>.  */
  18
  19 #ifndef _MUL_SPLIT_H
  20 #define _MUL_SPLIT_H
  21
  22 #include <float.h>
  23
  24 /* Calculate X * Y exactly and store the result in *HI + *LO.  It is
  25    given that the values are small enough that no overflow occurs and
  26    large enough (or zero) that no underflow occurs.  */
  27
  28 static void
  29 mul_split (double *hi, double *lo, double x, double y)
  30 {
  31 #ifdef __FP_FAST_FMA
  32   /* Fast built-in fused multiply-add.  */
  33   *hi = x * y;
  34   *lo = __builtin_fma (x, y, -*hi);
  35 #else
  36   /* Apply Dekker's algorithm.  */
  37   *hi = x * y;
  38 # define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
  39   double x1 = x * C;
  40   double y1 = y * C;
  41 # undef C
  42   x1 = (x - x1) + x1;
  43   y1 = (y - y1) + y1;
  44   double x2 = x - x1;
  45   double y2 = y - y1;
  46   *lo = (((x1 * y1 - *hi) + x1 * y2) + x2 * y1) + x2 * y2;
  47 #endif
  48 }
  49
  50 /* Add a + b exactly, such that *hi + *lo = a + b.
  51    Assumes |a| >= |b| and rounding to nearest.  */
  52 static inline void
  53 fast_two_sum (double *hi, double *lo, double a, double b)
  54 {
  55   double e;
  56
  57   *hi = a + b;
  58   e = *hi - a; /* exact  */
  59   *lo = b - e; /* exact  */
  60   /* Now *hi + *lo = a + b exactly.  */
  61 }
  62
  63 /* Multiplication of two floating-point expansions: *hi + *lo is an
  64    approximation of (h1+l1)*(h2+l2), assuming |l1| <= 1/2*ulp(h1)
  65    and |l2| <= 1/2*ulp(h2) and rounding to nearest.  */
  66 static inline void
  67 mul_expansion (double *hi, double *lo, double h1, double l1,
  68                double h2, double l2)
  69 {
  70   double r, e;
  71
  72   mul_split (hi, lo, h1, h2);
  73   r = h1 * l2 + h2 * l1;
  74   /* Now add r to (hi,lo) using fast two-sum, where we know |r| < |hi|.  */
  75   fast_two_sum (hi, &e, *hi, r);
  76   *lo -= e;
  77 }
  78
  79 /* Calculate X / Y and store the approximate result in *HI + *LO.  It is
  80    assumed that Y is not zero, that no overflow nor underflow occurs, and
  81    rounding is to nearest.  */
  82 static inline void
  83 div_split (double *hi, double *lo, double x, double y)
  84 {
  85   double a, b;
  86
  87   *hi = x / y;
  88   mul_split (&a, &b, *hi, y);
  89   /* a + b = hi*y, which should be near x.  */
  90   a = x - a; /* huge cancellation  */
  91   a = a - b;
  92   /* Now x ~ hi*y + a thus x/y ~ hi + a/y.  */
  93   *lo = a / y;
  94 }
  95
  96 /* Division of two floating-point expansions: *hi + *lo is an
  97    approximation of (h1+l1)/(h2+l2), assuming |l1| <= 1/2*ulp(h1)
  98    and |l2| <= 1/2*ulp(h2), h2+l2 is not zero, and rounding to nearest.  */
  99 static inline void
 100 div_expansion (double *hi, double *lo, double h1, double l1,
 101                double h2, double l2)
 102 {
 103   double r, e;
 104
 105   div_split (hi, lo, h1, h2);
 106   /* (h1+l1)/(h2+l2) ~ h1/h2 + (l1*h2 - l2*h1)/h2^2  */
 107   r = (l1 * h2 - l2 * h1) / (h2 * h2);
 108   /* Now add r to (hi,lo) using fast two-sum, where we know |r| < |hi|.  */
 109   fast_two_sum (hi, &e, *hi, r);
 110   *lo += e;
 111   /* Renormalize since |lo| might be larger than 0.5 ulp(hi).  */
 112   fast_two_sum (hi, lo, *hi, *lo);
 113 }
 114
 115 #endif /* _MUL_SPLIT_H */