compiler/mlib/s_fma.c

   1 /*-
   2  * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24  * SUCH DAMAGE.
  25  */
  26
  27 __FBSDID("$FreeBSD: src/lib/msun/src/s_fma.c,v 1.4 2005/03/18 02:27:59 das Exp $");
  28
  29 #include <aros/system.h>
  30
  31 #include <fenv.h>
  32 #include <float.h>
  33 #include <math.h>
  34
  35 /*
  36  * Fused multiply-add: Compute x * y + z with a single rounding error.
  37  *
  38  * We use scaling to avoid overflow/underflow, along with the
  39  * canonical precision-doubling technique adapted from:
  40  *
  41  *      Dekker, T.  A Floating-Point Technique for Extending the
  42  *      Available Precision.  Numer. Math. 18, 224-242 (1971).
  43  *
  44  * This algorithm is sensitive to the rounding precision.  FPUs such
  45  * as the i387 must be set in double-precision mode if variables are
  46  * to be stored in FP registers in order to avoid incorrect results.
  47  * This is the default on FreeBSD, but not on many other systems.
  48  *
  49  * Hardware instructions should be used on architectures that support it,
  50  * since this implementation will likely be several times slower.
  51  */
  52 #if LDBL_MANT_DIG != 113
  53 double
  54 fma(double x, double y, double z)
  55 {
  56         static const double split = 0x1p27 + 1.0;
  57         double xs, ys, zs;
  58         double c, cc, hx, hy, p, q, tx, ty;
  59         double r, rr, s;
  60         int oround;
  61         int ex, ey, ez;
  62         int spread;
  63
  64         if (z == 0.0)
  65                 return (x * y);
  66         if (x == 0.0 || y == 0.0)
  67                 return (x * y + z);
  68
  69         /* Results of frexp() are undefined for these cases. */
  70         if (!isfinite(x) || !isfinite(y) || !isfinite(z))
  71                 return (x * y + z);
  72
  73         xs = frexp(x, &ex);
  74         ys = frexp(y, &ey);
  75         zs = frexp(z, &ez);
  76         oround = fegetround();
  77         spread = ex + ey - ez;
  78
  79         /*
  80          * If x * y and z are many orders of magnitude apart, the scaling
  81          * will overflow, so we handle these cases specially.  Rounding
  82          * modes other than FE_TONEAREST are painful.
  83          */
  84         if (spread > DBL_MANT_DIG * 2) {
  85                 fenv_t env;
  86                 feraiseexcept(FE_INEXACT);
  87                 switch(oround) {
  88                 case FE_TONEAREST:
  89                         return (x * y);
  90                 case FE_TOWARDZERO:
  91                         if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
  92                                 return (x * y);
  93                         feholdexcept(&env);
  94                         r = x * y;
  95                         if (!fetestexcept(FE_INEXACT))
  96                                 r = nextafter(r, 0);
  97                         feupdateenv(&env);
  98                         return (r);
  99                 case FE_DOWNWARD:
 100                         if (z > 0.0)
 101                                 return (x * y);
 102                         feholdexcept(&env);
 103                         r = x * y;
 104                         if (!fetestexcept(FE_INEXACT))
 105                                 r = nextafter(r, -INFINITY);
 106                         feupdateenv(&env);
 107                         return (r);
 108                 default:        /* FE_UPWARD */
 109                         if (z < 0.0)
 110                                 return (x * y);
 111                         feholdexcept(&env);
 112                         r = x * y;
 113                         if (!fetestexcept(FE_INEXACT))
 114                                 r = nextafter(r, INFINITY);
 115                         feupdateenv(&env);
 116                         return (r);
 117                 }
 118         }
 119         if (spread < -DBL_MANT_DIG) {
 120                 feraiseexcept(FE_INEXACT);
 121                 if (!isnormal(z))
 122                         feraiseexcept(FE_UNDERFLOW);
 123                 switch (oround) {
 124                 case FE_TONEAREST:
 125                         return (z);
 126                 case FE_TOWARDZERO:
 127                         if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
 128                                 return (z);
 129                         else
 130                                 return (nextafter(z, 0));
 131                 case FE_DOWNWARD:
 132                         if (x > 0.0 ^ y < 0.0)
 133                                 return (z);
 134                         else
 135                                 return (nextafter(z, -INFINITY));
 136                 default:        /* FE_UPWARD */
 137                         if (x > 0.0 ^ y < 0.0)
 138                                 return (nextafter(z, INFINITY));
 139                         else
 140                                 return (z);
 141                 }
 142         }
 143
 144         /*
 145          * Use Dekker's algorithm to perform the multiplication and
 146          * subsequent addition in twice the machine precision.
 147          * Arrange so that x * y = c + cc, and x * y + z = r + rr.
 148          */
 149         fesetround(FE_TONEAREST);
 150
 151         p = xs * split;
 152         hx = xs - p;
 153         hx += p;
 154         tx = xs - hx;
 155
 156         p = ys * split;
 157         hy = ys - p;
 158         hy += p;
 159         ty = ys - hy;
 160
 161         p = hx * hy;
 162         q = hx * ty + tx * hy;
 163         c = p + q;
 164         cc = p - c + q + tx * ty;
 165
 166         zs = ldexp(zs, -spread);
 167         r = c + zs;
 168         s = r - c;
 169         rr = (c - (r - s)) + (zs - s) + cc;
 170
 171         spread = ex + ey;
 172         if (spread + ilogb(r) > -1023) {
 173                 fesetround(oround);
 174                 r = r + rr;
 175         } else {
 176                 /*
 177                  * The result is subnormal, so we round before scaling to
 178                  * avoid double rounding.
 179                  */
 180                 p = ldexp(copysign(0x1p-1022, r), -spread);
 181                 c = r + p;
 182                 s = c - r;
 183                 cc = (r - (c - s)) + (p - s) + rr;
 184                 fesetround(oround);
 185                 r = (c + cc) - p;
 186         }
 187         return (ldexp(r, spread));
 188 }
 189 #else   /* LDBL_MANT_DIG == 113 */
  190 /*
  191  * 113 bits of precision is more than twice the precision of a double,
  192  * so it is enough to represent the intermediate product exactly.
       *
       * The product is therefore formed in long double, z is added in long
       * double, and the sum is converted to double by the return statement.
       *
       * NOTE(review): the sum is rounded to long double precision and then
       * rounded again on conversion to double; confirm that this cannot
       * double-round for operands whose exponents are far apart.
  193  */
  194 double
  195 fma(double x, double y, double z)
  196 {
  197         return ((long double)x * y + z);
  198 }
 199 #endif  /* LDBL_MANT_DIG != 113 */
 200
 201 #if (LDBL_MANT_DIG == 53)
  202 /*
       * Alias fma -> fmal.
       *
       * Under the enclosing LDBL_MANT_DIG == 53 guard long double has the
       * same significand width as double, so fmal gets no implementation of
       * its own.  Presumably the first macro defines the fmal symbol as an
       * alias of fma and the second exports it; both macros are AROS
       * definitions not visible in this file -- confirm against
       * <aros/system.h>.
       */
  203 AROS_MAKE_ASM_SYM(typeof(fmal), fmal, AROS_CSYM_FROM_ASM_NAME(fmal), AROS_CSYM_FROM_ASM_NAME(fma));
  204 AROS_EXPORT_ASM_SYM(AROS_CSYM_FROM_ASM_NAME(fmal));
 205 #endif