/* mpc_fma -- Fused multiply-add of three complex numbers

Copyright (C) 2011, 2012 INRIA

This file is part of GNU MPC.

GNU MPC is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the
Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

GNU MPC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
more details.

You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see http://www.gnu.org/licenses/ .
*/

#include "mpc-impl.h"
/* return a bound on the precision needed to add or subtract x and y exactly */
static mpfr_prec_t
bound_prec_addsub (mpfr_srcptr x, mpfr_srcptr y)
{
  if (!mpfr_regular_p (x))
    return mpfr_get_prec (y);
  else if (!mpfr_regular_p (y))
    return mpfr_get_prec (x);
  else /* neither x nor y is NaN, Inf or zero */
    {
      mpfr_exp_t ex = mpfr_get_exp (x);
      mpfr_exp_t ey = mpfr_get_exp (y);
      mpfr_exp_t ulpx = ex - mpfr_get_prec (x);
      mpfr_exp_t ulpy = ey - mpfr_get_prec (y);
      return ((ex >= ey) ? ex : ey) + 1 - ((ulpx <= ulpy) ? ulpx : ulpy);
    }
}
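
/* Illustration (added here, not from the original source): for x = 1.5 at
   precision 2 (exponent 1, so ulpx = 1 - 2 = -1) and y = 0.75 at precision 2
   (exponent 0, so ulpy = 0 - 2 = -2), the bound is
   max(1, 0) + 1 - min(-1, -2) = 4 bits; the exact sum 2.25 = 0.1001b * 2^2
   indeed needs all 4 bits, the "+ 1" accounting for the carry. */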
/* r <- a*b+c */
int
mpc_fma_naive (mpc_ptr r, mpc_srcptr a, mpc_srcptr b, mpc_srcptr c, mpc_rnd_t rnd)
{
  mpfr_t rea_reb, rea_imb, ima_reb, ima_imb, tmp;
  mpfr_prec_t pre12, pre13, pre23, pim12, pim13, pim23;
  int inex_re, inex_im;

  mpfr_init2 (rea_reb, mpfr_get_prec (mpc_realref(a)) + mpfr_get_prec (mpc_realref(b)));
  mpfr_init2 (rea_imb, mpfr_get_prec (mpc_realref(a)) + mpfr_get_prec (mpc_imagref(b)));
  mpfr_init2 (ima_reb, mpfr_get_prec (mpc_imagref(a)) + mpfr_get_prec (mpc_realref(b)));
  mpfr_init2 (ima_imb, mpfr_get_prec (mpc_imagref(a)) + mpfr_get_prec (mpc_imagref(b)));

  mpfr_mul (rea_reb, mpc_realref(a), mpc_realref(b), GMP_RNDZ); /* exact */
  mpfr_mul (rea_imb, mpc_realref(a), mpc_imagref(b), GMP_RNDZ); /* exact */
  mpfr_mul (ima_reb, mpc_imagref(a), mpc_realref(b), GMP_RNDZ); /* exact */
  mpfr_mul (ima_imb, mpc_imagref(a), mpc_imagref(b), GMP_RNDZ); /* exact */
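  /* The four products above are exact because each target was initialized
     with precision equal to the sum of the precisions of its two operands,
     and the product of a p-bit and a q-bit significand fits in p + q bits,
     so mpfr_mul never has to round here. */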

  /* Re(r) <- rea_reb - ima_imb + Re(c) */

  pre12 = bound_prec_addsub (rea_reb, ima_imb); /* bound on exact precision for
                                                   rea_reb - ima_imb */
  pre13 = bound_prec_addsub (rea_reb, mpc_realref(c));
                                                /* bound for rea_reb + Re(c) */
  pre23 = bound_prec_addsub (ima_imb, mpc_realref(c));
                                                /* bound for ima_imb - Re(c) */
  if (pre12 <= pre13 && pre12 <= pre23) /* (rea_reb - ima_imb) + Re(c) */
    {
      mpfr_init2 (tmp, pre12);
      mpfr_sub (tmp, rea_reb, ima_imb, GMP_RNDZ); /* exact */
      inex_re = mpfr_add (mpc_realref(r), tmp, mpc_realref(c), MPC_RND_RE(rnd));
      /* the only possible bad overlap is between r and c, but since we are
         only touching the real part of both, it is ok */
    }
  else if (pre13 <= pre23) /* (rea_reb + Re(c)) - ima_imb */
    {
      mpfr_init2 (tmp, pre13);
      mpfr_add (tmp, rea_reb, mpc_realref(c), GMP_RNDZ); /* exact */
      inex_re = mpfr_sub (mpc_realref(r), tmp, ima_imb, MPC_RND_RE(rnd));
      /* the only possible bad overlap is between r and c, but since we are
         only touching the real part of both, it is ok */
    }
  else /* rea_reb + (Re(c) - ima_imb) */
    {
      mpfr_init2 (tmp, pre23);
      mpfr_sub (tmp, mpc_realref(c), ima_imb, GMP_RNDZ); /* exact */
      inex_re = mpfr_add (mpc_realref(r), tmp, rea_reb, MPC_RND_RE(rnd));
      /* the only possible bad overlap is between r and c, but since we are
         only touching the real part of both, it is ok */
    }
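
  /* Of the three possible groupings, the one whose exact intermediate sum
     needs the fewest bits is chosen, so that tmp stays as small as possible;
     the first addition is then exact and only the final operation into
     Re(r) rounds and produces the ternary value. */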

  /* Im(r) <- rea_imb + ima_reb + Im(c) */
  pim12 = bound_prec_addsub (rea_imb, ima_reb); /* bound on exact precision for
                                                   rea_imb + ima_reb */
  pim13 = bound_prec_addsub (rea_imb, mpc_imagref(c));
                                                /* bound for rea_imb + Im(c) */
  pim23 = bound_prec_addsub (ima_reb, mpc_imagref(c));
                                                /* bound for ima_reb + Im(c) */
  if (pim12 <= pim13 && pim12 <= pim23) /* (rea_imb + ima_reb) + Im(c) */
    {
      mpfr_set_prec (tmp, pim12);
      mpfr_add (tmp, rea_imb, ima_reb, GMP_RNDZ); /* exact */
      inex_im = mpfr_add (mpc_imagref(r), tmp, mpc_imagref(c), MPC_RND_IM(rnd));
      /* the only possible bad overlap is between r and c, but since we are
         only touching the imaginary part of both, it is ok */
    }
  else if (pim13 <= pim23) /* (rea_imb + Im(c)) + ima_reb */
    {
      mpfr_set_prec (tmp, pim13);
      mpfr_add (tmp, rea_imb, mpc_imagref(c), GMP_RNDZ); /* exact */
      inex_im = mpfr_add (mpc_imagref(r), tmp, ima_reb, MPC_RND_IM(rnd));
      /* the only possible bad overlap is between r and c, but since we are
         only touching the imaginary part of both, it is ok */
    }
  else /* rea_imb + (Im(c) + ima_reb) */
    {
      mpfr_set_prec (tmp, pim23);
      mpfr_add (tmp, mpc_imagref(c), ima_reb, GMP_RNDZ); /* exact */
      inex_im = mpfr_add (mpc_imagref(r), tmp, rea_imb, MPC_RND_IM(rnd));
      /* the only possible bad overlap is between r and c, but since we are
         only touching the imaginary part of both, it is ok */
    }

  mpfr_clear (rea_reb);
  mpfr_clear (rea_imb);
  mpfr_clear (ima_reb);
  mpfr_clear (ima_imb);
  mpfr_clear (tmp);

  return MPC_INEX(inex_re, inex_im);
}

/* The algorithm is as follows:
   - in a first pass, we use the target precision + some extra bits
   - if it fails, we add the number of cancelled bits when adding
     Re(a*b) and Re(c) [similarly for the imaginary part]
   - if it fails again, we call the mpc_fma_naive function, which also
     deals with the special cases */
int
mpc_fma (mpc_ptr r, mpc_srcptr a, mpc_srcptr b, mpc_srcptr c, mpc_rnd_t rnd)
{
  mpc_t ab;
  mpfr_prec_t pre, pim, wpre, wpim;
  mpfr_exp_t diffre, diffim;
  int i, inex = 0, okre = 0, okim = 0;

  if (mpc_fin_p (a) == 0 || mpc_fin_p (b) == 0 || mpc_fin_p (c) == 0)
    return mpc_fma_naive (r, a, b, c, rnd);

  pre = mpfr_get_prec (mpc_realref(r));
  pim = mpfr_get_prec (mpc_imagref(r));
  wpre = pre + mpc_ceil_log2 (pre) + 10;
  wpim = pim + mpc_ceil_log2 (pim) + 10;
  mpc_init3 (ab, wpre, wpim);
  for (i = 0; i < 2; ++i)
    {
      mpc_mul (ab, a, b, MPC_RNDZZ);
      if (mpfr_zero_p (mpc_realref(ab)) || mpfr_zero_p (mpc_imagref(ab)))
        break;
      diffre = mpfr_get_exp (mpc_realref(ab));
      diffim = mpfr_get_exp (mpc_imagref(ab));
      mpc_add (ab, ab, c, MPC_RNDZZ);
      if (mpfr_zero_p (mpc_realref(ab)) || mpfr_zero_p (mpc_imagref(ab)))
        break;
      diffre -= mpfr_get_exp (mpc_realref(ab));
      diffim -= mpfr_get_exp (mpc_imagref(ab));
      diffre = (diffre > 0 ? diffre + 1 : 1);
      diffim = (diffim > 0 ? diffim + 1 : 1);
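      /* diffre and diffim now bound the number of bits lost to cancellation
         when adding Re(a*b) to Re(c), resp. Im(a*b) to Im(c), so the sums in
         ab are accurate to roughly wpre - diffre and wpim - diffim bits. */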
      okre = diffre > (mpfr_exp_t) wpre ? 0 : mpfr_can_round (mpc_realref(ab),
                                  wpre - diffre, GMP_RNDN, GMP_RNDZ,
                                  pre + (MPC_RND_RE (rnd) == GMP_RNDN));
      okim = diffim > (mpfr_exp_t) wpim ? 0 : mpfr_can_round (mpc_imagref(ab),
                                  wpim - diffim, GMP_RNDN, GMP_RNDZ,
                                  pim + (MPC_RND_IM (rnd) == GMP_RNDN));
      if (okre && okim)
        {
          inex = mpc_set (r, ab, rnd);
          break;
        }
      if (i == 1)
        break;
      if (okre == 0 && diffre > 1)
        wpre += diffre;
      if (okim == 0 && diffim > 1)
        wpim += diffim;
      mpfr_set_prec (mpc_realref(ab), wpre);
      mpfr_set_prec (mpc_imagref(ab), wpim);
    }
  mpc_clear (ab);
  return okre && okim ? inex : mpc_fma_naive (r, a, b, c, rnd);
}
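
/* Minimal usage sketch (illustrative only, not part of the original file;
   the precision and operand values below are arbitrary):

     mpc_t a, b, c, r;
     mpc_init2 (a, 53);
     mpc_init2 (b, 53);
     mpc_init2 (c, 53);
     mpc_init2 (r, 53);
     mpc_set_ui_ui (a, 1, 2, MPC_RNDNN);   (a = 1 + 2*i)
     mpc_set_ui_ui (b, 3, 4, MPC_RNDNN);   (b = 3 + 4*i)
     mpc_set_ui_ui (c, 5, 6, MPC_RNDNN);   (c = 5 + 6*i)
     mpc_fma (r, a, b, c, MPC_RNDNN);      (r = a*b + c = 0 + 16*i, exact here)
     mpc_clear (a);
     mpc_clear (b);
     mpc_clear (c);
     mpc_clear (r);
*/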