source/libs/gmp/gmp-src/mpn/generic/mu_div_q.c

   1 /* mpn_mu_div_q.
   2
   3    Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
   4
   5    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   6    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   7    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
   8
   9 Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc.
  10
  11 This file is part of the GNU MP Library.
  12
  13 The GNU MP Library is free software; you can redistribute it and/or modify
  14 it under the terms of either:
  15
  16   * the GNU Lesser General Public License as published by the Free
  17     Software Foundation; either version 3 of the License, or (at your
  18     option) any later version.
  19
  20 or
  21
  22   * the GNU General Public License as published by the Free Software
  23     Foundation; either version 2 of the License, or (at your option) any
  24     later version.
  25
  26 or both in parallel, as here.
  27
  28 The GNU MP Library is distributed in the hope that it will be useful, but
  29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  31 for more details.
  32
  33 You should have received copies of the GNU General Public License and the
  34 GNU Lesser General Public License along with the GNU MP Library.  If not,
  35 see https://www.gnu.org/licenses/.  */
  36
  37
  38 /*
  39    The idea of the algorithm used herein is to compute a smaller inverted value
  40    than used in the standard Barrett algorithm, and thus save time in the
  41    Newton iterations, and pay just a small price when using the inverted value
  42    for developing quotient bits.  This algorithm was presented at ICMS 2006.
  43 */
  44
  45 /*
  46   Things to work on:
  47
  48   1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is
  49      probably close to optimal, except when mpn_mu_divappr_q fails.
  50
  51   2. We used to fall back to mpn_mu_div_qr when we detect a possible
  52      mpn_mu_divappr_q rounding problem, now we multiply and compare.
  53      Unfortunately, since mpn_mu_divappr_q does not return the partial
  54      remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr could
  55      solve that.
  56
  57   3. The allocations done here should be made from the scratch area, which
  58      then would need to be amended.
  59 */
  60
  61 #include <stdlib.h>             /* for NULL */
  62 #include "gmp.h"
  63 #include "gmp-impl.h"
  64
  65
  66 mp_limb_t
  67 mpn_mu_div_q (mp_ptr qp,
  68               mp_srcptr np, mp_size_t nn,
  69               mp_srcptr dp, mp_size_t dn,
  70               mp_ptr scratch)
  71 {
  72   mp_ptr tp, rp;
  73   mp_size_t qn;
  74   mp_limb_t cy, qh;
  75   TMP_DECL;
  76
  77   TMP_MARK;
  78
  79   qn = nn - dn;
  80
  81   tp = TMP_BALLOC_LIMBS (qn + 1);
  82
  83   if (qn >= dn)                 /* nn >= 2*dn + 1 */
  84     {
  85        /* |_______________________|   dividend
  86                          |________|   divisor  */
  87
  88       rp = TMP_BALLOC_LIMBS (nn + 1);
  89       MPN_COPY (rp + 1, np, nn);
  90       rp[0] = 0;
  91
  92       qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0;
  93       if (qh != 0)
  94         mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn);
  95
  96       cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch);
  97
  98       if (UNLIKELY (cy != 0))
  99         {
 100           /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
 101              canonically reduced, replace the returned value of B^(qn-dn)+eps
 102              by the largest possible value.  */
 103           mp_size_t i;
 104           for (i = 0; i < qn + 1; i++)
 105             tp[i] = GMP_NUMB_MAX;
 106         }
 107
 108       /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb is
 109          smaller than the max error, we cannot trust the quotient.  */
 110       if (tp[0] > 4)
 111         {
 112           MPN_COPY (qp, tp + 1, qn);
 113         }
 114       else
 115         {
 116           mp_limb_t cy;
 117           mp_ptr pp;
 118
 119           pp = rp;
 120           mpn_mul (pp, tp + 1, qn, dp, dn);
 121
 122           cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;
 123
 124           if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */
 125             qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
 126           else /* Same as above */
 127             MPN_COPY (qp, tp + 1, qn);
 128         }
 129     }
 130   else
 131     {
 132        /* |_______________________|   dividend
 133                  |________________|   divisor  */
 134
 135       /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
 136          here becomes 2dn, i.e., more than nn.  This shouldn't hurt, since only
 137          the most significant dn-1 limbs will actually be read, but it is not
 138          pretty.  */
 139
 140       qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
 141                              dp + dn - (qn + 1), qn + 1, scratch);
 142
 143       /* The max error of mpn_mu_divappr_q is +4, but we get an additional
 144          error from the divisor truncation.  */
 145       if (tp[0] > 6)
 146         {
 147           MPN_COPY (qp, tp + 1, qn);
 148         }
 149       else
 150         {
 151           mp_limb_t cy;
 152
 153           /* FIXME: a shorter product should be enough; we may use already
 154              allocated space... */
 155           rp = TMP_BALLOC_LIMBS (nn);
 156           mpn_mul (rp, dp, dn, tp + 1, qn);
 157
 158           cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;
 159
 160           if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */
 161             qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
 162           else /* Same as above */
 163             MPN_COPY (qp, tp + 1, qn);
 164         }
 165     }
 166
 167   TMP_FREE;
 168   return qh;
 169 }
 170
 171 mp_size_t
 172 mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
 173 {
 174   mp_size_t qn;
 175
 176   qn = nn - dn;
 177   if (qn >= dn)
 178     {
 179       return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k);
 180     }
 181   else
 182     {
 183       return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
 184     }
 185 }