source/libs/gmp/gmp-src/mpn/generic/divexact.c
/* mpn_divexact(qp,np,nn,dp,dn,tp) -- Divide N = {np,nn} by D = {dp,dn} storing
   the result in Q = {qp,nn-dn+1} expecting no remainder.  Overlap allowed
   between Q and N; all other overlap disallowed.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2006, 2007, 2009 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
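/* Illustrative usage sketch, not part of the original file; guarded by
   "#if 0" so it never builds.  mpn_divexact is an internal function (see
   the warning above); limbs are stored least significant first, and the
   quotient area must hold nn - dn + 1 limbs.  */
#if 0
static void
divexact_example (void)
{
  mp_limb_t np[4] = { 0, 0, 0, 6 };	/* N = 6*B^3, with B = 2^GMP_NUMB_BITS */
  mp_limb_t dp[2] = { 0, 3 };		/* D = 3*B, which divides N exactly */
  mp_limb_t qp[3];			/* nn - dn + 1 = 3 quotient limbs */

  mpn_divexact (qp, np, 4, dp, 2);	/* Q = N/D = 2*B^2 */
  /* Now qp[] = { 0, 0, 2 }.  */
}
#endif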
#if 1
void
mpn_divexact (mp_ptr qp,
              mp_srcptr np, mp_size_t nn,
              mp_srcptr dp, mp_size_t dn)
{
  unsigned shift;
  mp_size_t qn;
  mp_ptr tp;
  TMP_DECL;

  ASSERT (dn > 0);
  ASSERT (nn >= dn);
  ASSERT (dp[dn-1] > 0);

  while (dp[0] == 0)
    {
      ASSERT (np[0] == 0);
      dp++;
      np++;
      dn--;
      nn--;
    }
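  /* Added note: exactness is what makes the stripping above valid.  If D
     has k low zero limbs, then B^k divides D and hence divides N = Q*D,
     so N must have k low zero limbs as well (the ASSERT checks this);
     dividing both operands by B^k leaves Q unchanged.  */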
  if (dn == 1)
    {
      MPN_DIVREM_OR_DIVEXACT_1 (qp, np, nn, dp[0]);
      return;
    }

  TMP_MARK;

  qn = nn + 1 - dn;
  count_trailing_zeros (shift, dp[0]);

  if (shift > 0)
    {
      mp_ptr wp;
      mp_size_t ss;
      ss = (dn > qn) ? qn + 1 : dn;

      tp = TMP_ALLOC_LIMBS (ss);
      mpn_rshift (tp, dp, ss, shift);
      dp = tp;

      /* Since we have excluded dn == 1, we have nn > qn, and we need
         to shift one limb beyond qn. */
      wp = TMP_ALLOC_LIMBS (qn + 1);
      mpn_rshift (wp, np, qn + 1, shift);
      np = wp;
    }
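  /* Added note: the same cancellation argument at bit granularity.  D's
     low "shift" zero bits are shared by N, so shifting both right leaves
     Q unchanged and makes dp[0] odd.  Since Q has qn limbs, it equals
     N * D^{-1} mod B^qn, which depends only on the low qn limbs of N and
     D; mpn_bdiv_q below computes exactly this 2-adic quotient, and
     exactness makes it the true quotient.  */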
  if (dn > qn)
    dn = qn;

  tp = TMP_ALLOC_LIMBS (mpn_bdiv_q_itch (qn, dn));
  mpn_bdiv_q (qp, np, qn, dp, dn, tp);
  TMP_FREE;
}
#else

/* We use Jebelean's bidirectional exact division algorithm.  This is
   somewhat naively implemented, with equal quotient parts done by 2-adic
   division and truncating division.  Since 2-adic division is faster, it
   should be used for a larger chunk.

   This code is horrendously ugly, in all sorts of ways.

   * It was hacked without much care or thought, but with a testing program.
   * It handles scratch space frivolously, and furthermore the itch function
     is broken.
   * It doesn't provide any measures for dealing with mu_divappr_q's +3 error.
     We have yet to provoke an error due to this, though.
   * Algorithm selection leaves a lot to be desired.  In particular, the
     choice between DC and MU isn't a point, but we treat it like one.
   * It makes the msb part 1 or 2 limbs larger than the lsb part, even though
     the latter is faster.  We should at least reverse this, but perhaps we
     should make the lsb part considerably larger.  (How do we tune this?)
*/
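/* Added worked example (not in the original source): with nn = 12 and
   dn = 5 we get qn = 8 and qn0 = ((12-5)>>1)+1 = 4, so the low 4 quotient
   limbs come from a 2-adic (bdiv) division of the low dividend limbs,
   while qn1 = qn0+1 = 5 high quotient limbs come from a truncating
   divappr of the nn1 = 9 most significant dividend limbs.  The two parts
   overlap in one limb, qp[qn0-1], which is used at the end to detect and
   fix a carry from the approximative high part.  */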
mp_size_t
mpn_divexact_itch (mp_size_t nn, mp_size_t dn)
{
  return nn + dn;			/* FIXME this is not right */
}

void
mpn_divexact (mp_ptr qp,
              mp_srcptr np, mp_size_t nn,
              mp_srcptr dp, mp_size_t dn,
              mp_ptr scratch)
{
  mp_size_t qn;
  mp_size_t nn0, qn0;
  mp_size_t nn1, qn1;
  mp_ptr tp;
  mp_limb_t qml;
  mp_limb_t qh;
  int cnt;
  mp_ptr xdp;
  mp_limb_t di;
  mp_limb_t cy;
  gmp_pi1_t dinv;
  TMP_DECL;

  TMP_MARK;

  qn = nn - dn + 1;

  /* For small divisors, and small quotients, don't use Jebelean's algorithm. */
  if (dn < DIVEXACT_JEB_THRESHOLD || qn < DIVEXACT_JEB_THRESHOLD)
    {
      tp = scratch;
      MPN_COPY (tp, np, qn);
      binvert_limb (di, dp[0]);  di = -di;
      dn = MIN (dn, qn);
      mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
      TMP_FREE;
      return;
    }
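  /* Added note: binvert_limb above computes the inverse of dp[0] modulo
     B = 2^GMP_NUMB_BITS, which exists only for odd dp[0]; the sbpi1 bdiv
     code takes that inverse negated, hence the di = -di.  */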
  qn0 = ((nn - dn) >> 1) + 1;	/* low quotient size */

  /* If quotient is much larger than the divisor, the bidirectional algorithm
     does not work as currently implemented.  Fall back to plain bdiv.  */
  if (qn0 > dn)
    {
      if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))
        {
          tp = scratch;
          MPN_COPY (tp, np, qn);
          binvert_limb (di, dp[0]);  di = -di;
          dn = MIN (dn, qn);
          mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
        }
      else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
        {
          tp = scratch;
          MPN_COPY (tp, np, qn);
          binvert_limb (di, dp[0]);  di = -di;
          mpn_dcpi1_bdiv_q (qp, tp, qn, dp, dn, di);
        }
      else
        {
          mpn_mu_bdiv_q (qp, np, qn, dp, dn, scratch);
        }
      TMP_FREE;
      return;
    }

  nn0 = qn0 + qn0;

  nn1 = nn0 - 1 + ((nn-dn) & 1);
  qn1 = qn0;
  if (LIKELY (qn0 != dn))
    {
      nn1 = nn1 + 1;
      qn1 = qn1 + 1;
      if (UNLIKELY (dp[dn - 1] == 1 && qn1 != dn))
        {
          /* If the leading divisor limb == 1, i.e. has just one bit, we have
             to include an extra limb in order to get the needed overlap.  */
          /* FIXME: Now with the mu_divappr_q function, we should really need
             more overlap.  That indicates one of two things: (1) The test code
             is not good.  (2) We actually overlap too much by default.  */
          nn1 = nn1 + 1;
          qn1 = qn1 + 1;
        }
    }

  tp = TMP_ALLOC_LIMBS (nn1 + 1);

  count_leading_zeros (cnt, dp[dn - 1]);

  /* Normalize divisor, store into tmp area.  */
  if (cnt != 0)
    {
      xdp = TMP_ALLOC_LIMBS (qn1);
      mpn_lshift (xdp, dp + dn - qn1, qn1, cnt);
    }
  else
    {
      xdp = (mp_ptr) dp + dn - qn1;
    }

  /* Shift dividend according to the divisor normalization.  */
  /* FIXME: We compute too much here for XX_divappr_q, but these functions'
     interfaces want a pointer to the imaginative least significant limb, not
     to the least significant *used* limb.  Of course, we could leave nn1-qn1
     rubbish limbs in the low part, to save some time.  */
  if (cnt != 0)
    {
      cy = mpn_lshift (tp, np + nn - nn1, nn1, cnt);
      if (cy != 0)
        {
          tp[nn1] = cy;
          nn1++;
        }
    }
  else
    {
      /* FIXME: This copy is not needed for mpn_mu_divappr_q, except when the
         mpn_sub_n right before is executed.  */
      MPN_COPY (tp, np + nn - nn1, nn1);
    }
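  /* Added note: invert_pi1 and the divappr calls below require a normalized
     divisor, i.e. one with its most significant bit set; cnt is the left
     shift that achieves this, applied to both operands above so that the
     quotient is unchanged.  */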
  invert_pi1 (dinv, xdp[qn1 - 1], xdp[qn1 - 2]);
  if (BELOW_THRESHOLD (qn1, DC_DIVAPPR_Q_THRESHOLD))
    {
      qp[qn0 - 1 + nn1 - qn1] = mpn_sbpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, dinv.inv32);
    }
  else if (BELOW_THRESHOLD (qn1, MU_DIVAPPR_Q_THRESHOLD))
    {
      qp[qn0 - 1 + nn1 - qn1] = mpn_dcpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, &dinv);
    }
  else
    {
      /* FIXME: mpn_mu_divappr_q doesn't handle qh != 0.  Work around it with a
         conditional subtraction here.  */
      qh = mpn_cmp (tp + nn1 - qn1, xdp, qn1) >= 0;
      if (qh)
        mpn_sub_n (tp + nn1 - qn1, tp + nn1 - qn1, xdp, qn1);
      mpn_mu_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, scratch);
      qp[qn0 - 1 + nn1 - qn1] = qh;
    }
  qml = qp[qn0 - 1];

  binvert_limb (di, dp[0]);  di = -di;

  if (BELOW_THRESHOLD (qn0, DC_BDIV_Q_THRESHOLD))
    {
      MPN_COPY (tp, np, qn0);
      mpn_sbpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
    }
  else if (BELOW_THRESHOLD (qn0, MU_BDIV_Q_THRESHOLD))
    {
      MPN_COPY (tp, np, qn0);
      mpn_dcpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
    }
  else
    {
      mpn_mu_bdiv_q (qp, np, qn0, dp, qn0, scratch);
    }
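  /* Added note: the truncating divappr above may deliver a quotient that
     is slightly too large.  When that excess carries out of the overlap
     limb, the saved approximative value qml wraps below the exact value
     of qp[qn0-1] just computed by the bdiv, and the high quotient part
     must then be decremented by one, as done below.  */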
  if (qml < qp[qn0 - 1])
    mpn_decr_u (qp + qn0, 1);

  TMP_FREE;
}
#endif