/* mpn_div_q -- division for arbitrary size operands.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
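
/* gmp-impl.h provides the internal mpn interfaces, the ASSERT and TMP_*
   macros, and the tuned *_THRESHOLD values used below; longlong.h provides
   count_leading_zeros.  */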

/* Compute Q = N/D with truncation.
     N = {np,nn}
     D = {dp,dn}
     Q = {qp,nn-dn+1}
     T = {scratch,nn+1} is scratch space
   N and D are both untouched by the computation.
   N and T may overlap; pass the same space if N is irrelevant after the call,
   but note that tp needs an extra limb.

   Operand requirements:
     N >= D > 0
     dp[dn-1] != 0
     No overlap between the N, D, and Q areas.

   This division function does not clobber its input operands, since it is
   intended to support average-O(qn) division, and for that to be effective,
   it cannot put requirements on callers to copy an O(nn) operand.

   If a caller does not care about the value of {np,nn+1} after calling this
   function, it should pass np also for the scratch argument.  This function
   will then save some time and space by avoiding allocation and copying.
   (FIXME: Is this a good design?  We only really save any copying for
   already-normalised divisors, which should be rare.  It also prevents us
   from reasonably asking for all the scratch space we need.)

   We write nn-dn+1 limbs for the quotient, but return void.  Why not return
   the most significant quotient limb?  Look at the 4 main code blocks below
   (consisting of an outer if-else where each arm contains an if-else).  It
   is tricky for the first code block, since the mpn_*_div_q calls will
   typically generate all nn-dn+1 limbs and return 0 or 1.  I don't see how
   to fix that unless we generate the most significant quotient limb here,
   before calling mpn_*_div_q, or put the quotient in a temporary area.
   Since this is a critical division case (the SB sub-case in particular),
   copying is not a good idea.

   It might make sense to split the if-else parts of the (qn + FUDGE >= dn)
   blocks into separate functions, since we could promise quite different
   things to callers in these two cases.  The 'then' case benefits from
   np=scratch, and it could perhaps even tolerate qp=np, saving some
   headache for many callers.

   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU
   size operands, we do not reuse the huge scratch for adjustments.  This
   can be a serious waste of memory for the largest operands.  */
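
/* Example usage (an illustrative sketch, not part of the GMP sources; the
   array names are hypothetical, and N, D must satisfy the requirements
   above, in particular N >= D and a nonzero high divisor limb):

     mp_limb_t n_limbs[6];       6-limb dividend N
     mp_limb_t d_limbs[2];       2-limb divisor D, d_limbs[1] != 0
     mp_limb_t q[6 - 2 + 1];     quotient, nn-dn+1 limbs
     mp_limb_t t[6 + 1];         scratch, nn+1 limbs
     mpn_div_q (q, n_limbs, 6, d_limbs, 2, t);

   If N is dead after the call, passing np itself as the scratch argument
   (with room for nn+1 limbs) avoids an internal copy, as documented
   above.  */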

/* FUDGE determines when to try getting an approximate quotient from the
   upper parts of the dividend and divisor, then adjust.  N.B. FUDGE must
   be >= 2 for the code to be correct.  */
#define FUDGE 5   /* FIXME: tune this */
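
/* For instance (illustrative numbers): with nn = 1000 and dn = 600 we get
   qn = 401, and qn + FUDGE = 406 < dn, so the code below divides truncated
   high parts of the operands and then adjusts; with dn = 300 we get
   qn = 701 >= dn, and the full operands are used directly.  */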

#define DC_DIV_Q_THRESHOLD      DC_DIVAPPR_Q_THRESHOLD
#define MU_DIV_Q_THRESHOLD      MU_DIVAPPR_Q_THRESHOLD
#define MUPI_DIV_Q_THRESHOLD    MUPI_DIVAPPR_Q_THRESHOLD
#ifndef MUPI_DIVAPPR_Q_THRESHOLD
#define MUPI_DIVAPPR_Q_THRESHOLD  MUPI_DIV_QR_THRESHOLD
#endif

void
mpn_div_q (mp_ptr qp,
           mp_srcptr np, mp_size_t nn,
           mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
{
  mp_ptr new_dp, new_np, tp, rp;
  mp_limb_t cy, dh, qh;
  mp_size_t new_nn, qn;
  gmp_pi1_t dinv;
  int cnt;
  TMP_DECL;
  TMP_MARK;

  ASSERT (nn >= dn);
  ASSERT (dn > 0);
  ASSERT (dp[dn - 1] != 0);
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));

  ASSERT_ALWAYS (FUDGE >= 2);

  if (dn == 1)
    {
      mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
      return;
    }

  qn = nn - dn + 1;   /* Quotient size, high limb might be zero */

  if (qn + FUDGE >= dn)
    {
      /* |________________________|
                         |_______|  */
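      /* The quotient is nearly as long as the dividend, so divide using the
         full operands; truncating them first would save little.  */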
      new_np = scratch;

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
        {
          count_leading_zeros (cnt, dh);

          cy = mpn_lshift (new_np, np, nn, cnt);
          new_np[nn] = cy;
          new_nn = nn + (cy != 0);

          new_dp = TMP_ALLOC_LIMBS (dn);
          mpn_lshift (new_dp, dp, dn, cnt);
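
          /* Both operands are now shifted left by cnt bits, so the divisor's
             high bit is set, as the sbpi1 and dcpi1 code requires.  The
             dispatch below picks mpn_divrem_2 for 2-limb divisors, the
             schoolbook code below the DC threshold, divide-and-conquer in
             the middle range, and block-wise Barrett (mu) division for the
             largest operands; the "slow condition" roughly interpolates the
             dc/mu crossover in the ratio dn/nn.  */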

          if (dn == 2)
            {
              qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
                   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
            {
              invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
              qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
                   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
                   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
                   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)        /* ...condition */
            {
              invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
              qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
            }
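          /* With no carry from the normalising shift, the division above
             produced only qn-1 low quotient limbs and qh is the missing
             high limb.  With a carry, all qn limbs were written already
             and qh is normally zero.  */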
          if (cy == 0)
            qp[qn - 1] = qh;
          else if (UNLIKELY (qh != 0))
            {
              /* This happens only when the quotient is close to B^n and
                 mpn_*_divappr_q returned B^n.  */
              mp_size_t i, n;
              n = new_nn - dn;
              for (i = 0; i < n; i++)
                qp[i] = GMP_NUMB_MAX;
              qh = 0;   /* currently ignored */
            }
        }
      else  /* divisor is already normalised */
        {
          if (new_np != np)
            MPN_COPY (new_np, np, nn);

          if (dn == 2)
            {
              qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
            }
          else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
                   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
            {
              invert_pi1 (dinv, dh, dp[dn - 2]);
              qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
                   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
                   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
                   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)        /* ...condition */
            {
              invert_pi1 (dinv, dh, dp[dn - 2]);
              qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
            }
          qp[nn - dn] = qh;
        }
    }
  else
    {
      /* |________________________|
              |_________________|  */
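      /* The quotient is much shorter than the dividend.  Compute an
         approximate quotient from the high 2*qn+1 limbs of N and the high
         qn+1 limbs of D; the final adjustment below lowers it by at most
         one.  */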
      tp = TMP_ALLOC_LIMBS (qn + 1);

      new_np = scratch;
      new_nn = 2 * qn + 1;
      if (new_np == np)
        /* We need {np,nn} to remain untouched until the final adjustment,
           so we must allocate separate space for new_np.  */
        new_np = TMP_ALLOC_LIMBS (new_nn + 1);

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
        {
          count_leading_zeros (cnt, dh);

          cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
          new_np[new_nn] = cy;

          new_nn += (cy != 0);

          new_dp = TMP_ALLOC_LIMBS (qn + 1);
          mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
          new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);
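
          /* The shift left by cnt vacated the low cnt bits of the truncated
             divisor; the |= above refills them from the next lower limb of
             D, which exists since dn >= qn + 2 in this branch.  This keeps
             the truncated divisor normalised.  */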

          if (qn + 1 == 2)
            {
              qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
              qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
              qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
            }
          if (cy == 0)
            tp[qn] = qh;
          else if (UNLIKELY (qh != 0))
            {
              /* This happens only when the quotient is close to B^n and
                 mpn_*_divappr_q returned B^n.  */
              mp_size_t i, n;
              n = new_nn - (qn + 1);
              for (i = 0; i < n; i++)
                tp[i] = GMP_NUMB_MAX;
              qh = 0;   /* currently ignored */
            }
        }
      else  /* divisor is already normalised */
        {
          MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */

          new_dp = (mp_ptr) dp + dn - (qn + 1);

          if (qn == 2 - 1)
            {
              qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, dh, new_dp[qn - 1]);
              qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, dh, new_dp[qn - 1]);
              qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
            }
          tp[qn] = qh;
        }

      MPN_COPY (qp, tp + 1, qn);
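
      /* tp holds an approximate quotient that may exceed the true quotient,
         but only by a small amount.  If its discarded low limb tp[0] is
         above a small bound, the qn limbs just copied are certainly
         correct; otherwise check by multiplying the candidate Q back by D
         and comparing against N, decrementing Q if it is too large.  */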
      if (tp[0] <= 4)
        {
          mp_size_t rn;

          rp = TMP_ALLOC_LIMBS (dn + qn);
          mpn_mul (rp, dp, dn, tp + 1, qn);
          rn = dn + qn;
          rn -= rp[rn - 1] == 0;

          if (rn > nn || mpn_cmp (np, rp, nn) < 0)
            mpn_decr_u (qp, 1);
        }
    }

  TMP_FREE;
}