source/libs/gmp/gmp-src/mpn/generic/dcpi1_divappr_q.c

   1 /* mpn_dcpi1_divappr_q -- divide-and-conquer division, returning approximate
   2    quotient.  The quotient returned is either correct, or one too large.
   3
   4    Contributed to the GNU project by Torbjorn Granlund.
   5
   6    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   7    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   8    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
   9
  10 Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
  11
  12 This file is part of the GNU MP Library.
  13
  14 The GNU MP Library is free software; you can redistribute it and/or modify
  15 it under the terms of either:
  16
  17   * the GNU Lesser General Public License as published by the Free
  18     Software Foundation; either version 3 of the License, or (at your
  19     option) any later version.
  20
  21 or
  22
  23   * the GNU General Public License as published by the Free Software
  24     Foundation; either version 2 of the License, or (at your option) any
  25     later version.
  26
  27 or both in parallel, as here.
  28
  29 The GNU MP Library is distributed in the hope that it will be useful, but
  30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  31 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  32 for more details.
  33
  34 You should have received copies of the GNU General Public License and the
  35 GNU Lesser General Public License along with the GNU MP Library.  If not,
  36 see https://www.gnu.org/licenses/.  */
  37
  38 #include "gmp.h"
  39 #include "gmp-impl.h"
  40 #include "longlong.h"
  41
  42
  43 mp_limb_t
  44 mpn_dcpi1_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
  45                        gmp_pi1_t *dinv, mp_ptr tp)
  46 {
  47   mp_size_t lo, hi;
  48   mp_limb_t cy, qh, ql;
  49
  50   lo = n >> 1;                  /* floor(n/2) */
  51   hi = n - lo;                  /* ceil(n/2) */
  52
  53   if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
  54     qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
  55   else
  56     qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
  57
  58   mpn_mul (tp, qp + lo, hi, dp, lo);
  59
  60   cy = mpn_sub_n (np + lo, np + lo, tp, n);
  61   if (qh != 0)
  62     cy += mpn_sub_n (np + n, np + n, dp, lo);
  63
  64   while (cy != 0)
  65     {
  66       qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
  67       cy -= mpn_add_n (np + lo, np + lo, dp, n);
  68     }
  69
  70   if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD))
  71     ql = mpn_sbpi1_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
  72   else
  73     ql = mpn_dcpi1_divappr_q_n (qp, np + hi, dp + hi, lo, dinv, tp);
  74
  75   if (UNLIKELY (ql != 0))
  76     {
  77       mp_size_t i;
  78       for (i = 0; i < lo; i++)
  79         qp[i] = GMP_NUMB_MASK;
  80     }
  81
  82   return qh;
  83 }
  84
  85 mp_limb_t
  86 mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
  87                      mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
  88 {
  89   mp_size_t qn;
  90   mp_limb_t qh, cy, qsave;
  91   mp_ptr tp;
  92   TMP_DECL;
  93
  94   TMP_MARK;
  95
  96   ASSERT (dn >= 6);
  97   ASSERT (nn > dn);
  98   ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
  99
 100   qn = nn - dn;
 101   qp += qn;
 102   np += nn;
 103   dp += dn;
 104
 105   if (qn >= dn)
 106     {
 107       qn++;                     /* pretend we'll need an extra limb */
 108       /* Reduce qn mod dn without division, optimizing small operations.  */
 109       do
 110         qn -= dn;
 111       while (qn > dn);
 112
 113       qp -= qn;                 /* point at low limb of next quotient block */
 114       np -= qn;                 /* point in the middle of partial remainder */
 115
 116       tp = TMP_SALLOC_LIMBS (dn);
 117
 118       /* Perform the typically smaller block first.  */
 119       if (qn == 1)
 120         {
 121           mp_limb_t q, n2, n1, n0, d1, d0;
 122
 123           /* Handle qh up front, for simplicity. */
 124           qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
 125           if (qh)
 126             ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
 127
 128           /* A single iteration of schoolbook: One 3/2 division,
 129              followed by the bignum update and adjustment. */
 130           n2 = np[0];
 131           n1 = np[-1];
 132           n0 = np[-2];
 133           d1 = dp[-1];
 134           d0 = dp[-2];
 135
 136           ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
 137
 138           if (UNLIKELY (n2 == d1) && n1 == d0)
 139             {
 140               q = GMP_NUMB_MASK;
 141               cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
 142               ASSERT (cy == n2);
 143             }
 144           else
 145             {
 146               udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
 147
 148               if (dn > 2)
 149                 {
 150                   mp_limb_t cy, cy1;
 151                   cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
 152
 153                   cy1 = n0 < cy;
 154                   n0 = (n0 - cy) & GMP_NUMB_MASK;
 155                   cy = n1 < cy1;
 156                   n1 = (n1 - cy1) & GMP_NUMB_MASK;
 157                   np[-2] = n0;
 158
 159                   if (UNLIKELY (cy != 0))
 160                     {
 161                       n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
 162                       qh -= (q == 0);
 163                       q = (q - 1) & GMP_NUMB_MASK;
 164                     }
 165                 }
 166               else
 167                 np[-2] = n0;
 168
 169               np[-1] = n1;
 170             }
 171           qp[0] = q;
 172         }
 173       else
 174         {
 175           if (qn == 2)
 176             qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
 177           else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
 178             qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
 179           else
 180             qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
 181
 182           if (qn != dn)
 183             {
 184               if (qn > dn - qn)
 185                 mpn_mul (tp, qp, qn, dp - dn, dn - qn);
 186               else
 187                 mpn_mul (tp, dp - dn, dn - qn, qp, qn);
 188
 189               cy = mpn_sub_n (np - dn, np - dn, tp, dn);
 190               if (qh != 0)
 191                 cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
 192
 193               while (cy != 0)
 194                 {
 195                   qh -= mpn_sub_1 (qp, qp, qn, 1);
 196                   cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
 197                 }
 198             }
 199         }
 200       qn = nn - dn - qn + 1;
 201       while (qn > dn)
 202         {
 203           qp -= dn;
 204           np -= dn;
 205           mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
 206           qn -= dn;
 207         }
 208
 209       /* Since we pretended we'd need an extra quotient limb before, we now
 210          have made sure the code above left just dn-1=qn quotient limbs to
 211          develop.  Develop that plus a guard limb. */
 212       qn--;
 213       qp -= qn;
 214       np -= dn;
 215       qsave = qp[qn];
 216       mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp);
 217       MPN_COPY_INCR (qp, qp + 1, qn);
 218       qp[qn] = qsave;
 219     }
 220   else    /* (qn < dn) */
 221     {
 222       mp_ptr q2p;
 223 #if 0                           /* not possible since we demand nn > dn */
 224       if (qn == 0)
 225         {
 226           qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
 227           if (qh)
 228             mpn_sub_n (np - dn, np - dn, dp - dn, dn);
 229           TMP_FREE;
 230           return qh;
 231         }
 232 #endif
 233
 234       qp -= qn;                 /* point at low limb of next quotient block */
 235       np -= qn;                 /* point in the middle of partial remainder */
 236
 237       q2p = TMP_SALLOC_LIMBS (qn + 1);
 238       /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or reply on
 239          callers not to be silly?  */
 240       if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
 241         {
 242           qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1),
 243                                     dp - (qn + 1), qn + 1, dinv->inv32);
 244         }
 245       else
 246         {
 247           /* It is tempting to use qp for recursive scratch and put quotient in
 248              tp, but the recursive scratch needs one limb too many.  */
 249           tp = TMP_SALLOC_LIMBS (qn + 1);
 250           qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp);
 251         }
 252       MPN_COPY (qp, q2p + 1, qn);
 253     }
 254
 255   TMP_FREE;
 256   return qh;
 257 }