source/libs/mpfr/mpfr-src/src/mulders.c

   1 /* Mulders' MulHigh function (short product)
   2
   3 Copyright 2005-2016 Free Software Foundation, Inc.
   4 Contributed by the AriC and Caramba projects, INRIA.
   5
   6 This file is part of the GNU MPFR Library.
   7
   8 The GNU MPFR Library is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU Lesser General Public License as published by
  10 the Free Software Foundation; either version 3 of the License, or (at your
  11 option) any later version.
  12
  13 The GNU MPFR Library is distributed in the hope that it will be useful, but
  14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  16 License for more details.
  17
  18 You should have received a copy of the GNU Lesser General Public License
  19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
  20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
  21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
  22
  23 /* References:
  24    [1] Short Division of Long Integers, David Harvey and Paul Zimmermann,
  25        Proceedings of the 20th Symposium on Computer Arithmetic (ARITH-20),
  26        July 25-27, 2011, pages 7-14.
  27 */
  28
  29 #define MPFR_NEED_LONGLONG_H
  30 #include "mpfr-impl.h"
  31
  32 #ifndef MUL_FFT_THRESHOLD
  33 #define MUL_FFT_THRESHOLD 8448
  34 #endif
  35
  36 /* Don't use MPFR_MULHIGH_SIZE since it is handled by tuneup */
  37 #ifdef MPFR_MULHIGH_TAB_SIZE
  38 static short mulhigh_ktab[MPFR_MULHIGH_TAB_SIZE];
  39 #else
  40 static short mulhigh_ktab[] = {MPFR_MULHIGH_TAB};
  41 #define MPFR_MULHIGH_TAB_SIZE \
  42   ((mp_size_t) (sizeof(mulhigh_ktab) / sizeof(mulhigh_ktab[0])))
  43 #endif
  44
  45 /* Put in  rp[n..2n-1] an approximation of the n high limbs
  46    of {up, n} * {vp, n}. The error is less than n ulps of rp[n] (and the
  47    approximation is always less or equal to the truncated full product).
  48    Assume 2n limbs are allocated at rp.
  49
  50    Implements Algorithm ShortMulNaive from [1].
  51 */
  52 static void
  53 mpfr_mulhigh_n_basecase (mpfr_limb_ptr rp, mpfr_limb_srcptr up,
  54                          mpfr_limb_srcptr vp, mp_size_t n)
  55 {
  56   mp_size_t i;
  57
  58   rp += n - 1;
  59   umul_ppmm (rp[1], rp[0], up[n-1], vp[0]); /* we neglect up[0..n-2]*vp[0],
  60                                                which is less than B^n */
  61   for (i = 1 ; i < n ; i++)
  62     /* here, we neglect up[0..n-i-2] * vp[i], which is less than B^n too */
  63     rp[i + 1] = mpn_addmul_1 (rp, up + (n - i - 1), i + 1, vp[i]);
  64   /* in total, we neglect less than n*B^n, i.e., n ulps of rp[n]. */
  65 }
  66
  67 /* Put in  rp[0..n] the n+1 low limbs of {up, n} * {vp, n}.
  68    Assume 2n limbs are allocated at rp. */
  69 static void
  70 mpfr_mullow_n_basecase (mpfr_limb_ptr rp, mpfr_limb_srcptr up,
  71                         mpfr_limb_srcptr vp, mp_size_t n)
  72 {
  73   mp_size_t i;
  74
  75   rp[n] = mpn_mul_1 (rp, up, n, vp[0]);
  76   for (i = 1 ; i < n ; i++)
  77     mpn_addmul_1 (rp + i, up, n - i + 1, vp[i]);
  78 }
  79
  80 /* Put in  rp[n..2n-1] an approximation of the n high limbs
  81    of {np, n} * {mp, n}. The error is less than n ulps of rp[n] (and the
  82    approximation is always less or equal to the truncated full product).
  83
  84    Implements Algorithm ShortMul from [1].
  85 */
  86 void
  87 mpfr_mulhigh_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mpfr_limb_srcptr mp,
  88                 mp_size_t n)
  89 {
  90   mp_size_t k;
  91
  92   MPFR_ASSERTN (MPFR_MULHIGH_TAB_SIZE >= 8); /* so that 3*(n/4) > n/2 */
  93   k = MPFR_LIKELY (n < MPFR_MULHIGH_TAB_SIZE) ? mulhigh_ktab[n] : 3*(n/4);
  94   /* Algorithm ShortMul from [1] requires k >= (n+3)/2, which translates
  95      into k >= (n+4)/2 in the C language. */
  96   MPFR_ASSERTD (k == -1 || k == 0 || (k >= (n+4)/2 && k < n));
  97   if (k < 0)
  98     mpn_mul_basecase (rp, np, n, mp, n); /* result is exact, no error */
  99   else if (k == 0)
 100     mpfr_mulhigh_n_basecase (rp, np, mp, n); /* basecase error < n ulps */
 101   else if (n > MUL_FFT_THRESHOLD)
 102     mpn_mul_n (rp, np, mp, n); /* result is exact, no error */
 103   else
 104     {
 105       mp_size_t l = n - k;
 106       mp_limb_t cy;
 107
 108       mpn_mul_n (rp + 2 * l, np + l, mp + l, k); /* fills rp[2l..2n-1] */
 109       mpfr_mulhigh_n (rp, np + k, mp, l);        /* fills rp[l-1..2l-1] */
 110       cy = mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
 111       mpfr_mulhigh_n (rp, np, mp + k, l);        /* fills rp[l-1..2l-1] */
 112       cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
 113       mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
 114     }
 115 }
 116
 117 /* Put in  rp[0..n] the n+1 low limbs of {np, n} * {mp, n}.
 118    Assume 2n limbs are allocated at rp. */
 119 void
 120 mpfr_mullow_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mpfr_limb_srcptr mp,
 121                mp_size_t n)
 122 {
 123   mp_size_t k;
 124
 125   MPFR_ASSERTN (MPFR_MULHIGH_TAB_SIZE >= 8); /* so that 3*(n/4) > n/2 */
 126   k = MPFR_LIKELY (n < MPFR_MULHIGH_TAB_SIZE) ? mulhigh_ktab[n] : 3*(n/4);
 127   MPFR_ASSERTD (k == -1 || k == 0 || (2 * k >= n && k < n));
 128   if (k < 0)
 129     mpn_mul_basecase (rp, np, n, mp, n);
 130   else if (k == 0)
 131     mpfr_mullow_n_basecase (rp, np, mp, n);
 132   else if (n > MUL_FFT_THRESHOLD)
 133     mpn_mul_n (rp, np, mp, n);
 134   else
 135     {
 136       mp_size_t l = n - k;
 137
 138       mpn_mul_n (rp, np, mp, k);                      /* fills rp[0..2k] */
 139       mpfr_mullow_n (rp + n, np + k, mp, l);          /* fills rp[n..n+2l] */
 140       mpn_add_n (rp + k, rp + k, rp + n, l + 1);
 141       mpfr_mullow_n (rp + n, np, mp + k, l);          /* fills rp[n..n+2l] */
 142       mpn_add_n (rp + k, rp + k, rp + n, l + 1);
 143     }
 144 }
 145
 146 #ifdef MPFR_SQRHIGH_TAB_SIZE
 147 static short sqrhigh_ktab[MPFR_SQRHIGH_TAB_SIZE];
 148 #else
 149 static short sqrhigh_ktab[] = {MPFR_SQRHIGH_TAB};
 150 #define MPFR_SQRHIGH_TAB_SIZE (sizeof(sqrhigh_ktab) / sizeof(sqrhigh_ktab[0]))
 151 #endif
 152
 153 /* Put in  rp[n..2n-1] an approximation of the n high limbs
 154    of {np, n}^2. The error is less than n ulps of rp[n]. */
 155 void
 156 mpfr_sqrhigh_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mp_size_t n)
 157 {
 158   mp_size_t k;
 159
 160   MPFR_ASSERTN (MPFR_SQRHIGH_TAB_SIZE > 2); /* ensures k < n */
 161   k = MPFR_LIKELY (n < MPFR_SQRHIGH_TAB_SIZE) ? sqrhigh_ktab[n]
 162     : (n+4)/2; /* ensures that k >= (n+3)/2 */
 163   MPFR_ASSERTD (k == -1 || k == 0 || (k >= (n+4)/2 && k < n));
 164   if (k < 0)
 165     /* we can't use mpn_sqr_basecase here, since it requires
 166        n <= SQR_KARATSUBA_THRESHOLD, where SQR_KARATSUBA_THRESHOLD
 167        is not exported by GMP */
 168     mpn_sqr_n (rp, np, n);
 169   else if (k == 0)
 170     mpfr_mulhigh_n_basecase (rp, np, np, n);
 171   else
 172     {
 173       mp_size_t l = n - k;
 174       mp_limb_t cy;
 175
 176       mpn_sqr_n (rp + 2 * l, np + l, k);          /* fills rp[2l..2n-1] */
 177       mpfr_mulhigh_n (rp, np, np + k, l);         /* fills rp[l-1..2l-1] */
 178       /* {rp+n-1,l+1} += 2 * {rp+l-1,l+1} */
 179       cy = mpn_lshift (rp + l - 1, rp + l - 1, l + 1, 1);
 180       cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
 181       mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
 182     }
 183 }
 184
 185 #ifdef MPFR_DIVHIGH_TAB_SIZE
 186 static short divhigh_ktab[MPFR_DIVHIGH_TAB_SIZE];
 187 #else
 188 static short divhigh_ktab[] = {MPFR_DIVHIGH_TAB};
 189 #define MPFR_DIVHIGH_TAB_SIZE (sizeof(divhigh_ktab) / sizeof(divhigh_ktab[0]))
 190 #endif
 191
 192 #ifndef __GMPFR_GMP_H__
 193 #define mpfr_pi1_t gmp_pi1_t /* with a GMP build */
 194 #endif
 195
 196 #if !(defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q))
 197 /* Put in Q={qp, n} an approximation of N={np, 2*n} divided by D={dp, n},
 198    with the most significant limb of the quotient as return value (0 or 1).
 199    Assumes the most significant bit of D is set. Clobbers N.
 200
 201    The approximate quotient Q satisfies - 2(n-1) < N/D - Q <= 4.
 202 */
 203 static mp_limb_t
 204 mpfr_divhigh_n_basecase (mpfr_limb_ptr qp, mpfr_limb_ptr np,
 205                          mpfr_limb_srcptr dp, mp_size_t n)
 206 {
 207   mp_limb_t qh, d1, d0, dinv, q2, q1, q0;
 208   mpfr_pi1_t dinv2;
 209
 210   np += n;
 211
 212   if ((qh = (mpn_cmp (np, dp, n) >= 0)))
 213     mpn_sub_n (np, np, dp, n);
 214
 215   /* now {np, n} is less than D={dp, n}, which implies np[n-1] <= dp[n-1] */
 216
 217   d1 = dp[n - 1];
 218
 219   if (n == 1)
 220     {
 221       invert_limb (dinv, d1);
 222       umul_ppmm (q1, q0, np[0], dinv);
 223       qp[0] = np[0] + q1;
 224       return qh;
 225     }
 226
 227   /* now n >= 2 */
 228   d0 = dp[n - 2];
 229   invert_pi1 (dinv2, d1, d0);
 230   /* dinv2.inv32 = floor ((B^3 - 1) / (d0 + d1 B)) - B */
 231   while (n > 1)
 232     {
 233       /* Invariant: it remains to reduce n limbs from N (in addition to the
 234          initial low n limbs).
 235          Since n >= 2 here, necessarily we had n >= 2 initially, which means
 236          that in addition to the limb np[n-1] to reduce, we have at least 2
 237          extra limbs, thus accessing np[n-3] is valid. */
 238
 239       /* Warning: we can have np[n-1]>d1 or (np[n-1]=d1 and np[n-2]>=d0) here,
 240          since we truncate the divisor at each step, but since {np,n} < D
 241          originally, the largest possible partial quotient is B-1. */
 242       if (MPFR_UNLIKELY(np[n-1] > d1 || (np[n-1] == d1 && np[n-2] >= d0)))
 243         q2 = ~ (mp_limb_t) 0;
 244       else
 245         udiv_qr_3by2 (q2, q1, q0, np[n - 1], np[n - 2], np[n - 3],
 246                       d1, d0, dinv2.inv32);
 247       /* since q2 = floor((np[n-1]*B^2+np[n-2]*B+np[n-3])/(d1*B+d0)),
 248          we have q2 <= (np[n-1]*B^2+np[n-2]*B+np[n-3])/(d1*B+d0),
 249          thus np[n-1]*B^2+np[n-2]*B+np[n-3] >= q2*(d1*B+d0)
 250          and {np-1, n} >= q2*D - q2*B^(n-2) >= q2*D - B^(n-1)
 251          thus {np-1, n} - (q2-1)*D >= D - B^(n-1) >= 0
 252          which proves that at most one correction is needed */
 253       q0 = mpn_submul_1 (np - 1, dp, n, q2);
 254       if (MPFR_UNLIKELY(q0 > np[n - 1]))
 255         {
 256           mpn_add_n (np - 1, np - 1, dp, n);
 257           q2 --;
 258         }
 259       qp[--n] = q2;
 260       dp ++;
 261     }
 262
 263   /* we have B+dinv2 = floor((B^3-1)/(d1*B+d0)) < B^2/d1
 264      q1 = floor(np[0]*(B+dinv2)/B) <= floor(np[0]*B/d1)
 265         <= floor((np[0]*B+np[1])/d1)
 266      thus q1 is not larger than the true quotient.
 267      q1 > np[0]*(B+dinv2)/B - 1 > np[0]*(B^3-1)/(d1*B+d0)/B - 2
 268      For d1*B+d0 <> B^2/2, we have B+dinv2 = floor(B^3/(d1*B+d0))
 269      thus q1 > np[0]*B^2/(d1*B+d0) - 2, i.e.,
 270      (d1*B+d0)*q1 > np[0]*B^2 - 2*(d1*B+d0)
 271      d1*B*q1 > np[0]*B^2 - 2*d1*B - 2*d0 - d0*q1 >= np[0]*B^2 - 2*d1*B - B^2
 272      thus q1 > np[0]*B/d1 - 2 - B/d1 > np[0]*B/d1 - 4.
 273
 274      For d1*B+d0 = B^2/2, dinv2 = B-1 thus q1 > np[0]*(2B-1)/B - 1 >
 275      np[0]*B/d1 - 2.
 276
 277      In all cases, if q = floor((np[0]*B+np[1])/d1), we have:
 278      q - 4 <= q1 <= q
 279   */
 280   umul_ppmm (q1, q0, np[0], dinv2.inv32);
 281   qp[0] = np[0] + q1;
 282
 283   return qh;
 284 }
 285 #endif
 286
 287 /* Put in {qp, n} an approximation of N={np, 2*n} divided by D={dp, n},
 288    with the most significant limb of the quotient as return value (0 or 1).
 289    Assumes the most significant bit of D is set. Clobbers N.
 290
 291    This implements the ShortDiv algorithm from reference [1].
 292 */
 293 #if 1
 294 mp_limb_t
 295 mpfr_divhigh_n (mpfr_limb_ptr qp, mpfr_limb_ptr np, mpfr_limb_ptr dp,
 296                 mp_size_t n)
 297 {
 298   mp_size_t k, l;
 299   mp_limb_t qh, cy;
 300   mpfr_limb_ptr tp;
 301   MPFR_TMP_DECL(marker);
 302
 303   MPFR_ASSERTN (MPFR_MULHIGH_TAB_SIZE >= 15); /* so that 2*(n/3) >= (n+4)/2 */
 304   k = MPFR_LIKELY (n < MPFR_DIVHIGH_TAB_SIZE) ? divhigh_ktab[n] : 2*(n/3);
 305
 306   if (k == 0)
 307 #if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q)
 308   {
 309     mpfr_pi1_t dinv2;
 310     invert_pi1 (dinv2, dp[n - 1], dp[n - 2]);
 311     return __gmpn_sbpi1_divappr_q (qp, np, n + n, dp, n, dinv2.inv32);
 312   }
 313 #else /* use our own code for base-case short division */
 314     return mpfr_divhigh_n_basecase (qp, np, dp, n);
 315 #endif
 316   else if (k == n)
 317     /* for k=n, we use a division with remainder (mpn_divrem),
 318      which computes the exact quotient */
 319     return mpn_divrem (qp, 0, np, 2 * n, dp, n);
 320
 321   MPFR_ASSERTD ((n+4)/2 <= k && k < n); /* bounds from [1] */
 322   MPFR_TMP_MARK (marker);
 323   l = n - k;
 324   /* first divide the most significant 2k limbs from N by the most significant
 325      k limbs of D */
 326   qh = mpn_divrem (qp + l, 0, np + 2 * l, 2 * k, dp + l, k); /* exact */
 327
 328   /* it remains {np,2l+k} = {np,n+l} as remainder */
 329
 330   /* now we have to subtract high(Q1)*D0 where Q1=qh*B^k+{qp+l,k} and
 331      D0={dp,l} */
 332   tp = MPFR_TMP_LIMBS_ALLOC (2 * l);
 333   mpfr_mulhigh_n (tp, qp + k, dp, l);
 334   /* we are only interested in the upper l limbs from {tp,2l} */
 335   cy = mpn_sub_n (np + n, np + n, tp + l, l);
 336   if (qh)
 337     cy += mpn_sub_n (np + n, np + n, dp, l);
 338   while (cy > 0) /* Q1 was too large: subtract 1 to Q1 and add D to np+l */
 339     {
 340       qh -= mpn_sub_1 (qp + l, qp + l, k, MPFR_LIMB_ONE);
 341       cy -= mpn_add_n (np + l, np + l, dp, n);
 342     }
 343
 344   /* now it remains {np,n+l} to divide by D */
 345   cy = mpfr_divhigh_n (qp, np + k, dp + k, l);
 346   qh += mpn_add_1 (qp + l, qp + l, k, cy);
 347   MPFR_TMP_FREE(marker);
 348
 349   return qh;
 350 }
 351 #else /* below is the FoldDiv(K) algorithm from [1] */
 352 mp_limb_t
 353 mpfr_divhigh_n (mpfr_limb_ptr qp, mpfr_limb_ptr np, mpfr_limb_ptr dp,
 354                 mp_size_t n)
 355 {
 356   mp_size_t k, r;
 357   mpfr_limb_ptr ip, tp, up;
 358   mp_limb_t qh = 0, cy, cc;
 359   int count;
 360   MPFR_TMP_DECL(marker);
 361
 362 #define K 3
 363   if (n < K)
 364     return mpn_divrem (qp, 0, np, 2 * n, dp, n);
 365
 366   k = (n - 1) / K + 1; /* ceil(n/K) */
 367
 368   MPFR_TMP_MARK (marker);
 369   ip = MPFR_TMP_LIMBS_ALLOC (k + 1);
 370   tp = MPFR_TMP_LIMBS_ALLOC (n + k);
 371   up = MPFR_TMP_LIMBS_ALLOC (2 * (k + 1));
 372   mpn_invert (ip, dp + n - (k + 1), k + 1, NULL); /* takes about 13% for n=1000 */
 373   /* {ip, k+1} = floor((B^(2k+2)-1)/D - B^(k+1) where D = {dp+n-(k+1),k+1} */
 374   for (r = n, cc = 0UL; r > 0;)
 375     {
 376       /* cc is the carry at np[n+r] */
 377       MPFR_ASSERTD(cc <= 1);
 378       /* FIXME: why can we have cc as large as say 8? */
 379       count = 0;
 380       while (cc > 0)
 381         {
 382           count ++;
 383           MPFR_ASSERTD(count <= 1);
 384           /* subtract {dp+n-r,r} from {np+n,r} */
 385           cc -= mpn_sub_n (np + n, np + n, dp + n - r, r);
 386           /* add 1 at qp[r] */
 387           qh += mpn_add_1 (qp + r, qp + r, n - r, 1UL);
 388         }
 389       /* it remains r limbs to reduce, i.e., the remainder is {np, n+r} */
 390       if (r < k)
 391         {
 392           ip += k - r;
 393           k = r;
 394         }
 395       /* now r >= k */
 396       /* qp + r - 2 * k -> up */
 397       mpfr_mulhigh_n (up, np + n + r - (k + 1), ip, k + 1);
 398       /* take into account the term B^k in the inverse: B^k * {np+n+r-k, k} */
 399       cy = mpn_add_n (qp + r - k, up + k + 2, np + n + r - k, k);
 400       /* since we need only r limbs of tp (below), it suffices to consider
 401          r high limbs of dp */
 402       if (r > k)
 403         {
 404 #if 0
 405           mpn_mul (tp, dp + n - r, r, qp + r - k, k);
 406 #else  /* use a short product for the low k x k limbs */
 407           /* we know the upper k limbs of the r-limb product cancel with the
 408              remainder, thus we only need to compute the low r-k limbs */
 409           if (r - k >= k)
 410             mpn_mul (tp + k, dp + n - r + k, r - k, qp + r - k, k);
 411           else /* r-k < k */
 412             {
 413 /* #define LOW */
 414 #ifndef LOW
 415               mpn_mul (tp + k, qp + r - k, k, dp + n - r + k, r - k);
 416 #else
 417               mpfr_mullow_n_basecase (tp + k, qp + r - k, dp + n - r + k, r - k);
 418               /* take into account qp[2r-2k] * dp[n - r + k] */
 419               tp[r] += qp[2*r-2*k] * dp[n - r + k];
 420 #endif
 421               /* tp[k..r] is filled */
 422             }
 423 #if 0
 424           mpfr_mulhigh_n (up, dp + n - r, qp + r - k, k);
 425 #else /* compute one more limb. FIXME: we could add one limb of dp in the
 426          above, to save one mpn_addmul_1 call */
 427           mpfr_mulhigh_n (up, dp + n - r, qp + r - k, k - 1); /* {up,2k-2} */
 428           /* add {qp + r - k, k - 1} * dp[n-r+k-1] */
 429           up[2*k-2] = mpn_addmul_1 (up + k - 1, qp + r - k, k-1, dp[n-r+k-1]);
 430           /* add {dp+n-r, k} * qp[r-1] */
 431           up[2*k-1] = mpn_addmul_1 (up + k - 1, dp + n - r, k, qp[r-1]);
 432 #endif
 433 #ifndef LOW
 434           cc = mpn_add_n (tp + k, tp + k, up + k, k);
 435           mpn_add_1 (tp + 2 * k, tp + 2 * k, r - k, cc);
 436 #else
 437           /* update tp[k..r] */
 438           if (r - k + 1 <= k)
 439             mpn_add_n (tp + k, tp + k, up + k, r - k + 1);
 440           else /* r - k >= k */
 441             {
 442               cc = mpn_add_n (tp + k, tp + k, up + k, k);
 443               mpn_add_1 (tp + 2 * k, tp + 2 * k, r - 2 * k + 1, cc);
 444             }
 445 #endif
 446 #endif
 447         }
 448       else /* last step: since we only want the quotient, no need to update,
 449               just propagate the carry cy */
 450         {
 451           MPFR_ASSERTD(r < n);
 452           if (cy > 0)
 453             qh += mpn_add_1 (qp + r, qp + r, n - r, cy);
 454           break;
 455         }
 456       /* subtract {tp, n+k} from {np+r-k, n+k}; however we only want to
 457          update {np+n, n} */
 458       /* we should have tp[r] = np[n+r-k] up to 1 */
 459       MPFR_ASSERTD(tp[r] == np[n + r - k] || tp[r] + 1 == np[n + r - k]);
 460 #ifndef LOW
 461       cc = mpn_sub_n (np + n - 1, np + n - 1, tp + k - 1, r + 1); /* borrow at np[n+r] */
 462 #else
 463       cc = mpn_sub_n (np + n - 1, np + n - 1, tp + k - 1, r - k + 2);
 464 #endif
 465       /* if cy = 1, subtract {dp, n} from {np+r, n}, thus
 466          {dp+n-r,r} from {np+n,r} */
 467       if (cy)
 468         {
 469           if (r < n)
 470             cc += mpn_sub_n (np + n - 1, np + n - 1, dp + n - r - 1, r + 1);
 471           else
 472             cc += mpn_sub_n (np + n, np + n, dp + n - r, r);
 473           /* propagate cy */
 474           if (r == n)
 475             qh = cy;
 476           else
 477             qh += mpn_add_1 (qp + r, qp + r, n - r, cy);
 478         }
 479       /* cc is the borrow at np[n+r] */
 480       count = 0;
 481       while (cc > 0) /* quotient was too large */
 482         {
 483           count++;
 484           MPFR_ASSERTD (count <= 1);
 485           cy = mpn_add_n (np + n, np + n, dp + n - (r - k), r - k);
 486           cc -= mpn_add_1 (np + n + r - k, np + n + r - k, k, cy);
 487           qh -= mpn_sub_1 (qp + r - k, qp + r - k, n - (r - k), 1UL);
 488         }
 489       r -= k;
 490       cc = np[n + r];
 491     }
 492   MPFR_TMP_FREE(marker);
 493
 494   return qh;
 495 }
 496 #endif