/* mpn_mu_div_q.

   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
/*
   The idea of the algorithm used herein is to compute a smaller inverted value
   than used in the standard Barrett algorithm, and thus save time in the
   Newton iterations, and pay just a small price when using the inverted value
   for developing quotient bits.  This algorithm was presented at ICMS 2006.
*/

/* Things to work on:

  1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is
     probably close to optimal, except when mpn_mu_divappr_q fails.

  2. We used to fall back to mpn_mu_div_qr when we detect a possible
     mpn_mu_divappr_q rounding problem, now we multiply and compare.
     Unfortunately, since mpn_mu_divappr_q does not return the partial
     remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr could
     solve that.

  3. The allocations done here should be made from the scratch area, which
     then would need to be amended.
*/
#include <stdlib.h>		/* for NULL */
67 mpn_mu_div_q (mp_ptr qp
,
68 mp_srcptr np
, mp_size_t nn
,
69 mp_srcptr dp
, mp_size_t dn
,
81 tp
= TMP_BALLOC_LIMBS (qn
+ 1);
83 if (qn
>= dn
) /* nn >= 2*dn + 1 */
85 /* |_______________________| dividend
88 rp
= TMP_BALLOC_LIMBS (nn
+ 1);
89 MPN_COPY (rp
+ 1, np
, nn
);
92 qh
= mpn_cmp (rp
+ 1 + nn
- dn
, dp
, dn
) >= 0;
94 mpn_sub_n (rp
+ 1 + nn
- dn
, rp
+ 1 + nn
- dn
, dp
, dn
);
96 cy
= mpn_mu_divappr_q (tp
, rp
, nn
+ 1, dp
, dn
, scratch
);
98 if (UNLIKELY (cy
!= 0))
100 /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
101 canonically reduced, replace the returned value of B^(qn-dn)+eps
102 by the largest possible value. */
104 for (i
= 0; i
< qn
+ 1; i
++)
105 tp
[i
] = GMP_NUMB_MAX
;
108 /* The max error of mpn_mu_divappr_q is +4. If the low quotient limb is
109 smaller than the max error, we cannot trust the quotient. */
112 MPN_COPY (qp
, tp
+ 1, qn
);
120 mpn_mul (pp
, tp
+ 1, qn
, dp
, dn
);
122 cy
= (qh
!= 0) ? mpn_add_n (pp
+ qn
, pp
+ qn
, dp
, dn
) : 0;
124 if (cy
|| mpn_cmp (pp
, np
, nn
) > 0) /* At most is wrong by one, no cycle. */
125 qh
-= mpn_sub_1 (qp
, tp
+ 1, qn
, 1);
126 else /* Same as above */
127 MPN_COPY (qp
, tp
+ 1, qn
);
132 /* |_______________________| dividend
133 |________________| divisor */
135 /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
136 here becomes 2dn, i.e., more than nn. This shouldn't hurt, since only
137 the most significant dn-1 limbs will actually be read, but it is not
140 qh
= mpn_mu_divappr_q (tp
, np
+ nn
- (2 * qn
+ 2), 2 * qn
+ 2,
141 dp
+ dn
- (qn
+ 1), qn
+ 1, scratch
);
143 /* The max error of mpn_mu_divappr_q is +4, but we get an additional
144 error from the divisor truncation. */
147 MPN_COPY (qp
, tp
+ 1, qn
);
153 /* FIXME: a shorter product should be enough; we may use already
154 allocated space... */
155 rp
= TMP_BALLOC_LIMBS (nn
);
156 mpn_mul (rp
, dp
, dn
, tp
+ 1, qn
);
158 cy
= (qh
!= 0) ? mpn_add_n (rp
+ qn
, rp
+ qn
, dp
, dn
) : 0;
160 if (cy
|| mpn_cmp (rp
, np
, nn
) > 0) /* At most is wrong by one, no cycle. */
161 qh
-= mpn_sub_1 (qp
, tp
+ 1, qn
, 1);
162 else /* Same as above */
163 MPN_COPY (qp
, tp
+ 1, qn
);
172 mpn_mu_div_q_itch (mp_size_t nn
, mp_size_t dn
, int mua_k
)
179 return mpn_mu_divappr_q_itch (nn
+ 1, dn
, mua_k
);
183 return mpn_mu_divappr_q_itch (2 * qn
+ 2, qn
+ 1, mua_k
);