/* mpn_div_q -- division for arbitrary size operands.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
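
/* gmp-impl.h provides the internal mpn interfaces, the ASSERT and TMP_*
   macros, and the tuned *_THRESHOLD values used below; longlong.h provides
   count_leading_zeros.  */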

/* Compute Q = N/D with truncation.
     N = {np,nn}
     D = {dp,dn}
     Q = {qp,nn-dn+1}
     T = {scratch,nn+1} is scratch space
   N and D are both untouched by the computation.
   N and T may overlap; pass the same space if N is irrelevant after the call,
   but note that tp needs an extra limb.

   Operand requirements:
     N >= D > 0
     dp[dn-1] != 0
     No overlap between the N, D, and Q areas.

   This division function does not clobber its input operands, since it is
   intended to support average-O(qn) division, and for that to be effective,
   it cannot put requirements on callers to copy an O(nn) operand.

   If a caller does not care about the value of {np,nn+1} after calling this
   function, it should pass np also for the scratch argument.  This function
   will then save some time and space by avoiding allocation and copying.
   (FIXME: Is this a good design?  We only really save any copying for
   already-normalised divisors, which should be rare.  It also prevents us
   from reasonably asking for all the scratch space we need.)

   We write nn-dn+1 limbs for the quotient, but return void.  Why not return
   the most significant quotient limb?  Look at the 4 main code blocks below
   (consisting of an outer if-else where each arm contains an if-else).  It
   is tricky for the first code block, since the mpn_*_div_q calls will
   typically generate all nn-dn+1 limbs and return 0 or 1.  I don't see how
   to fix that unless we generate the most significant quotient limb here,
   before calling mpn_*_div_q, or put the quotient in a temporary area.
   Since this is a critical division case (the SB sub-case in particular),
   copying is not a good idea.

   It might make sense to split the if-else parts of the (qn + FUDGE >= dn)
   blocks into separate functions, since we could promise quite different
   things to callers in these two cases.  The 'then' case benefits from
   np=scratch, and it could perhaps even tolerate qp=np, saving some
   headache for many callers.

   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU
   size operands, we do not reuse the huge scratch for adjustments.  This
   can be a serious waste of memory for the largest operands.  */
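
/* Example usage (an illustrative sketch, not part of the GMP sources; the
   array names are hypothetical, and N, D must satisfy the requirements
   above, in particular N >= D and a nonzero high divisor limb):

     mp_limb_t n_limbs[6];       6-limb dividend N
     mp_limb_t d_limbs[2];       2-limb divisor D, d_limbs[1] != 0
     mp_limb_t q[6 - 2 + 1];     quotient, nn-dn+1 limbs
     mp_limb_t t[6 + 1];         scratch, nn+1 limbs
     mpn_div_q (q, n_limbs, 6, d_limbs, 2, t);

   If N is dead after the call, passing np itself as the scratch argument
   (with room for nn+1 limbs) avoids an internal copy, as documented
   above.  */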

/* FUDGE determines when to try getting an approximate quotient from the
   upper parts of the dividend and divisor, then adjust.  N.B. FUDGE must
   be >= 2 for the code to be correct.  */
#define FUDGE 5   /* FIXME: tune this */
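
/* For instance (illustrative numbers): with nn = 1000 and dn = 600 we get
   qn = 401, and qn + FUDGE = 406 < dn, so the code below divides truncated
   high parts of the operands and then adjusts; with dn = 300 we get
   qn = 701 >= dn, and the full operands are used directly.  */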

#define DC_DIV_Q_THRESHOLD      DC_DIVAPPR_Q_THRESHOLD
#define MU_DIV_Q_THRESHOLD      MU_DIVAPPR_Q_THRESHOLD
#define MUPI_DIV_Q_THRESHOLD    MUPI_DIVAPPR_Q_THRESHOLD
#ifndef MUPI_DIVAPPR_Q_THRESHOLD
#define MUPI_DIVAPPR_Q_THRESHOLD  MUPI_DIV_QR_THRESHOLD
#endif

void
mpn_div_q (mp_ptr qp,
           mp_srcptr np, mp_size_t nn,
           mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
{
  mp_ptr new_dp, new_np, tp, rp;
  mp_limb_t cy, dh, qh;
  mp_size_t new_nn, qn;
  gmp_pi1_t dinv;
  int cnt;
  TMP_DECL;
  TMP_MARK;

  ASSERT (nn >= dn);
  ASSERT (dn > 0);
  ASSERT (dp[dn - 1] != 0);
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));

  ASSERT_ALWAYS (FUDGE >= 2);

  if (dn == 1)
    {
      mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
      return;
    }

  qn = nn - dn + 1;   /* Quotient size, high limb might be zero */

  if (qn + FUDGE >= dn)
    {
      /* |________________________|
                         |_______|  */
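      /* The quotient is nearly as long as the dividend, so divide using the
         full operands; truncating them first would save little.  */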
      new_np = scratch;

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
        {
          count_leading_zeros (cnt, dh);

          cy = mpn_lshift (new_np, np, nn, cnt);
          new_np[nn] = cy;
          new_nn = nn + (cy != 0);

          new_dp = TMP_ALLOC_LIMBS (dn);
          mpn_lshift (new_dp, dp, dn, cnt);
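
          /* Both operands are now shifted left by cnt bits, so the divisor's
             high bit is set, as the sbpi1 and dcpi1 code requires.  The
             dispatch below picks mpn_divrem_2 for 2-limb divisors, the
             schoolbook code below the DC threshold, divide-and-conquer in
             the middle range, and block-wise Barrett (mu) division for the
             largest operands; the "slow condition" roughly interpolates the
             dc/mu crossover in the ratio dn/nn.  */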

          if (dn == 2)
            {
              qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
                   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
            {
              invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
              qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
                   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
                   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
                   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)        /* ...condition */
            {
              invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
              qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
            }
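          /* With no carry from the normalising shift, the division above
             produced only qn-1 low quotient limbs and qh is the missing
             high limb.  With a carry, all qn limbs were written already
             and qh is normally zero.  */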
          if (cy == 0)
            qp[qn - 1] = qh;
          else if (UNLIKELY (qh != 0))
            {
              /* This happens only when the quotient is close to B^n and
                 mpn_*_divappr_q returned B^n.  */
              mp_size_t i, n;
              n = new_nn - dn;
              for (i = 0; i < n; i++)
                qp[i] = GMP_NUMB_MAX;
              qh = 0;   /* currently ignored */
            }
        }
      else  /* divisor is already normalised */
        {
          if (new_np != np)
            MPN_COPY (new_np, np, nn);

          if (dn == 2)
            {
              qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
            }
          else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
                   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
            {
              invert_pi1 (dinv, dh, dp[dn - 2]);
              qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
                   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
                   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
                   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)        /* ...condition */
            {
              invert_pi1 (dinv, dh, dp[dn - 2]);
              qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
            }
          qp[nn - dn] = qh;
        }
    }
  else
    {
      /* |________________________|
              |_________________|  */
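      /* The quotient is much shorter than the dividend.  Compute an
         approximate quotient from the high 2*qn+1 limbs of N and the high
         qn+1 limbs of D; the final adjustment below lowers it by at most
         one.  */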
      tp = TMP_ALLOC_LIMBS (qn + 1);

      new_np = scratch;
      new_nn = 2 * qn + 1;
      if (new_np == np)
        /* We need {np,nn} to remain untouched until the final adjustment,
           so we must allocate separate space for new_np.  */
        new_np = TMP_ALLOC_LIMBS (new_nn + 1);

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
        {
          count_leading_zeros (cnt, dh);

          cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
          new_np[new_nn] = cy;

          new_nn += (cy != 0);

          new_dp = TMP_ALLOC_LIMBS (qn + 1);
          mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
          new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);
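
          /* The shift left by cnt vacated the low cnt bits of the truncated
             divisor; the |= above refills them from the next lower limb of
             D, which exists since dn >= qn + 2 in this branch.  This keeps
             the truncated divisor normalised.  */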

          if (qn + 1 == 2)
            {
              qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
              qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
              qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
            }
          if (cy == 0)
            tp[qn] = qh;
          else if (UNLIKELY (qh != 0))
            {
              /* This happens only when the quotient is close to B^n and
                 mpn_*_divappr_q returned B^n.  */
              mp_size_t i, n;
              n = new_nn - (qn + 1);
              for (i = 0; i < n; i++)
                tp[i] = GMP_NUMB_MAX;
              qh = 0;   /* currently ignored */
            }
        }
      else  /* divisor is already normalised */
        {
          MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */

          new_dp = (mp_ptr) dp + dn - (qn + 1);

          if (qn == 2 - 1)
            {
              qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, dh, new_dp[qn - 1]);
              qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, dh, new_dp[qn - 1]);
              qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
            }
          tp[qn] = qh;
        }

      MPN_COPY (qp, tp + 1, qn);
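
      /* tp holds an approximate quotient that may exceed the true quotient,
         but only by a small amount.  If its discarded low limb tp[0] is
         above a small bound, the qn limbs just copied are certainly
         correct; otherwise check by multiplying the candidate Q back by D
         and comparing against N, decrementing Q if it is too large.  */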
      if (tp[0] <= 4)
        {
          mp_size_t rn;

          rp = TMP_ALLOC_LIMBS (dn + qn);
          mpn_mul (rp, dp, dn, tp + 1, qn);
          rn = dn + qn;
          rn -= rp[rn - 1] == 0;

          if (rn > nn || mpn_cmp (np, rp, nn) < 0)
            mpn_decr_u (qp, 1);
        }
    }

  TMP_FREE;
}