/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.

Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* Uses the HGCD operation described in

   N. Möller, On Schönhage's algorithm and subquadratic integer gcd
   computation, Math. Comp. 77 (2008), 589-607.

   to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
   then uses Lehmer's algorithm.
*/
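
/* Illustrative usage sketch (added here for exposition; the concrete values
   are hypothetical and not part of the library source).  The caller must
   ensure usize >= vsize > 0, that the most significant limb of V is
   non-zero, and that at least one operand is odd; both inputs are clobbered
   and the result needs at most vsize limbs:

       mp_limb_t u[1] = { 90 };
       mp_limb_t v[1] = { 75 };   (odd, so the oddness precondition holds)
       mp_limb_t g[1];
       mp_size_t gn = mpn_gcd (g, u, 1, v, 1);
       (now gn == 1 and g[0] == 15)
*/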

/* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
 * 2)/3, which gives a balanced multiplication in
 * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
 * performance. The matrix-vector multiplication is then
 * 4:1-unbalanced, with matrix elements of size n/6, and vector
 * elements of size p = 2n/3. */

/* From analysis of the theoretical running time, it appears that when
 * multiplication takes time O(n^alpha), p should be chosen so that
 * the ratio of the time for the mpn_hgcd call, and the time for the
 * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
 * 1). */
#ifdef TUNE_GCD_P
#define P_TABLE_SIZE 10000
mp_size_t p_table[P_TABLE_SIZE];
#define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
#else
#define CHOOSE_P(n) (2*(n) / 3)
#endif
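
/* Worked example of the default choice, for illustration: with n = 600
   limbs, CHOOSE_P gives p = 400, so mpn_hgcd operates on the high
   n - p = 200 limbs and produces matrix elements of roughly 100 ~= n/6
   limbs; these then multiply vectors of about p = 400 ~= 2n/3 limbs in
   mpn_hgcd_matrix_adjust, the 4:1-unbalanced product mentioned above. */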

struct gcd_ctx
{
  mp_ptr gp;
  mp_size_t gn;
};

static void
gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
          mp_srcptr qp, mp_size_t qn, int d)
{
  struct gcd_ctx *ctx = (struct gcd_ctx *) p;
  MPN_COPY (ctx->gp, gp, gn);
  ctx->gn = gn;
}

#if GMP_NAIL_BITS > 0
/* Nail support should be easy, replacing the sub_ddmmss with nails
 * logic. */
#error Nails not supported.
#endif

/* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
   Both U and V must be odd. */
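/* The loop below relies on the standard binary GCD identity: for odd U and
   V with, say, U > V, gcd (U, V) = gcd ((U - V) / 2^r, V), where 2^r is the
   largest power of two dividing U - V.  The count_trailing_zeros and shift
   pair implements the division by 2^r across the two limbs. */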
static inline mp_size_t
gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
{
  mp_limb_t u0, u1, v0, v1;
  mp_size_t gn;

  u0 = up[0];
  u1 = up[1];
  v0 = vp[0];
  v1 = vp[1];

  ASSERT (u0 & 1);
  ASSERT (v0 & 1);

  /* Check for u0 != v0 needed to ensure that argument to
   * count_trailing_zeros is non-zero. */
  while (u1 != v1 && u0 != v0)
    {
      unsigned long int r;
      if (u1 > v1)
        {
          sub_ddmmss (u1, u0, u1, u0, v1, v0);
          count_trailing_zeros (r, u0);
          u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
          u1 >>= r;
        }
      else /* u1 < v1. */
        {
          sub_ddmmss (v1, v0, v1, v0, u1, u0);
          count_trailing_zeros (r, v0);
          v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
          v1 >>= r;
        }
    }

  gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);

  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|). */
  if (u1 == v1 && u0 == v0)
    return gn;

  v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
  gp[0] = mpn_gcd_1 (gp, gn, v0);

  return 1;
}
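
/* Outline of mpn_gcd below, as implemented in this file: (1) if U has more
   limbs than V, reduce U modulo V with one division; (2) while n is above
   GCD_DC_THRESHOLD, apply subquadratic mpn_hgcd steps to the high parts and
   fold the resulting matrix back in with mpn_hgcd_matrix_adjust; (3) while
   n > 2, apply Lehmer-style mpn_hgcd2 steps driven by the two most
   significant limbs; (4) finish the last one or two limbs with mpn_gcd_1 or
   gcd_2. */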
mp_size_t
mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
{
  mp_size_t talloc;
  mp_size_t scratch;
  mp_size_t matrix_scratch;

  struct gcd_ctx ctx;
  mp_ptr tp;
  TMP_DECL;

  ASSERT (usize >= n);
  ASSERT (n > 0);
  ASSERT (vp[n-1] > 0);

  /* FIXME: Check for small sizes first, before setting up temporary
     storage etc. */
  talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);

  /* For initial division */
  scratch = usize - n + 1;
  if (scratch > talloc)
    talloc = scratch;

#if TUNE_GCD_P
  if (CHOOSE_P (n) > 0)
#else
  if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
#endif
    {
      mp_size_t hgcd_scratch;
      mp_size_t update_scratch;
      mp_size_t p = CHOOSE_P (n);
      mp_size_t scratch;
#if TUNE_GCD_P
      /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
         is increasing */
      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
      hgcd_scratch = mpn_hgcd_itch (n);
      update_scratch = 2*(n - 1);
#else
      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
      hgcd_scratch = mpn_hgcd_itch (n - p);
      update_scratch = p + n - 1;
#endif
      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
      if (scratch > talloc)
        talloc = scratch;
    }

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS(talloc);

  if (usize > n)
    {
      mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);

      if (mpn_zero_p (up, n))
        {
          MPN_COPY (gp, vp, n);
          ctx.gn = n;
          goto done;
        }
    }

  ctx.gp = gp;

#if TUNE_GCD_P
  while (CHOOSE_P (n) > 0)
#else
  while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
#endif
    {
      struct hgcd_matrix M;
      mp_size_t p = CHOOSE_P (n);
      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
      mp_size_t nn;
      mpn_hgcd_matrix_init (&M, n - p, tp);
      nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
      if (nn > 0)
        {
          ASSERT (M.n <= (n - p - 1)/2);
          ASSERT (M.n + p <= (p + n - 1) / 2);
          /* Temporary storage 2 (p + M->n) <= p + n - 1. */
          n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
        }
      else
        {
          /* Temporary storage n */
          n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
          if (n == 0)
            goto done;
        }
    }
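
  /* Lehmer-style phase: mpn_hgcd2 computes a 2x2 reduction matrix from the
     two most significant limbs only, and mpn_matrix22_mul1_inverse_vector
     applies that matrix to the full operands; mpn_gcd_subdiv_step is the
     fallback when the top limbs do not determine the quotients. */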
  while (n > 2)
    {
      struct hgcd_matrix1 M;
      mp_limb_t uh, ul, vh, vl;
      mp_limb_t mask;

      mask = up[n-1] | vp[n-1];
      ASSERT (mask > 0);

      if (mask & GMP_NUMB_HIGHBIT)
        {
          uh = up[n-1]; ul = up[n-2];
          vh = vp[n-1]; vl = vp[n-2];
        }
      else
        {
          int shift;

          count_leading_zeros (shift, mask);
          uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
          ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
          vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
          vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
        }

      /* Try an mpn_hgcd2 step */
      if (mpn_hgcd2 (uh, ul, vh, vl, &M))
        {
          n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
          MP_PTR_SWAP (up, tp);
        }
      else
        {
          /* mpn_hgcd2 has failed.  Then either one of a or b is very
             small, or the difference is very small.  Perform one
             subtraction followed by one division. */

          /* Temporary storage n */
          n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
          if (n == 0)
            goto done;
        }
    }

  ASSERT(up[n-1] | vp[n-1]);

  if (n == 1)
    {
      *gp = mpn_gcd_1(up, 1, vp[0]);
      ctx.gn = 1;
      goto done;
    }

  /* Due to the calling convention for mpn_gcd, at most one can be
     even. */

  if (! (up[0] & 1))
    MP_PTR_SWAP (up, vp);

  ASSERT (up[0] & 1);

  if (vp[0] == 0)
    {
      *gp = mpn_gcd_1 (up, 2, vp[1]);
      ctx.gn = 1;
      goto done;
    }
  else if (! (vp[0] & 1))
    {
      int r;
      count_trailing_zeros (r, vp[0]);
      vp[0] = ((vp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (vp[0] >> r);
      vp[1] >>= r;
    }

  ctx.gn = gcd_2(gp, up, vp);

done:
  TMP_FREE;
  return ctx.gn;
}