1 /* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
3 Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012 Free Software
6 This file is part of the GNU MP Library.
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of either:
11 * the GNU Lesser General Public License as published by the Free
12 Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
17 * the GNU General Public License as published by the Free Software
18 Foundation; either version 2 of the License, or (at your option) any
21 or both in parallel, as here.
23 The GNU MP Library is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
28 You should have received copies of the GNU General Public License and the
29 GNU Lesser General Public License along with the GNU MP Library. If not,
30 see https://www.gnu.org/licenses/. */
36 /* Uses the HGCD operation described in
38 N. Möller, On Schönhage's algorithm and subquadratic integer gcd
39 computation, Math. Comp. 77 (2008), 589-607.
41 to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
42 then uses Lehmer's algorithm.
45 /* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
46 * 2)/3, which gives a balanced multiplication in
47 * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
48 * performance. The matrix-vector multiplication is then
49 * 4:1-unbalanced, with matrix elements of size n/6, and vector
50 * elements of size p = 2n/3. */
52 /* From analysis of the theoretical running time, it appears that when
53 * multiplication takes time O(n^alpha), p should be chosen so that
54 * the ratio of the time for the mpn_hgcd call, and the time for the
55 * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
58 #define P_TABLE_SIZE 10000
59 mp_size_t p_table
[P_TABLE_SIZE
];
60 #define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
62 #define CHOOSE_P(n) (2*(n) / 3)
72 gcd_hook (void *p
, mp_srcptr gp
, mp_size_t gn
,
73 mp_srcptr qp
, mp_size_t qn
, int d
)
75 struct gcd_ctx
*ctx
= (struct gcd_ctx
*) p
;
76 MPN_COPY (ctx
->gp
, gp
, gn
);
81 /* Nail supports should be easy, replacing the sub_ddmmss with nails
83 #error Nails not supported.
86 /* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
87 Both U and V must be odd. */
88 static inline mp_size_t
89 gcd_2 (mp_ptr gp
, mp_srcptr up
, mp_srcptr vp
)
91 mp_limb_t u0
, u1
, v0
, v1
;
102 /* Check for u0 != v0 needed to ensure that argument to
103 * count_trailing_zeros is non-zero. */
104 while (u1
!= v1
&& u0
!= v0
)
109 sub_ddmmss (u1
, u0
, u1
, u0
, v1
, v0
);
110 count_trailing_zeros (r
, u0
);
111 u0
= ((u1
<< (GMP_NUMB_BITS
- r
)) & GMP_NUMB_MASK
) | (u0
>> r
);
116 sub_ddmmss (v1
, v0
, v1
, v0
, u1
, u0
);
117 count_trailing_zeros (r
, v0
);
118 v0
= ((v1
<< (GMP_NUMB_BITS
- r
)) & GMP_NUMB_MASK
) | (v0
>> r
);
123 gp
[0] = u0
, gp
[1] = u1
, gn
= 1 + (u1
!= 0);
125 /* If U == V == GCD, done. Otherwise, compute GCD (V, |U - V|). */
126 if (u1
== v1
&& u0
== v0
)
129 v0
= (u0
== v0
) ? ((u1
> v1
) ? u1
-v1
: v1
-u1
) : ((u0
> v0
) ? u0
-v0
: v0
-u0
);
130 gp
[0] = mpn_gcd_1 (gp
, gn
, v0
);
136 mpn_gcd (mp_ptr gp
, mp_ptr up
, mp_size_t usize
, mp_ptr vp
, mp_size_t n
)
140 mp_size_t matrix_scratch
;
148 ASSERT (vp
[n
-1] > 0);
150 /* FIXME: Check for small sizes first, before setting up temporary
152 talloc
= MPN_GCD_SUBDIV_STEP_ITCH(n
);
154 /* For initial division */
155 scratch
= usize
- n
+ 1;
156 if (scratch
> talloc
)
160 if (CHOOSE_P (n
) > 0)
162 if (ABOVE_THRESHOLD (n
, GCD_DC_THRESHOLD
))
165 mp_size_t hgcd_scratch
;
166 mp_size_t update_scratch
;
167 mp_size_t p
= CHOOSE_P (n
);
170 /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
172 matrix_scratch
= MPN_HGCD_MATRIX_INIT_ITCH (n
);
173 hgcd_scratch
= mpn_hgcd_itch (n
);
174 update_scratch
= 2*(n
- 1);
176 matrix_scratch
= MPN_HGCD_MATRIX_INIT_ITCH (n
- p
);
177 hgcd_scratch
= mpn_hgcd_itch (n
- p
);
178 update_scratch
= p
+ n
- 1;
180 scratch
= matrix_scratch
+ MAX(hgcd_scratch
, update_scratch
);
181 if (scratch
> talloc
)
186 tp
= TMP_ALLOC_LIMBS(talloc
);
190 mpn_tdiv_qr (tp
, up
, 0, up
, usize
, vp
, n
);
192 if (mpn_zero_p (up
, n
))
194 MPN_COPY (gp
, vp
, n
);
203 while (CHOOSE_P (n
) > 0)
205 while (ABOVE_THRESHOLD (n
, GCD_DC_THRESHOLD
))
208 struct hgcd_matrix M
;
209 mp_size_t p
= CHOOSE_P (n
);
210 mp_size_t matrix_scratch
= MPN_HGCD_MATRIX_INIT_ITCH (n
- p
);
212 mpn_hgcd_matrix_init (&M
, n
- p
, tp
);
213 nn
= mpn_hgcd (up
+ p
, vp
+ p
, n
- p
, &M
, tp
+ matrix_scratch
);
216 ASSERT (M
.n
<= (n
- p
- 1)/2);
217 ASSERT (M
.n
+ p
<= (p
+ n
- 1) / 2);
218 /* Temporary storage 2 (p + M->n) <= p + n - 1. */
219 n
= mpn_hgcd_matrix_adjust (&M
, p
+ nn
, up
, vp
, p
, tp
+ matrix_scratch
);
223 /* Temporary storage n */
224 n
= mpn_gcd_subdiv_step (up
, vp
, n
, 0, gcd_hook
, &ctx
, tp
);
232 struct hgcd_matrix1 M
;
233 mp_limb_t uh
, ul
, vh
, vl
;
236 mask
= up
[n
-1] | vp
[n
-1];
239 if (mask
& GMP_NUMB_HIGHBIT
)
241 uh
= up
[n
-1]; ul
= up
[n
-2];
242 vh
= vp
[n
-1]; vl
= vp
[n
-2];
248 count_leading_zeros (shift
, mask
);
249 uh
= MPN_EXTRACT_NUMB (shift
, up
[n
-1], up
[n
-2]);
250 ul
= MPN_EXTRACT_NUMB (shift
, up
[n
-2], up
[n
-3]);
251 vh
= MPN_EXTRACT_NUMB (shift
, vp
[n
-1], vp
[n
-2]);
252 vl
= MPN_EXTRACT_NUMB (shift
, vp
[n
-2], vp
[n
-3]);
255 /* Try an mpn_hgcd2 step */
256 if (mpn_hgcd2 (uh
, ul
, vh
, vl
, &M
))
258 n
= mpn_matrix22_mul1_inverse_vector (&M
, tp
, up
, vp
, n
);
259 MP_PTR_SWAP (up
, tp
);
263 /* mpn_hgcd2 has failed. Then either one of a or b is very
264 small, or the difference is very small. Perform one
265 subtraction followed by one division. */
267 /* Temporary storage n */
268 n
= mpn_gcd_subdiv_step (up
, vp
, n
, 0, &gcd_hook
, &ctx
, tp
);
274 ASSERT(up
[n
-1] | vp
[n
-1]);
278 *gp
= mpn_gcd_1(up
, 1, vp
[0]);
283 /* Due to the calling convention for mpn_gcd, at most one can be
287 MP_PTR_SWAP (up
, vp
);
293 *gp
= mpn_gcd_1 (up
, 2, vp
[1]);
297 else if (! (vp
[0] & 1))
300 count_trailing_zeros (r
, vp
[0]);
301 vp
[0] = ((vp
[1] << (GMP_NUMB_BITS
- r
)) & GMP_NUMB_MASK
) | (vp
[0] >> r
);
305 ctx
.gn
= gcd_2(gp
, up
, vp
);