beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / generic / gcdext_1.c
blobea46cceb72981d7440e88f33ef149a2681d22be7
1 /* mpn_gcdext -- Extended Greatest Common Divisor.
3 Copyright 1996, 1998, 2000-2005, 2008, 2009 Free Software Foundation, Inc.
5 This file is part of the GNU MP Library.
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of either:
10 * the GNU Lesser General Public License as published by the Free
11 Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
16 * the GNU General Public License as published by the Free Software
17 Foundation; either version 2 of the License, or (at your option) any
18 later version.
20 or both in parallel, as here.
22 The GNU MP Library is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 for more details.
27 You should have received copies of the GNU General Public License and the
28 GNU Lesser General Public License along with the GNU MP Library. If not,
29 see https://www.gnu.org/licenses/. */
31 #include "gmp.h"
32 #include "gmp-impl.h"
33 #include "longlong.h"
35 #ifndef GCDEXT_1_USE_BINARY
36 #define GCDEXT_1_USE_BINARY 0
37 #endif
39 #ifndef GCDEXT_1_BINARY_METHOD
40 #define GCDEXT_1_BINARY_METHOD 2
41 #endif
43 #ifndef USE_ZEROTAB
44 #define USE_ZEROTAB 1
45 #endif
47 #if GCDEXT_1_USE_BINARY
49 #if USE_ZEROTAB
50 static unsigned char zerotab[0x40] = {
51 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
52 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
53 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
54 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
56 #endif
58 mp_limb_t
59 mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
60 mp_limb_t u, mp_limb_t v)
62 /* Maintain
64 U = t1 u + t0 v
65 V = s1 u + s0 v
67 where U, V are the inputs (without any shared power of two),
68 and the matrix has determinant ± 2^{shift}.
70 mp_limb_t s0 = 1;
71 mp_limb_t t0 = 0;
72 mp_limb_t s1 = 0;
73 mp_limb_t t1 = 1;
74 mp_limb_t ug;
75 mp_limb_t vg;
76 mp_limb_t ugh;
77 mp_limb_t vgh;
78 unsigned zero_bits;
79 unsigned shift;
80 unsigned i;
81 #if GCDEXT_1_BINARY_METHOD == 2
82 mp_limb_t det_sign;
83 #endif
85 ASSERT (u > 0);
86 ASSERT (v > 0);
88 count_trailing_zeros (zero_bits, u | v);
89 u >>= zero_bits;
90 v >>= zero_bits;
92 if ((u & 1) == 0)
94 count_trailing_zeros (shift, u);
95 u >>= shift;
96 t1 <<= shift;
98 else if ((v & 1) == 0)
100 count_trailing_zeros (shift, v);
101 v >>= shift;
102 s0 <<= shift;
104 else
105 shift = 0;
107 #if GCDEXT_1_BINARY_METHOD == 1
108 while (u != v)
110 unsigned count;
111 if (u > v)
113 u -= v;
114 #if USE_ZEROTAB
115 count = zerotab [u & 0x3f];
116 u >>= count;
117 if (UNLIKELY (count == 6))
119 unsigned c;
122 c = zerotab[u & 0x3f];
123 u >>= c;
124 count += c;
126 while (c == 6);
128 #else
129 count_trailing_zeros (count, u);
130 u >>= count;
131 #endif
132 t0 += t1; t1 <<= count;
133 s0 += s1; s1 <<= count;
135 else
137 v -= u;
138 #if USE_ZEROTAB
139 count = zerotab [v & 0x3f];
140 v >>= count;
141 if (UNLIKELY (count == 6))
143 unsigned c;
146 c = zerotab[v & 0x3f];
147 v >>= c;
148 count += c;
150 while (c == 6);
152 #else
153 count_trailing_zeros (count, v);
154 v >>= count;
155 #endif
156 t1 += t0; t0 <<= count;
157 s1 += s0; s0 <<= count;
159 shift += count;
161 #else
162 # if GCDEXT_1_BINARY_METHOD == 2
163 u >>= 1;
164 v >>= 1;
166 det_sign = 0;
168 while (u != v)
170 unsigned count;
171 mp_limb_t d = u - v;
172 mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
173 mp_limb_t sx;
174 mp_limb_t tx;
176 /* When v <= u (vgtu == 0), the updates are:
178 (u; v) <-- ( (u - v) >> count; v) (det = +(1<<count) for corr. M factor)
179 (t1, t0) <-- (t1 << count, t0 + t1)
181 and when v > 0, the updates are
183 (u; v) <-- ( (v - u) >> count; u) (det = -(1<<count))
184 (t1, t0) <-- (t0 << count, t0 + t1)
186 and similarly for s1, s0
189 /* v <-- min (u, v) */
190 v += (vgtu & d);
192 /* u <-- |u - v| */
193 u = (d ^ vgtu) - vgtu;
195 /* Number of trailing zeros is the same no matter if we look at
196 * d or u, but using d gives more parallelism. */
197 #if USE_ZEROTAB
198 count = zerotab[d & 0x3f];
199 if (UNLIKELY (count == 6))
201 unsigned c = 6;
204 d >>= c;
205 c = zerotab[d & 0x3f];
206 count += c;
208 while (c == 6);
210 #else
211 count_trailing_zeros (count, d);
212 #endif
213 det_sign ^= vgtu;
215 tx = vgtu & (t0 - t1);
216 sx = vgtu & (s0 - s1);
217 t0 += t1;
218 s0 += s1;
219 t1 += tx;
220 s1 += sx;
222 count++;
223 u >>= count;
224 t1 <<= count;
225 s1 <<= count;
226 shift += count;
228 u = (u << 1) + 1;
229 # else /* GCDEXT_1_BINARY_METHOD == 2 */
230 # error Unknown GCDEXT_1_BINARY_METHOD
231 # endif
232 #endif
234 /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
235 ug = t0 + t1;
236 vg = s0 + s1;
238 ugh = ug/2 + (ug & 1);
239 vgh = vg/2 + (vg & 1);
241 /* Now ±2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
242 s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
243 for (i = 0; i < shift; i++)
245 mp_limb_t mask = - ( (s0 | t0) & 1);
247 s0 /= 2;
248 t0 /= 2;
249 s0 += mask & vgh;
250 t0 += mask & ugh;
252 /* FIXME: Try simplifying this condition. */
253 if ( (s0 > 1 && 2*s0 >= vg) || (t0 > 1 && 2*t0 >= ug) )
255 s0 -= vg;
256 t0 -= ug;
258 #if GCDEXT_1_BINARY_METHOD == 2
259 /* Conditional negation. */
260 s0 = (s0 ^ det_sign) - det_sign;
261 t0 = (t0 ^ det_sign) - det_sign;
262 #endif
263 *sp = s0;
264 *tp = -t0;
266 return u << zero_bits;
269 #else /* !GCDEXT_1_USE_BINARY */
272 /* FIXME: Takes two single-word limbs. It could be extended to a
273 * function that accepts a bignum for the first input, and only
274 * returns the first co-factor. */
276 mp_limb_t
277 mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
278 mp_limb_t a, mp_limb_t b)
280 /* Maintain
282 a = u0 A + v0 B
283 b = u1 A + v1 B
285 where A, B are the original inputs.
287 mp_limb_signed_t u0 = 1;
288 mp_limb_signed_t v0 = 0;
289 mp_limb_signed_t u1 = 0;
290 mp_limb_signed_t v1 = 1;
292 ASSERT (a > 0);
293 ASSERT (b > 0);
295 if (a < b)
296 goto divide_by_b;
298 for (;;)
300 mp_limb_t q;
302 q = a / b;
303 a -= q * b;
305 if (a == 0)
307 *up = u1;
308 *vp = v1;
309 return b;
311 u0 -= q * u1;
312 v0 -= q * v1;
314 divide_by_b:
315 q = b / a;
316 b -= q * a;
318 if (b == 0)
320 *up = u0;
321 *vp = v0;
322 return a;
324 u1 -= q * u0;
325 v1 -= q * v0;
328 #endif /* !GCDEXT_1_USE_BINARY */