/* beta-0.89.2
   [luatex.git] / source / libs / gmp / gmp-src / mpn / generic / gcdext_lehmer.c
   blob 547f69a40984e57948e35ca3237d5d43801eda27 */
1 /* mpn_gcdext -- Extended Greatest Common Divisor.
3 Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
4 Inc.
6 This file is part of the GNU MP Library.
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of either:
11 * the GNU Lesser General Public License as published by the Free
12 Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
17 * the GNU General Public License as published by the Free Software
18 Foundation; either version 2 of the License, or (at your option) any
19 later version.
21 or both in parallel, as here.
23 The GNU MP Library is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 for more details.
28 You should have received copies of the GNU General Public License and the
29 GNU Lesser General Public License along with the GNU MP Library. If not,
30 see https://www.gnu.org/licenses/. */
32 #include "gmp.h"
33 #include "gmp-impl.h"
34 #include "longlong.h"
36 /* Here, d is the index of the cofactor to update. FIXME: Could use qn
37 = 0 for the common case q = 1. */
38 void
39 mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
40 mp_srcptr qp, mp_size_t qn, int d)
42 struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
43 mp_size_t un = ctx->un;
45 if (gp)
47 mp_srcptr up;
49 ASSERT (gn > 0);
50 ASSERT (gp[gn-1] > 0);
52 MPN_COPY (ctx->gp, gp, gn);
53 ctx->gn = gn;
55 if (d < 0)
57 int c;
59 /* Must return the smallest cofactor, +u1 or -u0 */
60 MPN_CMP (c, ctx->u0, ctx->u1, un);
61 ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
63 d = c < 0;
66 up = d ? ctx->u0 : ctx->u1;
68 MPN_NORMALIZE (up, un);
69 MPN_COPY (ctx->up, up, un);
71 *ctx->usize = d ? -un : un;
73 else
75 mp_limb_t cy;
76 mp_ptr u0 = ctx->u0;
77 mp_ptr u1 = ctx->u1;
79 ASSERT (d >= 0);
81 if (d)
82 MP_PTR_SWAP (u0, u1);
84 qn -= (qp[qn-1] == 0);
86 /* Update u0 += q * u1 */
87 if (qn == 1)
89 mp_limb_t q = qp[0];
91 if (q == 1)
92 /* A common case. */
93 cy = mpn_add_n (u0, u0, u1, un);
94 else
95 cy = mpn_addmul_1 (u0, u1, un, q);
97 else
99 mp_size_t u1n;
100 mp_ptr tp;
102 u1n = un;
103 MPN_NORMALIZE (u1, u1n);
105 if (u1n == 0)
106 return;
108 /* Should always have u1n == un here, and u1 >= u0. The
109 reason is that we alternate adding u0 to u1 and u1 to u0
110 (corresponding to subtractions a - b and b - a), and we
111 can get a large quotient only just after a switch, which
112 means that we'll add (a multiple of) the larger u to the
113 smaller. */
115 tp = ctx->tp;
117 if (qn > u1n)
118 mpn_mul (tp, qp, qn, u1, u1n);
119 else
120 mpn_mul (tp, u1, u1n, qp, qn);
122 u1n += qn;
123 u1n -= tp[u1n-1] == 0;
125 if (u1n >= un)
127 cy = mpn_add (u0, tp, u1n, u0, un);
128 un = u1n;
130 else
131 /* Note: Unlikely case, maybe never happens? */
132 cy = mpn_add (u0, u0, un, tp, u1n);
135 u0[un] = cy;
136 ctx->un = un + (cy > 0);
140 /* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
141 the matrix-vector multiplication adjusting a, b. If hgcd fails, we
142 need at most n for the quotient and n+1 for the u update (reusing
143 the extra u). In all, 4n + 3. */
145 mp_size_t
146 mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
147 mp_ptr ap, mp_ptr bp, mp_size_t n,
148 mp_ptr tp)
150 mp_size_t ualloc = n + 1;
152 /* Keeps track of the second row of the reduction matrix
154 * M = (v0, v1 ; u0, u1)
156 * which correspond to the first column of the inverse
158 * M^{-1} = (u1, -v1; -u0, v0)
160 * This implies that
162 * a = u1 A (mod B)
163 * b = -u0 A (mod B)
165 * where A, B denotes the input values.
168 struct gcdext_ctx ctx;
169 mp_size_t un;
170 mp_ptr u0;
171 mp_ptr u1;
172 mp_ptr u2;
174 MPN_ZERO (tp, 3*ualloc);
175 u0 = tp; tp += ualloc;
176 u1 = tp; tp += ualloc;
177 u2 = tp; tp += ualloc;
179 u1[0] = 1; un = 1;
181 ctx.gp = gp;
182 ctx.up = up;
183 ctx.usize = usize;
185 /* FIXME: Handle n == 2 differently, after the loop? */
186 while (n >= 2)
188 struct hgcd_matrix1 M;
189 mp_limb_t ah, al, bh, bl;
190 mp_limb_t mask;
192 mask = ap[n-1] | bp[n-1];
193 ASSERT (mask > 0);
195 if (mask & GMP_NUMB_HIGHBIT)
197 ah = ap[n-1]; al = ap[n-2];
198 bh = bp[n-1]; bl = bp[n-2];
200 else if (n == 2)
202 /* We use the full inputs without truncation, so we can
203 safely shift left. */
204 int shift;
206 count_leading_zeros (shift, mask);
207 ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
208 al = ap[0] << shift;
209 bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
210 bl = bp[0] << shift;
212 else
214 int shift;
216 count_leading_zeros (shift, mask);
217 ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
218 al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
219 bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
220 bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
223 /* Try an mpn_nhgcd2 step */
224 if (mpn_hgcd2 (ah, al, bh, bl, &M))
226 n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
227 MP_PTR_SWAP (ap, tp);
228 un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
229 MP_PTR_SWAP (u0, u2);
231 else
233 /* mpn_hgcd2 has failed. Then either one of a or b is very
234 small, or the difference is very small. Perform one
235 subtraction followed by one division. */
236 ctx.u0 = u0;
237 ctx.u1 = u1;
238 ctx.tp = u2;
239 ctx.un = un;
241 /* Temporary storage n for the quotient and ualloc for the
242 new cofactor. */
243 n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
244 if (n == 0)
245 return ctx.gn;
247 un = ctx.un;
250 ASSERT_ALWAYS (ap[0] > 0);
251 ASSERT_ALWAYS (bp[0] > 0);
253 if (ap[0] == bp[0])
255 int c;
257 /* Which cofactor to return now? Candidates are +u1 and -u0,
258 depending on which of a and b was most recently reduced,
259 which we don't keep track of. So compare and get the smallest
260 one. */
262 gp[0] = ap[0];
264 MPN_CMP (c, u0, u1, un);
265 ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
266 if (c < 0)
268 MPN_NORMALIZE (u0, un);
269 MPN_COPY (up, u0, un);
270 *usize = -un;
272 else
274 MPN_NORMALIZE_NOT_ZERO (u1, un);
275 MPN_COPY (up, u1, un);
276 *usize = un;
278 return 1;
280 else
282 mp_limb_t uh, vh;
283 mp_limb_signed_t u;
284 mp_limb_signed_t v;
285 int negate;
287 gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
289 /* Set up = u u1 - v u0. Keep track of size, un grows by one or
290 two limbs. */
292 if (u == 0)
294 ASSERT (v == 1);
295 MPN_NORMALIZE (u0, un);
296 MPN_COPY (up, u0, un);
297 *usize = -un;
298 return 1;
300 else if (v == 0)
302 ASSERT (u == 1);
303 MPN_NORMALIZE (u1, un);
304 MPN_COPY (up, u1, un);
305 *usize = un;
306 return 1;
308 else if (u > 0)
310 negate = 0;
311 ASSERT (v < 0);
312 v = -v;
314 else
316 negate = 1;
317 ASSERT (v > 0);
318 u = -u;
321 uh = mpn_mul_1 (up, u1, un, u);
322 vh = mpn_addmul_1 (up, u0, un, v);
324 if ( (uh | vh) > 0)
326 uh += vh;
327 up[un++] = uh;
328 if (uh < vh)
329 up[un++] = 1;
332 MPN_NORMALIZE_NOT_ZERO (up, un);
334 *usize = negate ? -un : un;
335 return 1;