/* beta-0.89.2
   [luatex.git] / source / libs / gmp / gmp-src / mpn / generic / gcdext_lehmer.c
   blob 547f69a40984e57948e35ca3237d5d43801eda27 */
1 /* mpn_gcdext -- Extended Greatest Common Divisor.
3 Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
4 Inc.
6 This file is part of the GNU MP Library.
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of either:
11 * the GNU Lesser General Public License as published by the Free
12 Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
17 * the GNU General Public License as published by the Free Software
18 Foundation; either version 2 of the License, or (at your option) any
19 later version.
21 or both in parallel, as here.
23 The GNU MP Library is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 for more details.
28 You should have received copies of the GNU General Public License and the
29 GNU Lesser General Public License along with the GNU MP Library. If not,
30 see https://www.gnu.org/licenses/. */
32 #include "gmp.h"
33 #include "gmp-impl.h"
34 #include "longlong.h"
36 /* Here, d is the index of the cofactor to update. FIXME: Could use qn
37 = 0 for the common case q = 1. */
38 void
39 mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
40 mp_srcptr qp, mp_size_t qn, int d)
42 struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
43 mp_size_t un = ctx->un;
45 if (gp)
47 mp_srcptr up;
49 ASSERT (gn > 0);
50 ASSERT (gp[gn-1] > 0);
52 MPN_COPY (ctx->gp, gp, gn);
53 ctx->gn = gn;
55 if (d < 0)
57 int c;
59 /* Must return the smallest cofactor, +u1 or -u0 */
60 MPN_CMP (c, ctx->u0, ctx->u1, un);
61 ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
63 d = c < 0;
66 up = d ? ctx->u0 : ctx->u1;
68 MPN_NORMALIZE (up, un);
69 MPN_COPY (ctx->up, up, un);
71 *ctx->usize = d ? -un : un;
73 else
75 mp_limb_t cy;
76 mp_ptr u0 = ctx->u0;
77 mp_ptr u1 = ctx->u1;
79 ASSERT (d >= 0);
81 if (d)
82 MP_PTR_SWAP (u0, u1);
84 qn -= (qp[qn-1] == 0);
86 /* Update u0 += q * u1 */
87 if (qn == 1)
89 mp_limb_t q = qp[0];
91 if (q == 1)
92 /* A common case. */
93 cy = mpn_add_n (u0, u0, u1, un);
94 else
95 cy = mpn_addmul_1 (u0, u1, un, q);
97 else
99 mp_size_t u1n;
100 mp_ptr tp;
102 u1n = un;
103 MPN_NORMALIZE (u1, u1n);
105 if (u1n == 0)
106 return;
108 /* Should always have u1n == un here, and u1 >= u0. The
109 reason is that we alternate adding u0 to u1 and u1 to u0
110 (corresponding to subtractions a - b and b - a), and we
111 can get a large quotient only just after a switch, which
112 means that we'll add (a multiple of) the larger u to the
113 smaller. */
115 tp = ctx->tp;
117 if (qn > u1n)
118 mpn_mul (tp, qp, qn, u1, u1n);
119 else
120 mpn_mul (tp, u1, u1n, qp, qn);
122 u1n += qn;
123 u1n -= tp[u1n-1] == 0;
125 if (u1n >= un)
127 cy = mpn_add (u0, tp, u1n, u0, un);
128 un = u1n;
130 else
131 /* Note: Unlikely case, maybe never happens? */
132 cy = mpn_add (u0, u0, un, tp, u1n);
135 u0[un] = cy;
136 ctx->un = un + (cy > 0);
140 /* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
141 the matrix-vector multiplication adjusting a, b. If hgcd fails, we
142 need at most n for the quotient and n+1 for the u update (reusing
143 the extra u). In all, 4n + 3. */
145 mp_size_t
146 mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
147 mp_ptr ap, mp_ptr bp, mp_size_t n,
148 mp_ptr tp)
150 mp_size_t ualloc = n + 1;
152 /* Keeps track of the second row of the reduction matrix
154 * M = (v0, v1 ; u0, u1)
156 * which correspond to the first column of the inverse
158 * M^{-1} = (u1, -v1; -u0, v0)
160 * This implies that
162 * a = u1 A (mod B)
163 * b = -u0 A (mod B)
165 * where A, B denotes the input values.
168 struct gcdext_ctx ctx;
169 mp_size_t un;
170 mp_ptr u0;
171 mp_ptr u1;
172 mp_ptr u2;
174 MPN_ZERO (tp, 3*ualloc);
175 u0 = tp; tp += ualloc;
176 u1 = tp; tp += ualloc;
177 u2 = tp; tp += ualloc;
179 u1[0] = 1; un = 1;
181 ctx.gp = gp;
182 ctx.up = up;
183 ctx.usize = usize;
185 /* FIXME: Handle n == 2 differently, after the loop? */
186 while (n >= 2)
188 struct hgcd_matrix1 M;
189 mp_limb_t ah, al, bh, bl;
190 mp_limb_t mask;
192 mask = ap[n-1] | bp[n-1];
193 ASSERT (mask > 0);
195 if (mask & GMP_NUMB_HIGHBIT)
197 ah = ap[n-1]; al = ap[n-2];
198 bh = bp[n-1]; bl = bp[n-2];
200 else if (n == 2)
202 /* We use the full inputs without truncation, so we can
203 safely shift left. */
204 int shift;
206 count_leading_zeros (shift, mask);
207 ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
208 al = ap[0] << shift;
209 bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
210 bl = bp[0] << shift;
212 else
214 int shift;
216 count_leading_zeros (shift, mask);
217 ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
218 al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
219 bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
220 bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
223 /* Try an mpn_nhgcd2 step */
224 if (mpn_hgcd2 (ah, al, bh, bl, &M))
226 n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
227 MP_PTR_SWAP (ap, tp);
228 un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
229 MP_PTR_SWAP (u0, u2);
231 else
233 /* mpn_hgcd2 has failed. Then either one of a or b is very
234 small, or the difference is very small. Perform one
235 subtraction followed by one division. */
236 ctx.u0 = u0;
237 ctx.u1 = u1;
238 ctx.tp = u2;
239 ctx.un = un;
241 /* Temporary storage n for the quotient and ualloc for the
242 new cofactor. */
243 n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
244 if (n == 0)
245 return ctx.gn;
247 un = ctx.un;
250 ASSERT_ALWAYS (ap[0] > 0);
251 ASSERT_ALWAYS (bp[0] > 0);
253 if (ap[0] == bp[0])
255 int c;
257 /* Which cofactor to return now? Candidates are +u1 and -u0,
258 depending on which of a and b was most recently reduced,
259 which we don't keep track of. So compare and get the smallest
260 one. */
262 gp[0] = ap[0];
264 MPN_CMP (c, u0, u1, un);
265 ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
266 if (c < 0)
268 MPN_NORMALIZE (u0, un);
269 MPN_COPY (up, u0, un);
270 *usize = -un;
272 else
274 MPN_NORMALIZE_NOT_ZERO (u1, un);
275 MPN_COPY (up, u1, un);
276 *usize = un;
278 return 1;
280 else
282 mp_limb_t uh, vh;
283 mp_limb_signed_t u;
284 mp_limb_signed_t v;
285 int negate;
287 gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
289 /* Set up = u u1 - v u0. Keep track of size, un grows by one or
290 two limbs. */
292 if (u == 0)
294 ASSERT (v == 1);
295 MPN_NORMALIZE (u0, un);
296 MPN_COPY (up, u0, un);
297 *usize = -un;
298 return 1;
300 else if (v == 0)
302 ASSERT (u == 1);
303 MPN_NORMALIZE (u1, un);
304 MPN_COPY (up, u1, un);
305 *usize = un;
306 return 1;
308 else if (u > 0)
310 negate = 0;
311 ASSERT (v < 0);
312 v = -v;
314 else
316 negate = 1;
317 ASSERT (v > 0);
318 u = -u;
321 uh = mpn_mul_1 (up, u1, un, u);
322 vh = mpn_addmul_1 (up, u0, un, v);
324 if ( (uh | vh) > 0)
326 uh += vh;
327 up[un++] = uh;
328 if (uh < vh)
329 up[un++] = 1;
332 MPN_NORMALIZE_NOT_ZERO (up, un);
334 *usize = negate ? -un : un;
335 return 1;