/* mpn_gcdext -- Extended Greatest Common Divisor.

Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
/* Computes (r;b) = (a; b) M. Result is of size n + M->n +/- 1, and
   the size is returned (if inputs are non-normalized, result may be
   non-normalized too). Temporary space needed is M->n + n.
 */
static size_t
hgcd_mul_matrix_vector (struct hgcd_matrix *M,
                        mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
{
  mp_limb_t ah, bh;

  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as

     t  = u00 * a
     r  = u10 * b
     r += t;

     t  = u11 * b
     b  = u01 * a
     b += t;
  */
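  /* Small single-limb illustration (values chosen only for the example):
     with u00 = 3, u01 = 1, u10 = 2, u11 = 1 and (a; b) = (5; 7),
       r = u00*a + u10*b = 3*5 + 2*7 = 29
       b = u01*a + u11*b = 1*5 + 1*7 = 12
     so (a; b) M maps (5; 7) to (29; 12).  */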
  if (M->n >= n)
    {
      mpn_mul (tp, M->p[0][0], M->n, ap, n);
      mpn_mul (rp, M->p[1][0], M->n, bp, n);
    }
  else
    {
      mpn_mul (tp, ap, n, M->p[0][0], M->n);
      mpn_mul (rp, bp, n, M->p[1][0], M->n);
    }

  ah = mpn_add_n (rp, rp, tp, n + M->n);

  if (M->n >= n)
    {
      mpn_mul (tp, M->p[1][1], M->n, bp, n);
      mpn_mul (bp, M->p[0][1], M->n, ap, n);
    }
  else
    {
      mpn_mul (tp, bp, n, M->p[1][1], M->n);
      mpn_mul (bp, ap, n, M->p[0][1], M->n);
    }

  bh = mpn_add_n (bp, bp, tp, n + M->n);

  n += M->n;
  if ( (ah | bh) > 0)
    {
      rp[n] = ah;
      bp[n] = bh;
      n++;
    }
  else
    {
      /* Normalize */
      while ( (rp[n-1] | bp[n-1]) == 0)
        n--;
    }

  return n;
}

#define COMPUTE_V_ITCH(n) (2*(n))
/* Computes |v| = |(g - u a)| / b, where u may be positive or
   negative, and v is of the opposite sign. max(a, b) is of size n, u and
   v at most size n, and v must have space for n+1 limbs. */
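/* Illustration with small values: a = 6, b = 4, g = 2, u = 1 gives
   |v| = |g - u a| / b = |2 - 6| / 4 = 1, and since u > 0 the cofactor is
   v = -1, consistent with 2 = 1*6 + (-1)*4.  */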
static mp_size_t
compute_v (mp_ptr vp,
           mp_srcptr ap, mp_srcptr bp, mp_size_t n,
           mp_srcptr gp, mp_size_t gn,
           mp_srcptr up, mp_size_t usize,
           mp_ptr tp)
{
  mp_size_t size;
  mp_size_t an;
  mp_size_t bn;
  mp_size_t vn;

  ASSERT (n > 0);
  ASSERT (gn > 0);
  ASSERT (usize != 0);

  size = ABS (usize);
  ASSERT (size <= n);
  ASSERT (up[size-1] > 0);

  an = n;
  MPN_NORMALIZE (ap, an);
  ASSERT (gn <= an);

  if (an >= size)
    mpn_mul (tp, ap, an, up, size);
  else
    mpn_mul (tp, up, size, ap, an);

  size += an;

  if (usize > 0)
    {
      /* |v| = -v = (u a - g) / b */

      ASSERT_NOCARRY (mpn_sub (tp, tp, size, gp, gn));
      MPN_NORMALIZE (tp, size);
      if (size == 0)
        return 0;
    }
  else
    { /* |v| = v = (g - u a) / b = (g + |u| a) / b. Since g <= a,
         (g + |u| a) always fits in (|usize| + an) limbs. */

      ASSERT_NOCARRY (mpn_add (tp, tp, size, gp, gn));
      size -= (tp[size - 1] == 0);
    }

  /* Now divide t / b. There must be no remainder */
  bn = n;
  MPN_NORMALIZE (bp, bn);
  ASSERT (size >= bn);

  vn = size + 1 - bn;
  ASSERT (vn <= n + 1);

  mpn_divexact (vp, tp, size, bp, bn);
  vn -= (vp[vn-1] == 0);

  return vn;
}
/* Temporary storage:

   Initial division: Quotient of at most an - n + 1 <= an limbs.

   Storage for u0 and u1: 2(n+1).

   Storage for hgcd matrix M, with input ceil(n/2): 5 * ceil(n/4)

   Storage for hgcd, input (n + 1)/2: 9 n/4 plus some.

   When hgcd succeeds: 1 + floor(3n/2) for adjusting a and b, and 2(n+1)
   for the cofactors.

   When hgcd fails: 2n + 1 for mpn_gcdext_subdiv_step, which is less.

   For the lehmer call after the loop, let T denote
   GCDEXT_DC_THRESHOLD. For the gcdext_lehmer call, we need T each for
   u, a and b, and 4T+3 scratch space. Next, for compute_v, we need T
   for u, T+1 for v and 2T scratch space. In all, 7T + 3 is
   sufficient for both operations.

*/
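/* Spelling out the last two bounds: the gcdext_lehmer call needs
   3T (for u, a and b) + (4T+3) scratch = 7T + 3 limbs, while compute_v
   needs T + (T+1) + 2T = 4T + 1 limbs, so 7T + 3 covers both.  */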
/* Optimal choice of p seems difficult. In each iteration the division
 * of work between hgcd and the updates of u0 and u1 depends on the
 * current size of the u. It may be desirable to use a different
 * choice of p in each iteration. Also the input size seems to matter;
 * choosing p = n / 3 in the first iteration seems to improve
 * performance slightly for input size just above the threshold, but
 * degrade performance for larger inputs. */
#define CHOOSE_P_1(n) ((n) / 2)
#define CHOOSE_P_2(n) ((n) / 3)
mp_size_t
mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
            mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n)
{
  mp_size_t talloc;
  mp_size_t scratch;
  mp_size_t matrix_scratch;
  mp_size_t ualloc = n + 1;

  struct gcdext_ctx ctx;
  mp_size_t un;
  mp_ptr u0;
  mp_ptr u1;

  mp_ptr tp;

  TMP_DECL;

  ASSERT (an >= n);
  ASSERT (n > 0);
  ASSERT (bp[n-1] > 0);

  TMP_MARK;

  /* FIXME: Check for small sizes first, before setting up temporary
     storage etc. */
  talloc = MPN_GCDEXT_LEHMER_N_ITCH(n);

  /* For initial division */
  scratch = an - n + 1;
  if (scratch > talloc)
    talloc = scratch;

  if (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      /* For hgcd loop. */
      mp_size_t hgcd_scratch;
      mp_size_t update_scratch;
      mp_size_t p1 = CHOOSE_P_1 (n);
      mp_size_t p2 = CHOOSE_P_2 (n);
      mp_size_t min_p = MIN(p1, p2);
      mp_size_t max_p = MAX(p1, p2);
      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - min_p);
      hgcd_scratch = mpn_hgcd_itch (n - min_p);
      update_scratch = max_p + n - 1;

      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
      if (scratch > talloc)
        talloc = scratch;

      /* Final mpn_gcdext_lehmer_n call. Need space for u and for
         copies of a and b. */
      scratch = MPN_GCDEXT_LEHMER_N_ITCH (GCDEXT_DC_THRESHOLD)
        + 3*GCDEXT_DC_THRESHOLD;

      if (scratch > talloc)
        talloc = scratch;

      /* Cofactors u0 and u1 */
      talloc += 2*(n+1);
    }

  tp = TMP_ALLOC_LIMBS(talloc);

  if (an > n)
    {
      mpn_tdiv_qr (tp, ap, 0, ap, an, bp, n);

      if (mpn_zero_p (ap, n))
        {
          MPN_COPY (gp, bp, n);
          *usizep = 0;
          TMP_FREE;
          return n;
        }
    }
  if (BELOW_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      mp_size_t gn = mpn_gcdext_lehmer_n(gp, up, usizep, ap, bp, n, tp);

      TMP_FREE;
      return gn;
    }

  MPN_ZERO (tp, 2*ualloc);
  u0 = tp; tp += ualloc;
  u1 = tp; tp += ualloc;

  ctx.gp = gp;
  ctx.up = up;
  ctx.usize = usizep;

  {
    /* For the first hgcd call, there are no u updates, and it makes
       some sense to use a different choice for p. */

    /* FIXME: We could trim use of temporary storage, since u0 and u1
       are not used yet. For the hgcd call, we could swap in the u0
       and u1 pointers for the relevant matrix elements. */

    struct hgcd_matrix M;
    mp_size_t p = CHOOSE_P_1 (n);
    mp_size_t nn;

    mpn_hgcd_matrix_init (&M, n - p, tp);
    nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
    if (nn > 0)
      {
        ASSERT (M.n <= (n - p - 1)/2);
        ASSERT (M.n + p <= (p + n - 1) / 2);

        /* Temporary storage 2 (p + M->n) <= p + n - 1 */
        n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);

        MPN_COPY (u0, M.p[1][0], M.n);
        MPN_COPY (u1, M.p[1][1], M.n);
        un = M.n;
        while ( (u0[un-1] | u1[un-1] ) == 0)
          un--;
      }
    else
      {
        /* mpn_hgcd has failed. Then either one of a or b is very
           small, or the difference is very small. Perform one
           subtraction followed by one division. */
        u1[0] = 1;

        ctx.u0 = u0;
        ctx.u1 = u1;
        ctx.tp = tp + n; /* ualloc */
        ctx.un = 1;

        /* Temporary storage n */
        n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
        if (n == 0)
          {
            TMP_FREE;
            return ctx.gn;
          }

        un = ctx.un;
        ASSERT (un < ualloc);
      }
  }
  while (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      struct hgcd_matrix M;
      mp_size_t p = CHOOSE_P_2 (n);
      mp_size_t nn;

      mpn_hgcd_matrix_init (&M, n - p, tp);
      nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
      if (nn > 0)
        {
          mp_ptr t0;

          t0 = tp + matrix_scratch;
          ASSERT (M.n <= (n - p - 1)/2);
          ASSERT (M.n + p <= (p + n - 1) / 2);

          /* Temporary storage 2 (p + M->n) <= p + n - 1 */
          n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, t0);

          /* By the same analysis as for mpn_hgcd_matrix_mul */
          ASSERT (M.n + un <= ualloc);

          /* FIXME: This copying could be avoided by some swapping of
           * pointers. May need more temporary storage, though. */
          MPN_COPY (t0, u0, un);

          /* Temporary storage ualloc */
          un = hgcd_mul_matrix_vector (&M, u0, t0, u1, un, t0 + un);

          ASSERT (un < ualloc);
          ASSERT ( (u0[un-1] | u1[un-1]) > 0);
        }
      else
        {
          /* mpn_hgcd has failed. Then either one of a or b is very
             small, or the difference is very small. Perform one
             subtraction followed by one division. */
          ctx.u0 = u0;
          ctx.u1 = u1;
          ctx.tp = tp + n; /* ualloc */
          ctx.un = un;

          /* Temporary storage n */
          n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
          if (n == 0)
            {
              TMP_FREE;
              return ctx.gn;
            }

          un = ctx.un;
          ASSERT (un < ualloc);
        }
    }
  /* We have A = ... a + ... b
             B =  u0 a +  u1 b

             a =  u1 A + ... B
             b = -u0 A + ... B

     with bounds

       |u0|, |u1| <= B / min(a, b)

     We always have u1 > 0, and u0 == 0 is possible only if u1 == 1,
     in which case the only reduction done so far is a = A - k B for
     some k.

     Compute g = u a + v b = (u u1 - v u0) A + (...) B
     Here, u, v are bounded by

       |u| <= b,
       |v| <= a
  */
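  /* A small worked instance of this bookkeeping (values chosen only to
     satisfy the relations above): take A = 38, B = 14, and suppose the
     loop has reduced to a = 10, b = 4 with u0 = 1, u1 = 1; indeed
     14 = 1*10 + 1*4, 10 = 1*38 - 2*14 and 4 = -1*38 + 3*14. A final
     step giving g = 2 = 1*10 + (-2)*4 has u = 1, v = -2, so the cofactor
     of A is u u1 - v u0 = 1*1 - (-2)*1 = 3, matching 2 = 3*38 - 8*14.  */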
  ASSERT ( (ap[n-1] | bp[n-1]) > 0);

  if (UNLIKELY (mpn_cmp (ap, bp, n) == 0))
    {
      /* Must return the smallest cofactor, +u1 or -u0 */
      int c;

      MPN_COPY (gp, ap, n);

      MPN_CMP (c, u0, u1, un);
      /* c == 0 can happen only when A = (2k+1) G, B = 2 G. And in
         this case we choose the cofactor + 1, corresponding to G = A
         - k B, rather than -1, corresponding to G = - A + (k+1) B. */
      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
      if (c < 0)
        {
          MPN_NORMALIZE (u0, un);
          MPN_COPY (up, u0, un);
          *usizep = -un;
        }
      else
        {
          MPN_NORMALIZE_NOT_ZERO (u1, un);
          MPN_COPY (up, u1, un);
          *usizep = un;
        }

      TMP_FREE;
      return n;
    }
  else if (UNLIKELY (u0[0] == 0) && un == 1)
    {
      mp_size_t gn;
      ASSERT (u1[0] == 1);

      /* g = u a + v b = (u u1 - v u0) A + (...) B = u A + (...) B */
      gn = mpn_gcdext_lehmer_n (gp, up, usizep, ap, bp, n, tp);

      TMP_FREE;
      return gn;
    }
  else
    {
      mp_size_t u0n;
      mp_size_t u1n;
      mp_size_t lehmer_un;
      mp_size_t lehmer_vn;
      mp_size_t gn;

      mp_ptr lehmer_up;
      mp_ptr lehmer_vp;
      int negate;

      lehmer_up = tp; tp += n;

      /* Call mpn_gcdext_lehmer_n with copies of a and b. */
      MPN_COPY (tp, ap, n);
      MPN_COPY (tp + n, bp, n);
      gn = mpn_gcdext_lehmer_n (gp, lehmer_up, &lehmer_un, tp, tp + n, n, tp + 2*n);

      u0n = un;
      MPN_NORMALIZE (u0, u0n);
      ASSERT (u0n > 0);

      if (lehmer_un == 0)
        {
          /* u == 0  ==>  v = g / b == 1  ==> g = - u0 A + (...) B */
          MPN_COPY (up, u0, u0n);
          *usizep = -u0n;

          TMP_FREE;
          return gn;
        }

      lehmer_vp = tp;
      /* Compute v = (g - u a) / b */
      lehmer_vn = compute_v (lehmer_vp,
                             ap, bp, n, gp, gn, lehmer_up, lehmer_un, tp + n + 1);

      if (lehmer_un > 0)
        negate = 0;
      else
        {
          lehmer_un = -lehmer_un;
          negate = 1;
        }

      u1n = un;
      MPN_NORMALIZE (u1, u1n);
      ASSERT (u1n > 0);

      ASSERT (lehmer_un + u1n <= ualloc);
      ASSERT (lehmer_vn + u0n <= ualloc);

      /* We may still have v == 0 */

      /* Compute u u0 */
      if (lehmer_un <= u1n)
        /* Should be the common case */
        mpn_mul (up, u1, u1n, lehmer_up, lehmer_un);
      else
        mpn_mul (up, lehmer_up, lehmer_un, u1, u1n);

      un = u1n + lehmer_un;
      un -= (up[un - 1] == 0);

      if (lehmer_vn > 0)
        {
          mp_limb_t cy;

          /* Overwrites old u1 value */
          if (lehmer_vn <= u0n)
            /* Should be the common case */
            mpn_mul (u1, u0, u0n, lehmer_vp, lehmer_vn);
          else
            mpn_mul (u1, lehmer_vp, lehmer_vn, u0, u0n);

          u1n = u0n + lehmer_vn;
          u1n -= (u1[u1n - 1] == 0);

          if (u1n <= un)
            {
              cy = mpn_add (up, up, un, u1, u1n);
            }
          else
            {
              cy = mpn_add (up, u1, u1n, up, un);
              un = u1n;
            }
          up[un] = cy;
          un += (cy != 0);
        }

      ASSERT (un < ualloc);

      *usizep = negate ? -un : un;

      TMP_FREE;
      return gn;
    }
}
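/* A minimal usage sketch (illustrative only, not compiled into the
   library): the public mpz_gcdext interface, which is built on
   mpn_gcdext above, returns g and cofactors s, t with g = s*a + t*b.  */
#if 0
#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  mpz_t a, b, g, s, t;

  mpz_inits (a, b, g, s, t, NULL);
  mpz_set_ui (a, 38);
  mpz_set_ui (b, 14);

  /* Extended GCD: for these values gcd = 2, e.g. 2 = 3*38 + (-8)*14. */
  mpz_gcdext (g, s, t, a, b);
  gmp_printf ("gcd = %Zd = (%Zd)*%Zd + (%Zd)*%Zd\n", g, s, a, t, b);

  mpz_clears (a, b, g, s, t, NULL);
  return 0;
}
#endif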