/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.

Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* Uses the HGCD operation described in

   N. Möller, On Schönhage's algorithm and subquadratic integer gcd
   computation, Math. Comp. 77 (2008), 589-607.

   to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
   then uses Lehmer's algorithm.
*/
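
/* Illustrative usage sketch (added here for exposition; the concrete values
   are hypothetical and not part of the library source).  The caller must
   ensure usize >= vsize > 0, that the most significant limb of V is
   non-zero, and that at least one operand is odd; both inputs are clobbered
   and the result needs at most vsize limbs:

       mp_limb_t u[1] = { 90 };
       mp_limb_t v[1] = { 75 };   (odd, so the oddness precondition holds)
       mp_limb_t g[1];
       mp_size_t gn = mpn_gcd (g, u, 1, v, 1);
       (now gn == 1 and g[0] == 15)
*/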

/* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
 * 2)/3, which gives a balanced multiplication in
 * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
 * performance. The matrix-vector multiplication is then
 * 4:1-unbalanced, with matrix elements of size n/6, and vector
 * elements of size p = 2n/3. */

/* From analysis of the theoretical running time, it appears that when
 * multiplication takes time O(n^alpha), p should be chosen so that
 * the ratio of the time for the mpn_hgcd call, and the time for the
 * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
 * 1). */
#ifdef TUNE_GCD_P
#define P_TABLE_SIZE 10000
mp_size_t p_table[P_TABLE_SIZE];
#define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
#else
#define CHOOSE_P(n) (2*(n) / 3)
#endif
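
/* Worked example of the default choice, for illustration: with n = 600
   limbs, CHOOSE_P gives p = 400, so mpn_hgcd operates on the high
   n - p = 200 limbs and produces matrix elements of roughly 100 ~= n/6
   limbs; these then multiply vectors of about p = 400 ~= 2n/3 limbs in
   mpn_hgcd_matrix_adjust, the 4:1-unbalanced product mentioned above. */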

struct gcd_ctx
{
  mp_ptr gp;
  mp_size_t gn;
};

static void
gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
          mp_srcptr qp, mp_size_t qn, int d)
{
  struct gcd_ctx *ctx = (struct gcd_ctx *) p;
  MPN_COPY (ctx->gp, gp, gn);
  ctx->gn = gn;
}

#if GMP_NAIL_BITS > 0
/* Nail support should be easy, replacing the sub_ddmmss with nails
 * logic. */
#error Nails not supported.
#endif

/* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
   Both U and V must be odd. */
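/* The loop below relies on the standard binary GCD identity: for odd U and
   V with, say, U > V, gcd (U, V) = gcd ((U - V) / 2^r, V), where 2^r is the
   largest power of two dividing U - V.  The count_trailing_zeros and shift
   pair implements the division by 2^r across the two limbs. */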
static inline mp_size_t
gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
{
  mp_limb_t u0, u1, v0, v1;
  mp_size_t gn;

  u0 = up[0];
  u1 = up[1];
  v0 = vp[0];
  v1 = vp[1];

  ASSERT (u0 & 1);
  ASSERT (v0 & 1);

  /* Check for u0 != v0 needed to ensure that argument to
   * count_trailing_zeros is non-zero. */
  while (u1 != v1 && u0 != v0)
    {
      unsigned long int r;
      if (u1 > v1)
        {
          sub_ddmmss (u1, u0, u1, u0, v1, v0);
          count_trailing_zeros (r, u0);
          u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
          u1 >>= r;
        }
      else /* u1 < v1. */
        {
          sub_ddmmss (v1, v0, v1, v0, u1, u0);
          count_trailing_zeros (r, v0);
          v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
          v1 >>= r;
        }
    }

  gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);

  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|). */
  if (u1 == v1 && u0 == v0)
    return gn;

  v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
  gp[0] = mpn_gcd_1 (gp, gn, v0);

  return 1;
}
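
/* Outline of mpn_gcd below, as implemented in this file: (1) if U has more
   limbs than V, reduce U modulo V with one division; (2) while n is above
   GCD_DC_THRESHOLD, apply subquadratic mpn_hgcd steps to the high parts and
   fold the resulting matrix back in with mpn_hgcd_matrix_adjust; (3) while
   n > 2, apply Lehmer-style mpn_hgcd2 steps driven by the two most
   significant limbs; (4) finish the last one or two limbs with mpn_gcd_1 or
   gcd_2. */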
mp_size_t
mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
{
  mp_size_t talloc;
  mp_size_t scratch;
  mp_size_t matrix_scratch;

  struct gcd_ctx ctx;
  mp_ptr tp;
  TMP_DECL;

  ASSERT (usize >= n);
  ASSERT (n > 0);
  ASSERT (vp[n-1] > 0);

  /* FIXME: Check for small sizes first, before setting up temporary
     storage etc. */
  talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);

  /* For initial division */
  scratch = usize - n + 1;
  if (scratch > talloc)
    talloc = scratch;

#if TUNE_GCD_P
  if (CHOOSE_P (n) > 0)
#else
  if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
#endif
    {
      mp_size_t hgcd_scratch;
      mp_size_t update_scratch;
      mp_size_t p = CHOOSE_P (n);
      mp_size_t scratch;
#if TUNE_GCD_P
      /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
         is increasing */
      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
      hgcd_scratch = mpn_hgcd_itch (n);
      update_scratch = 2*(n - 1);
#else
      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
      hgcd_scratch = mpn_hgcd_itch (n - p);
      update_scratch = p + n - 1;
#endif
      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
      if (scratch > talloc)
        talloc = scratch;
    }

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS(talloc);

  if (usize > n)
    {
      mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);

      if (mpn_zero_p (up, n))
        {
          MPN_COPY (gp, vp, n);
          ctx.gn = n;
          goto done;
        }
    }

  ctx.gp = gp;

#if TUNE_GCD_P
  while (CHOOSE_P (n) > 0)
#else
  while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
#endif
    {
      struct hgcd_matrix M;
      mp_size_t p = CHOOSE_P (n);
      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
      mp_size_t nn;
      mpn_hgcd_matrix_init (&M, n - p, tp);
      nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
      if (nn > 0)
        {
          ASSERT (M.n <= (n - p - 1)/2);
          ASSERT (M.n + p <= (p + n - 1) / 2);
          /* Temporary storage 2 (p + M->n) <= p + n - 1. */
          n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
        }
      else
        {
          /* Temporary storage n */
          n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
          if (n == 0)
            goto done;
        }
    }
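
  /* Lehmer-style phase: mpn_hgcd2 computes a 2x2 reduction matrix from the
     two most significant limbs only, and mpn_matrix22_mul1_inverse_vector
     applies that matrix to the full operands; mpn_gcd_subdiv_step is the
     fallback when the top limbs do not determine the quotients. */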
  while (n > 2)
    {
      struct hgcd_matrix1 M;
      mp_limb_t uh, ul, vh, vl;
      mp_limb_t mask;

      mask = up[n-1] | vp[n-1];
      ASSERT (mask > 0);

      if (mask & GMP_NUMB_HIGHBIT)
        {
          uh = up[n-1]; ul = up[n-2];
          vh = vp[n-1]; vl = vp[n-2];
        }
      else
        {
          int shift;

          count_leading_zeros (shift, mask);
          uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
          ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
          vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
          vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
        }

      /* Try an mpn_hgcd2 step */
      if (mpn_hgcd2 (uh, ul, vh, vl, &M))
        {
          n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
          MP_PTR_SWAP (up, tp);
        }
      else
        {
          /* mpn_hgcd2 has failed.  Then either one of a or b is very
             small, or the difference is very small.  Perform one
             subtraction followed by one division. */

          /* Temporary storage n */
          n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
          if (n == 0)
            goto done;
        }
    }

  ASSERT(up[n-1] | vp[n-1]);

  if (n == 1)
    {
      *gp = mpn_gcd_1(up, 1, vp[0]);
      ctx.gn = 1;
      goto done;
    }

  /* Due to the calling convention for mpn_gcd, at most one can be
     even. */

  if (! (up[0] & 1))
    MP_PTR_SWAP (up, vp);

  ASSERT (up[0] & 1);

  if (vp[0] == 0)
    {
      *gp = mpn_gcd_1 (up, 2, vp[1]);
      ctx.gn = 1;
      goto done;
    }
  else if (! (vp[0] & 1))
    {
      int r;
      count_trailing_zeros (r, vp[0]);
      vp[0] = ((vp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (vp[0] >> r);
      vp[1] >>= r;
    }

  ctx.gn = gcd_2(gp, up, vp);

done:
  TMP_FREE;
  return ctx.gn;
}