source/libs/gmp/gmp-src/mpn/generic/hgcd.c

   1 /* hgcd.c.
   2
   3    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   4    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   5    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
   6
   7 Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
   8
   9 This file is part of the GNU MP Library.
  10
  11 The GNU MP Library is free software; you can redistribute it and/or modify
  12 it under the terms of either:
  13
  14   * the GNU Lesser General Public License as published by the Free
  15     Software Foundation; either version 3 of the License, or (at your
  16     option) any later version.
  17
  18 or
  19
  20   * the GNU General Public License as published by the Free Software
  21     Foundation; either version 2 of the License, or (at your option) any
  22     later version.
  23
  24 or both in parallel, as here.
  25
  26 The GNU MP Library is distributed in the hope that it will be useful, but
  27 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  28 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  29 for more details.
  30
  31 You should have received copies of the GNU General Public License and the
  32 GNU Lesser General Public License along with the GNU MP Library.  If not,
  33 see https://www.gnu.org/licenses/.  */
  34
  35 #include "gmp.h"
  36 #include "gmp-impl.h"
  37 #include "longlong.h"
  38
  39
  40 /* Size analysis for hgcd:
  41
  42    For the recursive calls, we have n1 <= ceil(n / 2). Then the
  43    storage need is determined by the storage for the recursive call
  44    computing M1, and hgcd_matrix_adjust and hgcd_matrix_mul calls that use M1
  45    (after this, the storage needed for M1 can be recycled).
  46
   47    Let S(n) denote the required storage. For M1 we need 4 * (ceil(n1/2) + 1)
  48    = 4 * (ceil(n/4) + 1), for the hgcd_matrix_adjust call, we need n + 2,
  49    and for the hgcd_matrix_mul, we may need 3 ceil(n/2) + 8. In total,
  50    4 * ceil(n/4) + 3 ceil(n/2) + 12 <= 10 ceil(n/4) + 12.
  51
  52    For the recursive call, we need S(n1) = S(ceil(n/2)).
  53
  54    S(n) <= 10*ceil(n/4) + 12 + S(ceil(n/2))
  55         <= 10*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 12k + S(ceil(n/2^k))
  56         <= 10*(2 ceil(n/4) + k) + 12k + S(ceil(n/2^k))
  57         <= 20 ceil(n/4) + 22k + S(ceil(n/2^k))
  58 */
  59
  60 mp_size_t
  61 mpn_hgcd_itch (mp_size_t n)
  62 {
  63   unsigned k;
  64   int count;
  65   mp_size_t nscaled;
  66
  67   if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
  68     return n;
  69
  70   /* Get the recursion depth. */
  71   nscaled = (n - 1) / (HGCD_THRESHOLD - 1);
  72   count_leading_zeros (count, nscaled);
  73   k = GMP_LIMB_BITS - count;
  74
  75   return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
  76 }
  77
  78 /* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
  79    with elements of size at most (n+1)/2 - 1. Returns new size of a,
  80    b, or zero if no reduction is possible. */
  81
  82 mp_size_t
  83 mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
  84           struct hgcd_matrix *M, mp_ptr tp)
  85 {
  86   mp_size_t s = n/2 + 1;
  87
  88   mp_size_t nn;
  89   int success = 0;
  90
  91   if (n <= s)
  92     /* Happens when n <= 2, a fairly uninteresting case but exercised
  93        by the random inputs of the testsuite. */
  94     return 0;
  95
  96   ASSERT ((ap[n-1] | bp[n-1]) > 0);
  97
  98   ASSERT ((n+1)/2 - 1 < M->alloc);
  99
 100   if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
 101     {
 102       mp_size_t n2 = (3*n)/4 + 1;
 103       mp_size_t p = n/2;
 104
 105       nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
 106       if (nn)
 107         {
 108           n = nn;
 109           success = 1;
 110         }
 111
 112       /* NOTE: It appears this loop never runs more than once (at
 113          least when not recursing to hgcd_appr). */
 114       while (n > n2)
 115         {
 116           /* Needs n + 1 storage */
 117           nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
 118           if (!nn)
 119             return success ? n : 0;
 120
 121           n = nn;
 122           success = 1;
 123         }
 124
 125       if (n > s + 2)
 126         {
 127           struct hgcd_matrix M1;
 128           mp_size_t scratch;
 129
 130           p = 2*s - n + 1;
 131           scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
 132
 133           mpn_hgcd_matrix_init(&M1, n - p, tp);
 134
 135           /* FIXME: Should use hgcd_reduce, but that may require more
 136              scratch space, which requires review. */
 137
 138           nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
 139           if (nn > 0)
 140             {
 141               /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
 142               ASSERT (M->n + 2 >= M1.n);
 143
 144               /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
 145                  then either q or q + 1 is a correct quotient, and M1 will
 146                  start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
 147                  rules out the case that the size of M * M1 is much
 148                  smaller than the expected M->n + M1->n. */
 149
 150               ASSERT (M->n + M1.n < M->alloc);
 151
 152               /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
 153                  = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
 154               n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
 155
 156               /* We need a bound for of M->n + M1.n. Let n be the original
 157                  input size. Then
 158
 159                  ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
 160
 161                  and it follows that
 162
 163                  M.n + M1.n <= ceil(n/2) + 1
 164
 165                  Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
 166                  amount of needed scratch space. */
 167               mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
 168               success = 1;
 169             }
 170         }
 171     }
 172
 173   for (;;)
 174     {
 175       /* Needs s+3 < n */
 176       nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
 177       if (!nn)
 178         return success ? n : 0;
 179
 180       n = nn;
 181       success = 1;
 182     }
 183 }