source/libs/gmp/gmp-src/mpn/generic/jacobi.c

   1 /* jacobi.c
   2
   3    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   4    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   5    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
   6
   7 Copyright 1996, 1998, 2000-2004, 2008, 2010, 2011 Free Software Foundation,
   8 Inc.
   9
  10 This file is part of the GNU MP Library.
  11
  12 The GNU MP Library is free software; you can redistribute it and/or modify
  13 it under the terms of either:
  14
  15   * the GNU Lesser General Public License as published by the Free
  16     Software Foundation; either version 3 of the License, or (at your
  17     option) any later version.
  18
  19 or
  20
  21   * the GNU General Public License as published by the Free Software
  22     Foundation; either version 2 of the License, or (at your option) any
  23     later version.
  24
  25 or both in parallel, as here.
  26
  27 The GNU MP Library is distributed in the hope that it will be useful, but
  28 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  29 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  30 for more details.
  31
  32 You should have received copies of the GNU General Public License and the
  33 GNU Lesser General Public License along with the GNU MP Library.  If not,
  34 see https://www.gnu.org/licenses/.  */
  35
  36 #include "gmp.h"
  37 #include "gmp-impl.h"
  38 #include "longlong.h"
  39
  40 #ifndef JACOBI_DC_THRESHOLD
  41 #define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD
  42 #endif
  43
  44 /* Schönhage's rules:
  45  *
  46  * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3
  47  *
  48  * If r1 is odd, then
  49  *
  50  *   (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2, r1)
  51  *
  52  * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)].
  53  *
  54  * If r1 is even, r2 must be odd. We have
  55  *
  56  *   (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0)
  57  *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1)
  58  *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1)
  59  *
  60  * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating
  61  * q1 times gives
  62  *
  63  *   (r1 | r0) = (r1 | r2) = (r3 | r2)
  64  *
  65  * On the other hand, if r1 = 2 (mod 4), the sign factor is
  66  * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent
  67  *
  68  *   (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2
  69  *   = q1 (r0-1)/2 + q1 (q1-1)/2
  70  *
  71  * and we can summarize the even case as
  72  *
  73  *   (r1 | r0) = t(r1, r0, q1) (r3 | r2)
  74  *
  75  * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)}
  76  *
  77  * What about termination? The remainder sequence ends with (0|1) = 1
  78  * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is
  79  * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and
  80  * hence non-zero. We may have r3 = r1 - q2 r2 = 0.
  81  *
  82  * Examples: (11|15) = - (15|11) = - (4|11)
  83  *            (4|11) =    (4| 3) =   (1| 3)
  84  *            (1| 3) = (3|1) = (0|1) = 1
  85  *
  86  *             (2|7) = (2|1) = (0|1) = 1
  87  *
  88  * Detail:     (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5)
  89  *             (2|5) = (2-5|5) = (-1|5)(3|5) =  (5|3) =  (2|3)
  90  *             (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1)
  91  *
  92  */
  93
  94 /* In principle, the state consists of four variables: e (one bit), a,
  95    b (two bits each), d (one bit). Collected factors are (-1)^e. a and
  96    b are the least significant bits of the current remainders. d
  97    (denominator) is 0 if we're currently subtracting multiplies of a
  98    from b, and 1 if we're subtracting b from a.
  99
 100    e is stored in the least significant bit, while a, b and d are
 101    coded as only 13 distinct values in bits 1-4, according to the
 102    following table. For rows not mentioning d, the value is either
 103    implied, or it doesn't matter. */
 104
 105 #if WANT_ASSERT
 106 static const struct
 107 {
 108   unsigned char a;
 109   unsigned char b;
 110 } decode_table[13] = {
 111   /*  0 */ { 0, 1 },
 112   /*  1 */ { 0, 3 },
 113   /*  2 */ { 1, 1 },
 114   /*  3 */ { 1, 3 },
 115   /*  4 */ { 2, 1 },
 116   /*  5 */ { 2, 3 },
 117   /*  6 */ { 3, 1 },
 118   /*  7 */ { 3, 3 }, /* d = 1 */
 119   /*  8 */ { 1, 0 },
 120   /*  9 */ { 1, 2 },
 121   /* 10 */ { 3, 0 },
 122   /* 11 */ { 3, 2 },
 123   /* 12 */ { 3, 3 }, /* d = 0 */
 124 };
 125 #define JACOBI_A(bits) (decode_table[(bits)>>1].a)
 126 #define JACOBI_B(bits) (decode_table[(bits)>>1].b)
 127 #endif /* WANT_ASSERT */
 128
 129 const unsigned char jacobi_table[208] = {
 130 #include "jacobitab.h"
 131 };
 132
 133 #define BITS_FAIL 31
 134
 135 static void
 136 jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
 137              mp_srcptr qp, mp_size_t qn, int d)
 138 {
 139   unsigned *bitsp = (unsigned *) p;
 140
 141   if (gp)
 142     {
 143       ASSERT (gn > 0);
 144       if (gn != 1 || gp[0] != 1)
 145         {
 146           *bitsp = BITS_FAIL;
 147           return;
 148         }
 149     }
 150
 151   if (qp)
 152     {
 153       ASSERT (qn > 0);
 154       ASSERT (d >= 0);
 155       *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3);
 156     }
 157 }
 158
 159 #define CHOOSE_P(n) (2*(n) / 3)
 160
 161 int
 162 mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)
 163 {
 164   mp_size_t scratch;
 165   mp_size_t matrix_scratch;
 166   mp_ptr tp;
 167
 168   TMP_DECL;
 169
 170   ASSERT (n > 0);
 171   ASSERT ( (ap[n-1] | bp[n-1]) > 0);
 172   ASSERT ( (bp[0] | ap[0]) & 1);
 173
 174   /* FIXME: Check for small sizes first, before setting up temporary
 175      storage etc. */
 176   scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);
 177
 178   if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
 179     {
 180       mp_size_t hgcd_scratch;
 181       mp_size_t update_scratch;
 182       mp_size_t p = CHOOSE_P (n);
 183       mp_size_t dc_scratch;
 184
 185       matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
 186       hgcd_scratch = mpn_hgcd_itch (n - p);
 187       update_scratch = p + n - 1;
 188
 189       dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
 190       if (dc_scratch > scratch)
 191         scratch = dc_scratch;
 192     }
 193
 194   TMP_MARK;
 195   tp = TMP_ALLOC_LIMBS(scratch);
 196
 197   while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
 198     {
 199       struct hgcd_matrix M;
 200       mp_size_t p = 2*n/3;
 201       mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
 202       mp_size_t nn;
 203       mpn_hgcd_matrix_init (&M, n - p, tp);
 204
 205       nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,
 206                             tp + matrix_scratch);
 207       if (nn > 0)
 208         {
 209           ASSERT (M.n <= (n - p - 1)/2);
 210           ASSERT (M.n + p <= (p + n - 1) / 2);
 211           /* Temporary storage 2 (p + M->n) <= p + n - 1. */
 212           n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
 213         }
 214       else
 215         {
 216           /* Temporary storage n */
 217           n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);
 218           if (!n)
 219             {
 220               TMP_FREE;
 221               return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
 222             }
 223         }
 224     }
 225
 226   while (n > 2)
 227     {
 228       struct hgcd_matrix1 M;
 229       mp_limb_t ah, al, bh, bl;
 230       mp_limb_t mask;
 231
 232       mask = ap[n-1] | bp[n-1];
 233       ASSERT (mask > 0);
 234
 235       if (mask & GMP_NUMB_HIGHBIT)
 236         {
 237           ah = ap[n-1]; al = ap[n-2];
 238           bh = bp[n-1]; bl = bp[n-2];
 239         }
 240       else
 241         {
 242           int shift;
 243
 244           count_leading_zeros (shift, mask);
 245           ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
 246           al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
 247           bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
 248           bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
 249         }
 250
 251       /* Try an mpn_nhgcd2 step */
 252       if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))
 253         {
 254           n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
 255           MP_PTR_SWAP (ap, tp);
 256         }
 257       else
 258         {
 259           /* mpn_hgcd2 has failed. Then either one of a or b is very
 260              small, or the difference is very small. Perform one
 261              subtraction followed by one division. */
 262           n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);
 263           if (!n)
 264             {
 265               TMP_FREE;
 266               return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
 267             }
 268         }
 269     }
 270
 271   if (bits >= 16)
 272     MP_PTR_SWAP (ap, bp);
 273
 274   ASSERT (bp[0] & 1);
 275
 276   if (n == 1)
 277     {
 278       mp_limb_t al, bl;
 279       al = ap[0];
 280       bl = bp[0];
 281
 282       TMP_FREE;
 283       if (bl == 1)
 284         return 1 - 2*(bits & 1);
 285       else
 286         return mpn_jacobi_base (al, bl, bits << 1);
 287     }
 288
 289   else
 290     {
 291       int res = mpn_jacobi_2 (ap, bp, bits & 1);
 292       TMP_FREE;
 293       return res;
 294     }
 295 }