source/libs/gmp/gmp-src/mpn/generic/sqrmod_bnm1.c

   1 /* sqrmod_bnm1.c -- squaring mod B^n-1.
   2
   3    Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
   4    Marco Bodrato.
   5
   6    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   7    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   8    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
   9
  10 Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
  11
  12 This file is part of the GNU MP Library.
  13
  14 The GNU MP Library is free software; you can redistribute it and/or modify
  15 it under the terms of either:
  16
  17   * the GNU Lesser General Public License as published by the Free
  18     Software Foundation; either version 3 of the License, or (at your
  19     option) any later version.
  20
  21 or
  22
  23   * the GNU General Public License as published by the Free Software
  24     Foundation; either version 2 of the License, or (at your option) any
  25     later version.
  26
  27 or both in parallel, as here.
  28
  29 The GNU MP Library is distributed in the hope that it will be useful, but
  30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  31 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  32 for more details.
  33
  34 You should have received copies of the GNU General Public License and the
  35 GNU Lesser General Public License along with the GNU MP Library.  If not,
  36 see https://www.gnu.org/licenses/.  */
  37
  38
  39 #include "gmp.h"
  40 #include "gmp-impl.h"
  41 #include "longlong.h"
  42
  43 /* Input is {ap,rn}; output is {rp,rn}, computation is
  44    mod B^rn - 1, and values are semi-normalised; zero is represented
  45    as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
  46    tp==rp is allowed. */
  47 static void
  48 mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
  49 {
  50   mp_limb_t cy;
  51
  52   ASSERT (0 < rn);
  53
  54   mpn_sqr (tp, ap, rn);
  55   cy = mpn_add_n (rp, tp, tp + rn, rn);
  56   /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
  57    * be no overflow when adding in the carry. */
  58   MPN_INCR_U (rp, rn, cy);
  59 }
  60
  61
  62 /* Input is {ap,rn+1}; output is {rp,rn+1}, in
  63    semi-normalised representation, computation is mod B^rn + 1. Needs
  64    a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
  65    Output is normalised. */
  66 static void
  67 mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
  68 {
  69   mp_limb_t cy;
  70
  71   ASSERT (0 < rn);
  72
  73   mpn_sqr (tp, ap, rn + 1);
  74   ASSERT (tp[2*rn+1] == 0);
  75   ASSERT (tp[2*rn] < GMP_NUMB_MAX);
  76   cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
  77   rp[rn] = 0;
  78   MPN_INCR_U (rp, rn+1, cy );
  79 }
  80
  81
  82 /* Computes {rp,MIN(rn,2an)} <- {ap,an}^2 Mod(B^rn-1)
  83  *
  84  * The result is expected to be ZERO if and only if the operand
  85  * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
  86  * B^rn-1.
  87  * It should not be a problem if sqrmod_bnm1 is used to
  88  * compute the full square with an <= 2*rn, because this condition
  89  * implies (B^an-1)^2 < (B^rn-1) .
  90  *
  91  * Requires rn/4 < an <= rn
  92  * Scratch need: rn/2 + (need for recursive call OR rn + 3). This gives
  93  *
  94  * S(n) <= rn/2 + MAX (rn + 4, S(n/2)) <= 3/2 rn + 4
  95  */
  96 void
  97 mpn_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_ptr tp)
  98 {
  99   ASSERT (0 < an);
 100   ASSERT (an <= rn);
 101
 102   if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, SQRMOD_BNM1_THRESHOLD))
 103     {
 104       if (UNLIKELY (an < rn))
 105         {
 106           if (UNLIKELY (2*an <= rn))
 107             {
 108               mpn_sqr (rp, ap, an);
 109             }
 110           else
 111             {
 112               mp_limb_t cy;
 113               mpn_sqr (tp, ap, an);
 114               cy = mpn_add (rp, tp, rn, tp + rn, 2*an - rn);
 115               MPN_INCR_U (rp, rn, cy);
 116             }
 117         }
 118       else
 119         mpn_bc_sqrmod_bnm1 (rp, ap, rn, tp);
 120     }
 121   else
 122     {
 123       mp_size_t n;
 124       mp_limb_t cy;
 125       mp_limb_t hi;
 126
 127       n = rn >> 1;
 128
 129       ASSERT (2*an > n);
 130
 131       /* Compute xm = a^2 mod (B^n - 1), xp = a^2 mod (B^n + 1)
 132          and crt together as
 133
 134          x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
 135       */
 136
 137 #define a0 ap
 138 #define a1 (ap + n)
 139
 140 #define xp  tp  /* 2n + 2 */
 141       /* am1  maybe in {xp, n} */
 142 #define sp1 (tp + 2*n + 2)
 143       /* ap1  maybe in {sp1, n + 1} */
 144
 145       {
 146         mp_srcptr am1;
 147         mp_size_t anm;
 148         mp_ptr so;
 149
 150         if (LIKELY (an > n))
 151           {
 152             so = xp + n;
 153             am1 = xp;
 154             cy = mpn_add (xp, a0, n, a1, an - n);
 155             MPN_INCR_U (xp, n, cy);
 156             anm = n;
 157           }
 158         else
 159           {
 160             so = xp;
 161             am1 = a0;
 162             anm = an;
 163           }
 164
 165         mpn_sqrmod_bnm1 (rp, n, am1, anm, so);
 166       }
 167
 168       {
 169         int       k;
 170         mp_srcptr ap1;
 171         mp_size_t anp;
 172
 173         if (LIKELY (an > n)) {
 174           ap1 = sp1;
 175           cy = mpn_sub (sp1, a0, n, a1, an - n);
 176           sp1[n] = 0;
 177           MPN_INCR_U (sp1, n + 1, cy);
 178           anp = n + ap1[n];
 179         } else {
 180           ap1 = a0;
 181           anp = an;
 182         }
 183
 184         if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
 185           k=0;
 186         else
 187           {
 188             int mask;
 189             k = mpn_fft_best_k (n, 1);
 190             mask = (1<<k) -1;
 191             while (n & mask) {k--; mask >>=1;};
 192           }
 193         if (k >= FFT_FIRST_K)
 194           xp[n] = mpn_mul_fft (xp, n, ap1, anp, ap1, anp, k);
 195         else if (UNLIKELY (ap1 == a0))
 196           {
 197             ASSERT (anp <= n);
 198             ASSERT (2*anp > n);
 199             mpn_sqr (xp, a0, an);
 200             anp = 2*an - n;
 201             cy = mpn_sub (xp, xp, n, xp + n, anp);
 202             xp[n] = 0;
 203             MPN_INCR_U (xp, n+1, cy);
 204           }
 205         else
 206           mpn_bc_sqrmod_bnp1 (xp, ap1, n, xp);
 207       }
 208
 209       /* Here the CRT recomposition begins.
 210
 211          xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
 212          Division by 2 is a bitwise rotation.
 213
 214          Assumes xp normalised mod (B^n+1).
 215
 216          The residue class [0] is represented by [B^n-1]; except when
 217          both input are ZERO.
 218       */
 219
 220 #if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
 221 #if HAVE_NATIVE_mpn_rsh1add_nc
 222       cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
 223       hi = cy << (GMP_NUMB_BITS - 1);
 224       cy = 0;
 225       /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
 226          overflows, i.e. a further increment will not overflow again. */
 227 #else /* ! _nc */
 228       cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
 229       hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
 230       cy >>= 1;
 231       /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
 232          the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
 233 #endif
 234 #if GMP_NAIL_BITS == 0
 235       add_ssaaaa(cy, rp[n-1], cy, rp[n-1], CNST_LIMB(0), hi);
 236 #else
 237       cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
 238       rp[n-1] ^= hi;
 239 #endif
 240 #else /* ! HAVE_NATIVE_mpn_rsh1add_n */
 241 #if HAVE_NATIVE_mpn_add_nc
 242       cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
 243 #else /* ! _nc */
 244       cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
 245 #endif
 246       cy += (rp[0]&1);
 247       mpn_rshift(rp, rp, n, 1);
 248       ASSERT (cy <= 2);
 249       hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
 250       cy >>= 1;
 251       /* We can have cy != 0 only if hi = 0... */
 252       ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
 253       rp[n-1] |= hi;
 254       /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
 255 #endif
 256       ASSERT (cy <= 1);
 257       /* Next increment can not overflow, read the previous comments about cy. */
 258       ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
 259       MPN_INCR_U(rp, n, cy);
 260
 261       /* Compute the highest half:
 262          ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
 263        */
 264       if (UNLIKELY (2*an < rn))
 265         {
 266           /* Note that in this case, the only way the result can equal
 267              zero mod B^{rn} - 1 is if the input is zero, and
 268              then the output of both the recursive calls and this CRT
 269              reconstruction is zero, not B^{rn} - 1. */
 270           cy = mpn_sub_n (rp + n, rp, xp, 2*an - n);
 271
 272           /* FIXME: This subtraction of the high parts is not really
 273              necessary, we do it to get the carry out, and for sanity
 274              checking. */
 275           cy = xp[n] + mpn_sub_nc (xp + 2*an - n, rp + 2*an - n,
 276                                    xp + 2*an - n, rn - 2*an, cy);
 277           ASSERT (mpn_zero_p (xp + 2*an - n+1, rn - 1 - 2*an));
 278           cy = mpn_sub_1 (rp, rp, 2*an, cy);
 279           ASSERT (cy == (xp + 2*an - n)[0]);
 280         }
 281       else
 282         {
 283           cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
 284           /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
 285              DECR will affect _at most_ the lowest n limbs. */
 286           MPN_DECR_U (rp, 2*n, cy);
 287         }
 288 #undef a0
 289 #undef a1
 290 #undef xp
 291 #undef sp1
 292     }
 293 }
 294
 295 mp_size_t
 296 mpn_sqrmod_bnm1_next_size (mp_size_t n)
 297 {
 298   mp_size_t nh;
 299
 300   if (BELOW_THRESHOLD (n,     SQRMOD_BNM1_THRESHOLD))
 301     return n;
 302   if (BELOW_THRESHOLD (n, 4 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
 303     return (n + (2-1)) & (-2);
 304   if (BELOW_THRESHOLD (n, 8 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
 305     return (n + (4-1)) & (-4);
 306
 307   nh = (n + 1) >> 1;
 308
 309   if (BELOW_THRESHOLD (nh, SQR_FFT_MODF_THRESHOLD))
 310     return (n + (8-1)) & (-8);
 311
 312   return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 1));
 313 }