/* mulmod_bnm1.c -- multiplication mod B^n-1.

   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
   Marco Bodrato.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
39 #include "gmp.h"
40 #include "gmp-impl.h"
41 #include "longlong.h"

/* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
   mod B^rn - 1, and values are semi-normalised; zero is represented
   as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
   tp==rp is allowed. */
void
mpn_bc_mulmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
                    mp_ptr tp)
{
  mp_limb_t cy;

  ASSERT (0 < rn);

  mpn_mul_n (tp, ap, bp, rn);
  cy = mpn_add_n (rp, tp, tp + rn, rn);
  /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
   * be no overflow when adding in the carry. */
  MPN_INCR_U (rp, rn, cy);
}
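
/* Illustration only, not part of GMP: the same fold-the-high-half idea at
   single-word scale.  B^rn == 1 (mod B^rn - 1), so the high half of the
   double-length product is simply added to the low half, exactly as the
   mpn_add_n/MPN_INCR_U pair above does limb by limb.  The helper name and
   the toy modulus 2^16 - 1 are made up for exposition.  */
static unsigned long
mulmod_2e16m1_sketch (unsigned long a, unsigned long b)
{
  unsigned long t, r;

  ASSERT (a < 0x10000UL && b < 0x10000UL);

  t = a * b;                            /* full 32-bit product */
  r = (t & 0xffffUL) + (t >> 16);       /* fold: 2^16 == 1 (mod 2^16 - 1) */
  r = (r & 0xffffUL) + (r >> 16);       /* absorb the carry of the first fold */
  return r;                             /* semi-normalised: zero may come out as 2^16 - 1 */
}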

/* Inputs are {ap,rn+1} and {bp,rn+1}; output is {rp,rn+1}, in
   semi-normalised representation, computation is mod B^rn + 1. Needs
   a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
   Output is normalised. */
static void
mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
                    mp_ptr tp)
{
  mp_limb_t cy;

  ASSERT (0 < rn);

  mpn_mul_n (tp, ap, bp, rn + 1);
  ASSERT (tp[2*rn+1] == 0);
  ASSERT (tp[2*rn] < GMP_NUMB_MAX);
  cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
  rp[rn] = 0;
  MPN_INCR_U (rp, rn+1, cy);
}
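
/* Illustration only, not part of GMP: the corresponding reduction mod
   B^rn + 1 at single-word scale.  B^rn == -1 (mod B^rn + 1), so the high
   half of the product is subtracted from the low half, which is what the
   mpn_sub_n call above does limb by limb.  The helper name and the toy
   modulus 2^16 + 1 are made up for exposition.  */
static unsigned long
mulmod_2e16p1_sketch (unsigned long a, unsigned long b)
{
  unsigned long long t;
  unsigned long lo, hi;

  ASSERT (a <= 0x10000UL && b <= 0x10000UL);

  t = (unsigned long long) a * b;       /* full product, up to 2^32 */
  lo = (unsigned long) (t & 0xffffUL);
  hi = (unsigned long) (t >> 16);       /* at most 2^16 */
  /* subtract the high half; one conditional add renormalises into [0, 2^16] */
  return lo >= hi ? lo - hi : lo - hi + 0x10001UL;
}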

/* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
 *
 * The result is expected to be ZERO if and only if one of the operands
 * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
 * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
 * combine results and obtain a natural number when one knows in
 * advance that the final value is less than (B^rn-1).
 * Moreover it should not be a problem if mulmod_bnm1 is used to
 * compute the full product with an+bn <= rn, because this condition
 * implies (B^an-1)(B^bn-1) < (B^rn-1) .
 *
 * Requires 0 < bn <= an <= rn and an + bn > rn/2
 * Scratch need: rn + (need for recursive call OR rn + 4). This gives
 *
 * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
 */
void
mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
{
  ASSERT (0 < bn);
  ASSERT (bn <= an);
  ASSERT (an <= rn);

  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
    {
      if (UNLIKELY (bn < rn))
        {
          if (UNLIKELY (an + bn <= rn))
            {
              mpn_mul (rp, ap, an, bp, bn);
            }
          else
            {
              mp_limb_t cy;
              mpn_mul (tp, ap, an, bp, bn);
              cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn);
              MPN_INCR_U (rp, rn, cy);
            }
        }
      else
        mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);
    }
  else
    {
      mp_size_t n;
      mp_limb_t cy;
      mp_limb_t hi;

      n = rn >> 1;

      /* We need at least an + bn >= n, to be able to fit one of the
         recursive products at rp. Requiring strict inequality makes
         the code slightly simpler. If desired, we could avoid this
         restriction by initially halving rn as long as rn is even and
         an + bn <= rn/2. */

      ASSERT (an + bn > n);

      /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
         and crt together as

         x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
      */

#define a0 ap
#define a1 (ap + n)
#define b0 bp
#define b1 (bp + n)

#define xp  tp          /* 2n + 2 */
      /* am1  maybe in {xp, n} */
      /* bm1  maybe in {xp + n, n} */
#define sp1 (tp + 2*n + 2)
      /* ap1  maybe in {sp1, n + 1} */
      /* bp1  maybe in {sp1 + n + 1, n + 1} */

      {
        mp_srcptr am1, bm1;
        mp_size_t anm, bnm;
        mp_ptr so;

        bm1 = b0;
        bnm = bn;
        if (LIKELY (an > n))
          {
            am1 = xp;
            cy = mpn_add (xp, a0, n, a1, an - n);
            MPN_INCR_U (xp, n, cy);
            anm = n;
            so = xp + n;
            if (LIKELY (bn > n))
              {
                bm1 = so;
                cy = mpn_add (so, b0, n, b1, bn - n);
                MPN_INCR_U (so, n, cy);
                bnm = n;
                so += n;
              }
          }
        else
          {
            so = xp;
            am1 = a0;
            anm = an;
          }

        mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
      }

      {
        int       k;
        mp_srcptr ap1, bp1;
        mp_size_t anp, bnp;

        bp1 = b0;
        bnp = bn;
        if (LIKELY (an > n)) {
          ap1 = sp1;
          cy = mpn_sub (sp1, a0, n, a1, an - n);
          sp1[n] = 0;
          MPN_INCR_U (sp1, n + 1, cy);
          anp = n + ap1[n];
          if (LIKELY (bn > n)) {
            bp1 = sp1 + n + 1;
            cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
            sp1[2*n+1] = 0;
            MPN_INCR_U (sp1 + n + 1, n + 1, cy);
            bnp = n + bp1[n];
          }
        } else {
          ap1 = a0;
          anp = an;
        }

        if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
          k = 0;
        else
          {
            int mask;
            k = mpn_fft_best_k (n, 0);
            mask = (1<<k) - 1;
            while (n & mask) { k--; mask >>= 1; };
          }
        if (k >= FFT_FIRST_K)
          xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
        else if (UNLIKELY (bp1 == b0))
          {
            ASSERT (anp + bnp <= 2*n+1);
            ASSERT (anp + bnp > n);
            ASSERT (anp >= bnp);
            mpn_mul (xp, ap1, anp, bp1, bnp);
            anp = anp + bnp - n;
            ASSERT (anp <= n || xp[2*n] == 0);
            anp -= anp > n;
            cy = mpn_sub (xp, xp, n, xp + n, anp);
            xp[n] = 0;
            MPN_INCR_U (xp, n+1, cy);
          }
        else
          mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
      }

      /* Here the CRT recomposition begins.

         xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
         Division by 2 is a bitwise rotation.

         Assumes xp normalised mod (B^n+1).

         The residue class [0] is represented by [B^n-1]; except when
         both inputs are ZERO.
      */

#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
#if HAVE_NATIVE_mpn_rsh1add_nc
      cy = mpn_rsh1add_nc (rp, rp, xp, n, xp[n]); /* B^n = 1 */
      hi = cy << (GMP_NUMB_BITS - 1);
      cy = 0;
      /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
         overflows, i.e. a further increment will not overflow again. */
#else /* ! _nc */
      cy = xp[n] + mpn_rsh1add_n (rp, rp, xp, n); /* B^n = 1 */
      hi = (cy << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK; /* (cy&1) << ... */
      cy >>= 1;
      /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
         the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
#endif
#if GMP_NAIL_BITS == 0
      add_ssaaaa (cy, rp[n-1], cy, rp[n-1], 0, hi);
#else
      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS - 1);
      rp[n-1] ^= hi;
#endif
#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
#if HAVE_NATIVE_mpn_add_nc
      cy = mpn_add_nc (rp, rp, xp, n, xp[n]);
#else /* ! _nc */
      cy = xp[n] + mpn_add_n (rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
#endif
      cy += (rp[0] & 1);
      mpn_rshift (rp, rp, n, 1);
      ASSERT (cy <= 2);
      hi = (cy << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK; /* (cy&1) << ... */
      cy >>= 1;
      /* We can have cy != 0 only if hi = 0... */
      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
      rp[n-1] |= hi;
      /* ... rp[n-1] + cy cannot overflow, the following INCR is correct. */
#endif
      ASSERT (cy <= 1);
      /* Next increment cannot overflow, read the previous comments about cy. */
      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
      MPN_INCR_U (rp, n, cy);

      /* Compute the highest half:
         ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
       */
      if (UNLIKELY (an + bn < rn))
        {
          /* Note that in this case, the only way the result can equal
             zero mod B^{rn} - 1 is if one of the inputs is zero, and
             then the output of both the recursive calls and this CRT
             reconstruction is zero, not B^{rn} - 1. Which is good,
             since the latter representation doesn't fit in the output
             area. */
          cy = mpn_sub_n (rp + n, rp, xp, an + bn - n);

          /* FIXME: This subtraction of the high parts is not really
             necessary, we do it to get the carry out, and for sanity
             checking. */
          cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
                                   xp + an + bn - n, rn - (an + bn), cy);
          ASSERT (an + bn == rn - 1 ||
                  mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
          cy = mpn_sub_1 (rp, rp, an + bn, cy);
          ASSERT (cy == (xp + an + bn - n)[0]);
        }
      else
        {
          cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
          /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
             DECR will affect _at most_ the lowest n limbs. */
          MPN_DECR_U (rp, 2*n, cy);
        }

#undef a0
#undef a1
#undef b0
#undef b1
#undef xp
#undef sp1
    }
}
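
/* Illustration only, not part of GMP: the CRT recomposition above at
   single-word scale.  Given xm = x mod (2^16 - 1) (semi-normalised) and
   xp = x mod (2^16 + 1) (normalised, so xp may equal 2^16), recover
   x mod (2^32 - 1) with the same formula used above,

     x = -xp * 2^16 + (2^16 + 1) * [ (xp + xm)/2 mod (2^16 - 1) ],

   where the division by 2 is a bitwise right rotation.  The helper name is
   made up for exposition.  */
static unsigned long long
crt_2e32m1_sketch (unsigned long xm, unsigned long xp)
{
  unsigned long s;
  long long x;

  ASSERT (xm <= 0xffffUL && xp <= 0x10000UL);

  s = xp + xm;
  s = (s & 0xffffUL) + (s >> 16);       /* reduce mod 2^16 - 1 */
  s = (s >> 1) | ((s & 1UL) << 15);     /* halve by rotating right one bit */

  /* combine; one conditional add brings the result into [0, 2^32 - 1] */
  x = (long long) s * 0x10001LL - ((long long) xp << 16);
  return x >= 0 ? (unsigned long long) x : (unsigned long long) (x + 0xffffffffLL);
}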

mp_size_t
mpn_mulmod_bnm1_next_size (mp_size_t n)
{
  mp_size_t nh;

  if (BELOW_THRESHOLD (n, MULMOD_BNM1_THRESHOLD))
    return n;
  if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
    return (n + (2-1)) & (-2);
  if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
    return (n + (4-1)) & (-4);

  nh = (n + 1) >> 1;

  if (BELOW_THRESHOLD (nh, MUL_FFT_MODF_THRESHOLD))
    return (n + (8-1)) & (-8);

  return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 0));
}
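
/* Illustration only, not part of GMP proper: a typical call sequence for
   computing a full product {rp, an+bn} via mpn_mulmod_bnm1, along the lines
   sketched in the comment before that function -- pick rn >= an + bn so the
   product cannot wrap around, round it up to a supported size with
   mpn_mulmod_bnm1_next_size, and provide scratch within the 2*rn + 4 limb
   bound derived above (real callers use the mpn_mulmod_bnm1_itch estimate
   from gmp-impl.h).  The helper name is made up for exposition.  */
static void
mul_via_bnm1_sketch (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
{
  mp_size_t rn;
  mp_ptr tp;
  TMP_DECL;

  ASSERT (an >= bn && bn > 0);

  rn = mpn_mulmod_bnm1_next_size (an + bn);     /* a*b < B^rn - 1, no reduction */

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS (2 * rn + 4);            /* scratch bound from the comment above */
  mpn_mulmod_bnm1 (rp, rn, ap, an, bp, bn, tp); /* {rp, an+bn} <- a * b */
  TMP_FREE;
}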