source/libs/gmp/gmp-src/mpn/generic/sec_powm.c

   1 /* mpn_sec_powm -- Compute R = U^E mod M.  Secure variant, side-channel silent
   2    under the assumption that the multiply instruction is side channel silent.
   3
   4    Contributed to the GNU project by Torbjörn Granlund.
   5
   6 Copyright 2007-2009, 2011-2014 Free Software Foundation, Inc.
   7
   8 This file is part of the GNU MP Library.
   9
  10 The GNU MP Library is free software; you can redistribute it and/or modify
  11 it under the terms of either:
  12
  13   * the GNU Lesser General Public License as published by the Free
  14     Software Foundation; either version 3 of the License, or (at your
  15     option) any later version.
  16
  17 or
  18
  19   * the GNU General Public License as published by the Free Software
  20     Foundation; either version 2 of the License, or (at your option) any
  21     later version.
  22
  23 or both in parallel, as here.
  24
  25 The GNU MP Library is distributed in the hope that it will be useful, but
  26 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  27 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  28 for more details.
  29
  30 You should have received copies of the GNU General Public License and the
  31 GNU Lesser General Public License along with the GNU MP Library.  If not,
  32 see https://www.gnu.org/licenses/.  */
  33
  34
  35 /*
  36   BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
  37
  38   1. T <- (B^n * U) mod M                Convert to REDC form
  39
  40   2. Compute table U^0, U^1, U^2... of E-dependent size
  41
  42   3. While there are more bits in E
  43        W <- power left-to-right base-k
  44
  45
  46   TODO:
  47
  48    * Make getbits a macro, thereby allowing it to update the index operand.
  49      That will simplify the code using getbits.  (Perhaps make getbits' sibling
  50      getbit then have similar form, for symmetry.)
  51
  52    * Choose window size without looping.  (Superoptimize or think(tm).)
  53
  54    * REDC_1_TO_REDC_2_THRESHOLD might actually represent the cutoff between
  55      redc_1 and redc_n.  On such systems, we will switch to redc_2 causing
  56      slowdown.
  57 */
  58
  59 #include "gmp.h"
  60 #include "gmp-impl.h"
  61 #include "longlong.h"
  62
  63 #undef MPN_REDC_1_SEC
  64 #define MPN_REDC_1_SEC(rp, up, mp, n, invm)                             \
  65   do {                                                                  \
  66     mp_limb_t cy;                                                       \
  67     cy = mpn_redc_1 (rp, up, mp, n, invm);                              \
  68     mpn_cnd_sub_n (cy, rp, rp, mp, n);                                  \
  69   } while (0)
  70
  71 #undef MPN_REDC_2_SEC
  72 #define MPN_REDC_2_SEC(rp, up, mp, n, mip)                              \
  73   do {                                                                  \
  74     mp_limb_t cy;                                                       \
  75     cy = mpn_redc_2 (rp, up, mp, n, mip);                               \
  76     mpn_cnd_sub_n (cy, rp, rp, mp, n);                                  \
  77   } while (0)
  78
  79 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
  80 #define WANT_REDC_2 1
  81 #endif
  82
  83 /* Define our own mpn squaring function.  We do this since we cannot use a
  84    native mpn_sqr_basecase over TUNE_SQR_TOOM2_MAX, or a non-native one over
  85    SQR_TOOM2_THRESHOLD.  This is so because of fixed size stack allocations
  86    made inside mpn_sqr_basecase.  */
  87
  88 #if HAVE_NATIVE_mpn_sqr_diagonal
  89 #define MPN_SQR_DIAGONAL(rp, up, n)                                     \
  90   mpn_sqr_diagonal (rp, up, n)
  91 #else
  92 #define MPN_SQR_DIAGONAL(rp, up, n)                                     \
  93   do {                                                                  \
  94     mp_size_t _i;                                                       \
  95     for (_i = 0; _i < (n); _i++)                                        \
  96       {                                                                 \
  97         mp_limb_t ul, lpl;                                              \
  98         ul = (up)[_i];                                                  \
  99         umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);     \
 100         (rp)[2 * _i] = lpl >> GMP_NAIL_BITS;                            \
 101       }                                                                 \
 102   } while (0)
 103 #endif
 104
 105
 106 #if ! HAVE_NATIVE_mpn_sqr_basecase
 107 /* The limit of the generic code is SQR_TOOM2_THRESHOLD.  */
 108 #define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
 109 #endif
 110
 111 #if HAVE_NATIVE_mpn_sqr_basecase
 112 #ifdef TUNE_SQR_TOOM2_MAX
 113 /* We slightly abuse TUNE_SQR_TOOM2_MAX here.  If it is set for an assembly
 114    mpn_sqr_basecase, it comes from SQR_TOOM2_THRESHOLD_MAX in the assembly
 115    file.  An assembly mpn_sqr_basecase that does not define it should allow
 116    any size.  */
 117 #define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
 118 #endif
 119 #endif
 120
 121 #ifdef WANT_FAT_BINARY
 122 /* For fat builds, we use SQR_TOOM2_THRESHOLD which will expand to a read from
 123    __gmpn_cpuvec.  Perhaps any possible sqr_basecase.asm allow any size, and we
 124    limit the use unnecessarily.  We cannot tell, so play it safe.  FIXME.  */
 125 #define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
 126 #endif
 127
 128 #ifndef SQR_BASECASE_LIM
 129 /* If SQR_BASECASE_LIM is now not defined, use mpn_sqr_basecase for any operand
 130    size.  */
 131 #define mpn_local_sqr(rp,up,n,tp) mpn_sqr_basecase(rp,up,n)
 132 #else
 133 /* Else use mpn_sqr_basecase for its allowed sizes, else mpn_mul_basecase.  */
 134 #define mpn_local_sqr(rp,up,n,tp) \
 135   do {                                                                  \
 136     if (BELOW_THRESHOLD (n, SQR_BASECASE_LIM))                          \
 137       mpn_sqr_basecase (rp, up, n);                                     \
 138     else                                                                \
 139       mpn_mul_basecase(rp, up, n, up, n);                               \
 140   } while (0)
 141 #endif
 142
 143 #define getbit(p,bi) \
 144   ((p[(bi - 1) / GMP_NUMB_BITS] >> (bi - 1) % GMP_NUMB_BITS) & 1)
 145
 146 /* FIXME: Maybe some things would get simpler if all callers ensure
 147    that bi >= nbits. As far as I understand, with the current code bi
 148    < nbits can happen only for the final iteration. */
 149 static inline mp_limb_t
 150 getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
 151 {
 152   int nbits_in_r;
 153   mp_limb_t r;
 154   mp_size_t i;
 155
 156   if (bi < nbits)
 157     {
 158       return p[0] & (((mp_limb_t) 1 << bi) - 1);
 159     }
 160   else
 161     {
 162       bi -= nbits;                      /* bit index of low bit to extract */
 163       i = bi / GMP_NUMB_BITS;           /* word index of low bit to extract */
 164       bi %= GMP_NUMB_BITS;              /* bit index in low word */
 165       r = p[i] >> bi;                   /* extract (low) bits */
 166       nbits_in_r = GMP_NUMB_BITS - bi;  /* number of bits now in r */
 167       if (nbits_in_r < nbits)           /* did we get enough bits? */
 168         r += p[i + 1] << nbits_in_r;    /* prepend bits from higher word */
 169       return r & (((mp_limb_t ) 1 << nbits) - 1);
 170     }
 171 }
 172
 173 #ifndef POWM_SEC_TABLE
 174 #if GMP_NUMB_BITS < 50
 175 #define POWM_SEC_TABLE  2,33,96,780,2741
 176 #else
 177 #define POWM_SEC_TABLE  2,130,524,2578
 178 #endif
 179 #endif
 180
 181 #if TUNE_PROGRAM_BUILD
 182 extern int win_size (mp_bitcnt_t);
 183 #else
 184 static inline int
 185 win_size (mp_bitcnt_t enb)
 186 {
 187   int k;
 188   /* Find k, such that x[k-1] < enb <= x[k].
 189
 190      We require that x[k] >= k, then it follows that enb > x[k-1] >=
 191      k-1, which implies k <= enb.
 192   */
 193   static const mp_bitcnt_t x[] = {0,POWM_SEC_TABLE,~(mp_bitcnt_t)0};
 194   for (k = 1; enb > x[k]; k++)
 195     ;
 196   ASSERT (k <= enb);
 197   return k;
 198 }
 199 #endif
 200
 201 /* Convert U to REDC form, U_r = B^n * U mod M.
 202    Uses scratch space at tp of size 2un + n + 1.  */
 203 static void
 204 redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n, mp_ptr tp)
 205 {
 206   MPN_ZERO (tp, n);
 207   MPN_COPY (tp + n, up, un);
 208
 209   mpn_sec_div_r (tp, un + n, mp, n, tp + un + n);
 210   MPN_COPY (rp, tp, n);
 211 }
 212
 213 /* {rp, n} <-- {bp, bn} ^ {ep, en} mod {mp, n},
 214    where en = ceil (enb / GMP_NUMB_BITS)
 215    Requires that {mp, n} is odd (and hence also mp[0] odd).
 216    Uses scratch space at tp as defined by mpn_sec_powm_itch.  */
 217 void
 218 mpn_sec_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 219               mp_srcptr ep, mp_bitcnt_t enb,
 220               mp_srcptr mp, mp_size_t n, mp_ptr tp)
 221 {
 222   mp_limb_t ip[2], *mip;
 223   int windowsize, this_windowsize;
 224   mp_limb_t expbits;
 225   mp_ptr pp, this_pp;
 226   long i;
 227   int cnd;
 228
 229   ASSERT (enb > 0);
 230   ASSERT (n > 0);
 231   /* The code works for bn = 0, but the defined scratch space is 2 limbs
 232      greater than we supply, when converting 1 to redc form .  */
 233   ASSERT (bn > 0);
 234   ASSERT ((mp[0] & 1) != 0);
 235
 236   windowsize = win_size (enb);
 237
 238 #if WANT_REDC_2
 239   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
 240     {
 241       mip = ip;
 242       binvert_limb (mip[0], mp[0]);
 243       mip[0] = -mip[0];
 244     }
 245   else
 246     {
 247       mip = ip;
 248       mpn_binvert (mip, mp, 2, tp);
 249       mip[0] = -mip[0]; mip[1] = ~mip[1];
 250     }
 251 #else
 252   mip = ip;
 253   binvert_limb (mip[0], mp[0]);
 254   mip[0] = -mip[0];
 255 #endif
 256
 257   pp = tp;
 258   tp += (n << windowsize);      /* put tp after power table */
 259
 260   /* Compute pp[0] table entry */
 261   /* scratch: |   n   | 1 |   n+2    |  */
 262   /*          | pp[0] | 1 | redcify  |  */
 263   this_pp = pp;
 264   this_pp[n] = 1;
 265   redcify (this_pp, this_pp + n, 1, mp, n, this_pp + n + 1);
 266   this_pp += n;
 267
 268   /* Compute pp[1] table entry.  To avoid excessive scratch usage in the
 269      degenerate situation where B >> M, we let redcify use scratch space which
 270      will later be used by the pp table (element 2 and up).  */
 271   /* scratch: |   n   |   n   |  bn + n + 1  |  */
 272   /*          | pp[0] | pp[1] |   redcify    |  */
 273   redcify (this_pp, bp, bn, mp, n, this_pp + n);
 274
 275   /* Precompute powers of b and put them in the temporary area at pp.  */
 276   /* scratch: |   n   |   n   | ...  |                    |   2n      |  */
 277   /*          | pp[0] | pp[1] | ...  | pp[2^windowsize-1] |  product  |  */
 278   for (i = (1 << windowsize) - 2; i > 0; i--)
 279     {
 280       mpn_mul_basecase (tp, this_pp, n, pp + n, n);
 281       this_pp += n;
 282 #if WANT_REDC_2
 283       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
 284         MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
 285       else
 286         MPN_REDC_2_SEC (this_pp, tp, mp, n, mip);
 287 #else
 288       MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
 289 #endif
 290     }
 291
 292   expbits = getbits (ep, enb, windowsize);
 293   ASSERT_ALWAYS (enb >= windowsize);
 294   enb -= windowsize;
 295
 296   mpn_sec_tabselect (rp, pp, n, 1 << windowsize, expbits);
 297
 298   /* Main exponentiation loop.  */
 299   /* scratch: |   n   |   n   | ...  |                    |     3n-4n     |  */
 300   /*          | pp[0] | pp[1] | ...  | pp[2^windowsize-1] |  loop scratch |  */
 301
 302 #define INNERLOOP                                                       \
 303   while (enb != 0)                                                      \
 304     {                                                                   \
 305       expbits = getbits (ep, enb, windowsize);                          \
 306       this_windowsize = windowsize;                                     \
 307       if (enb < windowsize)                                             \
 308         {                                                               \
 309           this_windowsize -= windowsize - enb;                          \
 310           enb = 0;                                                      \
 311         }                                                               \
 312       else                                                              \
 313         enb -= windowsize;                                              \
 314                                                                         \
 315       do                                                                \
 316         {                                                               \
 317           mpn_local_sqr (tp, rp, n, tp + 2 * n);                        \
 318           MPN_REDUCE (rp, tp, mp, n, mip);                              \
 319           this_windowsize--;                                            \
 320         }                                                               \
 321       while (this_windowsize != 0);                                     \
 322                                                                         \
 323       mpn_sec_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);    \
 324       mpn_mul_basecase (tp, rp, n, tp + 2*n, n);                        \
 325                                                                         \
 326       MPN_REDUCE (rp, tp, mp, n, mip);                                  \
 327     }
 328
 329 #if WANT_REDC_2
 330   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
 331     {
 332 #undef MPN_MUL_N
 333 #undef MPN_SQR
 334 #undef MPN_REDUCE
 335 #define MPN_MUL_N(r,a,b,n)              mpn_mul_basecase (r,a,n,b,n)
 336 #define MPN_SQR(r,a,n)                  mpn_sqr_basecase (r,a,n)
 337 #define MPN_REDUCE(rp,tp,mp,n,mip)      MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
 338       INNERLOOP;
 339     }
 340   else
 341     {
 342 #undef MPN_MUL_N
 343 #undef MPN_SQR
 344 #undef MPN_REDUCE
 345 #define MPN_MUL_N(r,a,b,n)              mpn_mul_basecase (r,a,n,b,n)
 346 #define MPN_SQR(r,a,n)                  mpn_sqr_basecase (r,a,n)
 347 #define MPN_REDUCE(rp,tp,mp,n,mip)      MPN_REDC_2_SEC (rp, tp, mp, n, mip)
 348       INNERLOOP;
 349     }
 350 #else
 351 #undef MPN_MUL_N
 352 #undef MPN_SQR
 353 #undef MPN_REDUCE
 354 #define MPN_MUL_N(r,a,b,n)              mpn_mul_basecase (r,a,n,b,n)
 355 #define MPN_SQR(r,a,n)                  mpn_sqr_basecase (r,a,n)
 356 #define MPN_REDUCE(rp,tp,mp,n,mip)      MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
 357   INNERLOOP;
 358 #endif
 359
 360   MPN_COPY (tp, rp, n);
 361   MPN_ZERO (tp + n, n);
 362
 363 #if WANT_REDC_2
 364   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
 365     MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
 366   else
 367     MPN_REDC_2_SEC (rp, tp, mp, n, mip);
 368 #else
 369   MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
 370 #endif
 371   cnd = mpn_sub_n (tp, rp, mp, n);      /* we need just retval */
 372   mpn_cnd_sub_n (!cnd, rp, rp, mp, n);
 373 }
 374
 375 mp_size_t
 376 mpn_sec_powm_itch (mp_size_t bn, mp_bitcnt_t enb, mp_size_t n)
 377 {
 378   int windowsize;
 379   mp_size_t redcify_itch, itch;
 380
 381   /* The top scratch usage will either be when reducing B in the 2nd redcify
 382      call, or more typically n*2^windowsize + 3n or 4n, in the main loop.  (It
 383      is 3n or 4n depending on if we use mpn_local_sqr or a native
 384      mpn_sqr_basecase.  We assume 4n always for now.) */
 385
 386   windowsize = win_size (enb);
 387
 388   /* The 2n term is due to pp[0] and pp[1] at the time of the 2nd redcify call,
 389      the (bn + n) term is due to redcify's own usage, and the rest is due to
 390      mpn_sec_div_r's usage when called from redcify.  */
 391   redcify_itch = (2 * n) + (bn + n) + ((bn + n) + 2 * n + 2);
 392
 393   /* The n * 2^windowsize term is due to the power table, the 4n term is due to
 394      scratch needs of squaring/multiplication in the exponentiation loop.  */
 395   itch = (n << windowsize) + (4 * n);
 396
 397   return MAX (itch, redcify_itch);
 398 }