1 /* mpn_powm -- Compute R = U^E mod M.
3 Contributed to the GNU project by Torbjorn Granlund.
5 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
6 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
9 Copyright 2007-2012 Free Software Foundation, Inc.
11 This file is part of the GNU MP Library.
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
26 or both in parallel, as here.
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
39 BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
43 2. T <- (B^n * U) mod M Convert to REDC form
45 3. Compute table U^1, U^3, U^5... of E-dependent size
47 4. While there are more bits in E
48 W <- power left-to-right base-k
53 * Make getbits a macro, thereby allowing it to update the index operand.
54 That will simplify the code using getbits. (Perhaps make getbits' sibling
55 getbit then have similar form, for symmetry.)
57 * Write an itch function. Or perhaps get rid of tp parameter since the huge
58 pp area is allocated locally anyway?
60 * Choose window size without looping. (Superoptimize or think(tm).)
62 * Handle small bases with initial, reduction-free exponentiation.
64 * Call new division functions, not mpn_tdiv_qr.
66 * Consider special code for one-limb M.
68 * How should we handle the redc1/redc2/redc_n choice?
69 - redc1: T(binvert_1limb) + e * (n) * (T(mullo-1x1) + n*T(addmul_1))
70 - redc2: T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
71 - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
72 This disregards the addmul_N constant term, but we could think of
73 that as part of the respective mullo.
75 * When U (the base) is small, we should start the exponentiation with plain
76 operations, then convert that partial result to REDC form.
78 * When U is just one limb, should it be handled without the k-ary tricks?
79 We could keep a factor of B^n in W, but use U' = BU as base. After
80 multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
89 #define MPN_REDC_1(rp, up, mp, n, invm) \
92 cy = mpn_redc_1 (rp, up, mp, n, invm); \
94 mpn_sub_n (rp, rp, mp, n); \
98 #define MPN_REDC_2(rp, up, mp, n, mip) \
101 cy = mpn_redc_2 (rp, up, mp, n, mip); \
103 mpn_sub_n (rp, rp, mp, n); \
106 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
107 #define WANT_REDC_2 1
110 #define getbit(p,bi) \
111 ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
113 static inline mp_limb_t
114 getbits (const mp_limb_t
*p
, mp_bitcnt_t bi
, int nbits
)
122 return p
[0] & (((mp_limb_t
) 1 << bi
) - 1);
126 bi
-= nbits
; /* bit index of low bit to extract */
127 i
= bi
/ GMP_NUMB_BITS
; /* word index of low bit to extract */
128 bi
%= GMP_NUMB_BITS
; /* bit index in low word */
129 r
= p
[i
] >> bi
; /* extract (low) bits */
130 nbits_in_r
= GMP_NUMB_BITS
- bi
; /* number of bits now in r */
131 if (nbits_in_r
< nbits
) /* did we get enough bits? */
132 r
+= p
[i
+ 1] << nbits_in_r
; /* prepend bits from higher word */
133 return r
& (((mp_limb_t
) 1 << nbits
) - 1);
138 win_size (mp_bitcnt_t eb
)
141 static mp_bitcnt_t x
[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t
)0};
142 for (k
= 1; eb
> x
[k
]; k
++)
147 /* Convert U to REDC form, U_r = B^n * U mod M */
149 redcify (mp_ptr rp
, mp_srcptr up
, mp_size_t un
, mp_srcptr mp
, mp_size_t n
)
155 TMP_ALLOC_LIMBS_2 (tp
, un
+ n
, qp
, un
+ 1);
158 MPN_COPY (tp
+ n
, up
, un
);
159 mpn_tdiv_qr (qp
, rp
, 0L, tp
, un
+ n
, mp
, n
);
163 /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
164 Requires that mp[n-1..0] is odd.
165 Requires that ep[en-1..0] is > 1.
166 Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs. */
168 mpn_powm (mp_ptr rp
, mp_srcptr bp
, mp_size_t bn
,
169 mp_srcptr ep
, mp_size_t en
,
170 mp_srcptr mp
, mp_size_t n
, mp_ptr tp
)
172 mp_limb_t ip
[2], *mip
;
175 int windowsize
, this_windowsize
;
181 ASSERT (en
> 1 || (en
== 1 && ep
[0] > 1));
182 ASSERT (n
>= 1 && ((mp
[0] & 1) != 0));
186 MPN_SIZEINBASE_2EXP(ebi
, ep
, en
, 1);
191 /* Do the first few exponent bits without mod reductions,
192 until the result is greater than the mod argument. */
195 mpn_sqr (tp
, this_pp
, tn
);
196 tn
= tn
* 2 - 1, tn
+= tp
[tn
] != 0;
197 if (getbit (ep
, ebi
) != 0)
198 mpn_mul (..., tp
, tn
, bp
, bn
);
204 windowsize
= win_size (ebi
);
207 if (BELOW_THRESHOLD (n
, REDC_1_TO_REDC_2_THRESHOLD
))
210 binvert_limb (mip
[0], mp
[0]);
213 else if (BELOW_THRESHOLD (n
, REDC_2_TO_REDC_N_THRESHOLD
))
216 mpn_binvert (mip
, mp
, 2, tp
);
217 mip
[0] = -mip
[0]; mip
[1] = ~mip
[1];
220 if (BELOW_THRESHOLD (n
, REDC_1_TO_REDC_N_THRESHOLD
))
223 binvert_limb (mip
[0], mp
[0]);
229 mip
= TMP_ALLOC_LIMBS (n
);
230 mpn_binvert (mip
, mp
, n
, tp
);
233 pp
= TMP_ALLOC_LIMBS (n
<< (windowsize
- 1));
236 redcify (this_pp
, bp
, bn
, mp
, n
);
238 /* Store b^2 at rp. */
239 mpn_sqr (tp
, this_pp
, n
);
241 if (BELOW_THRESHOLD (n
, REDC_1_TO_REDC_2_THRESHOLD
))
242 MPN_REDC_1 (rp
, tp
, mp
, n
, mip
[0]);
243 else if (BELOW_THRESHOLD (n
, REDC_2_TO_REDC_N_THRESHOLD
))
244 MPN_REDC_2 (rp
, tp
, mp
, n
, mip
);
246 if (BELOW_THRESHOLD (n
, REDC_1_TO_REDC_N_THRESHOLD
))
247 MPN_REDC_1 (rp
, tp
, mp
, n
, mip
[0]);
250 mpn_redc_n (rp
, tp
, mp
, n
, mip
);
252 /* Precompute odd powers of b and put them in the temporary area at pp. */
253 for (i
= (1 << (windowsize
- 1)) - 1; i
> 0; i
--)
255 mpn_mul_n (tp
, this_pp
, rp
, n
);
258 if (BELOW_THRESHOLD (n
, REDC_1_TO_REDC_2_THRESHOLD
))
259 MPN_REDC_1 (this_pp
, tp
, mp
, n
, mip
[0]);
260 else if (BELOW_THRESHOLD (n
, REDC_2_TO_REDC_N_THRESHOLD
))
261 MPN_REDC_2 (this_pp
, tp
, mp
, n
, mip
);
263 if (BELOW_THRESHOLD (n
, REDC_1_TO_REDC_N_THRESHOLD
))
264 MPN_REDC_1 (this_pp
, tp
, mp
, n
, mip
[0]);
267 mpn_redc_n (this_pp
, tp
, mp
, n
, mip
);
270 expbits
= getbits (ep
, ebi
, windowsize
);
271 if (ebi
< windowsize
)
276 count_trailing_zeros (cnt
, expbits
);
280 MPN_COPY (rp
, pp
+ n
* (expbits
>> 1), n
);
285 while (getbit (ep, ebi) == 0) \
287 MPN_SQR (tp, rp, n); \
288 MPN_REDUCE (rp, tp, mp, n, mip); \
294 /* The next bit of the exponent is 1. Now extract the largest \
295 block of bits <= windowsize, and such that the least \
296 significant bit is 1. */ \
298 expbits = getbits (ep, ebi, windowsize); \
299 this_windowsize = windowsize; \
300 if (ebi < windowsize) \
302 this_windowsize -= windowsize - ebi; \
308 count_trailing_zeros (cnt, expbits); \
309 this_windowsize -= cnt; \
315 MPN_SQR (tp, rp, n); \
316 MPN_REDUCE (rp, tp, mp, n, mip); \
319 while (this_windowsize != 0); \
321 MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n); \
322 MPN_REDUCE (rp, tp, mp, n, mip); \
327 if (REDC_1_TO_REDC_2_THRESHOLD
< MUL_TOOM22_THRESHOLD
)
329 if (BELOW_THRESHOLD (n
, REDC_1_TO_REDC_2_THRESHOLD
))
331 if (REDC_1_TO_REDC_2_THRESHOLD
< SQR_BASECASE_THRESHOLD
332 || BELOW_THRESHOLD (n
, SQR_BASECASE_THRESHOLD
))
337 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
338 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
339 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
347 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
348 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
349 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
353 else if (BELOW_THRESHOLD (n
, MUL_TOOM22_THRESHOLD
))
355 if (MUL_TOOM22_THRESHOLD
< SQR_BASECASE_THRESHOLD
356 || BELOW_THRESHOLD (n
, SQR_BASECASE_THRESHOLD
))
361 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
362 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
363 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
371 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
372 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
373 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
377 else if (BELOW_THRESHOLD (n
, REDC_2_TO_REDC_N_THRESHOLD
))
382 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
383 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
384 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
392 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
393 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
394 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
400 if (BELOW_THRESHOLD (n
, MUL_TOOM22_THRESHOLD
))
402 if (MUL_TOOM22_THRESHOLD
< SQR_BASECASE_THRESHOLD
403 || BELOW_THRESHOLD (n
, SQR_BASECASE_THRESHOLD
))
408 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
409 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
410 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
418 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
419 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
420 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
424 else if (BELOW_THRESHOLD (n
, REDC_1_TO_REDC_2_THRESHOLD
))
429 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
430 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
431 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
434 else if (BELOW_THRESHOLD (n
, REDC_2_TO_REDC_N_THRESHOLD
))
439 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
440 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
441 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
449 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
450 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
451 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
456 #else /* WANT_REDC_2 */
458 if (REDC_1_TO_REDC_N_THRESHOLD
< MUL_TOOM22_THRESHOLD
)
460 if (BELOW_THRESHOLD (n
, REDC_1_TO_REDC_N_THRESHOLD
))
462 if (REDC_1_TO_REDC_N_THRESHOLD
< SQR_BASECASE_THRESHOLD
463 || BELOW_THRESHOLD (n
, SQR_BASECASE_THRESHOLD
))
468 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
469 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
470 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
478 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
479 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
480 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
484 else if (BELOW_THRESHOLD (n
, MUL_TOOM22_THRESHOLD
))
486 if (MUL_TOOM22_THRESHOLD
< SQR_BASECASE_THRESHOLD
487 || BELOW_THRESHOLD (n
, SQR_BASECASE_THRESHOLD
))
492 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
493 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
494 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
502 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
503 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
504 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
513 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
514 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
515 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
521 if (BELOW_THRESHOLD (n
, MUL_TOOM22_THRESHOLD
))
523 if (MUL_TOOM22_THRESHOLD
< SQR_BASECASE_THRESHOLD
524 || BELOW_THRESHOLD (n
, SQR_BASECASE_THRESHOLD
))
529 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
530 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
531 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
539 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
540 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
541 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
545 else if (BELOW_THRESHOLD (n
, REDC_1_TO_REDC_N_THRESHOLD
))
550 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
551 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
552 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
560 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
561 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
562 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
566 #endif /* WANT_REDC_2 */
570 MPN_COPY (tp
, rp
, n
);
571 MPN_ZERO (tp
+ n
, n
);
574 if (BELOW_THRESHOLD (n
, REDC_1_TO_REDC_2_THRESHOLD
))
575 MPN_REDC_1 (rp
, tp
, mp
, n
, mip
[0]);
576 else if (BELOW_THRESHOLD (n
, REDC_2_TO_REDC_N_THRESHOLD
))
577 MPN_REDC_2 (rp
, tp
, mp
, n
, mip
);
579 if (BELOW_THRESHOLD (n
, REDC_1_TO_REDC_N_THRESHOLD
))
580 MPN_REDC_1 (rp
, tp
, mp
, n
, mip
[0]);
583 mpn_redc_n (rp
, tp
, mp
, n
, mip
);
585 if (mpn_cmp (rp
, mp
, n
) >= 0)
586 mpn_sub_n (rp
, rp
, mp
, n
);