beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpn / generic / powm.c
blob0cd0da46ea935b2a6253dac3c8bb6ba611db9dbd
1 /* mpn_powm -- Compute R = U^E mod M.
3 Contributed to the GNU project by Torbjorn Granlund.
5 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
6 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
7 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
9 Copyright 2007-2012 Free Software Foundation, Inc.
11 This file is part of the GNU MP Library.
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
16 * the GNU Lesser General Public License as published by the Free
17 Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
22 * the GNU General Public License as published by the Free Software
23 Foundation; either version 2 of the License, or (at your option) any
24 later version.
26 or both in parallel, as here.
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31 for more details.
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library. If not,
35 see https://www.gnu.org/licenses/. */
39 BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
41 1. W <- U
43 2. T <- (B^n * U) mod M Convert to REDC form
45 3. Compute table U^1, U^3, U^5... of E-dependent size
47 4. While there are more bits in E
48 W <- power left-to-right base-k
51 TODO:
53 * Make getbits a macro, thereby allowing it to update the index operand.
54 That will simplify the code using getbits. (Perhaps make getbits' sibling
55 getbit then have similar form, for symmetry.)
57 * Write an itch function. Or perhaps get rid of tp parameter since the huge
58 pp area is allocated locally anyway?
60 * Choose window size without looping. (Superoptimize or think(tm).)
62 * Handle small bases with initial, reduction-free exponentiation.
64 * Call new division functions, not mpn_tdiv_qr.
66 * Consider special code for one-limb M.
68 * How should we handle the redc1/redc2/redc_n choice?
69 - redc1: T(binvert_1limb) + e * (n) * (T(mullo-1x1) + n*T(addmul_1))
70 - redc2: T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
71 - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
72 This disregards the addmul_N constant term, but we could think of
73 that as part of the respective mullo.
75 * When U (the base) is small, we should start the exponentiation with plain
76 operations, then convert that partial result to REDC form.
78 * When U is just one limb, should it be handled without the k-ary tricks?
79 We could keep a factor of B^n in W, but use U' = BU as base. After
80 multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
81 mod M.
84 #include "gmp.h"
85 #include "gmp-impl.h"
86 #include "longlong.h"
88 #undef MPN_REDC_1
89 #define MPN_REDC_1(rp, up, mp, n, invm) \
90 do { \
91 mp_limb_t cy; \
92 cy = mpn_redc_1 (rp, up, mp, n, invm); \
93 if (cy != 0) \
94 mpn_sub_n (rp, rp, mp, n); \
95 } while (0)
97 #undef MPN_REDC_2
98 #define MPN_REDC_2(rp, up, mp, n, mip) \
99 do { \
100 mp_limb_t cy; \
101 cy = mpn_redc_2 (rp, up, mp, n, mip); \
102 if (cy != 0) \
103 mpn_sub_n (rp, rp, mp, n); \
104 } while (0)
106 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
107 #define WANT_REDC_2 1
108 #endif
110 #define getbit(p,bi) \
111 ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
113 static inline mp_limb_t
114 getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
116 int nbits_in_r;
117 mp_limb_t r;
118 mp_size_t i;
120 if (bi < nbits)
122 return p[0] & (((mp_limb_t) 1 << bi) - 1);
124 else
126 bi -= nbits; /* bit index of low bit to extract */
127 i = bi / GMP_NUMB_BITS; /* word index of low bit to extract */
128 bi %= GMP_NUMB_BITS; /* bit index in low word */
129 r = p[i] >> bi; /* extract (low) bits */
130 nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */
131 if (nbits_in_r < nbits) /* did we get enough bits? */
132 r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */
133 return r & (((mp_limb_t ) 1 << nbits) - 1);
137 static inline int
138 win_size (mp_bitcnt_t eb)
140 int k;
141 static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
142 for (k = 1; eb > x[k]; k++)
144 return k;
147 /* Convert U to REDC form, U_r = B^n * U mod M */
148 static void
149 redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
151 mp_ptr tp, qp;
152 TMP_DECL;
153 TMP_MARK;
155 TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1);
157 MPN_ZERO (tp, n);
158 MPN_COPY (tp + n, up, un);
159 mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
160 TMP_FREE;
163 /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
164 Requires that mp[n-1..0] is odd.
165 Requires that ep[en-1..0] is > 1.
166 Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs. */
167 void
168 mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
169 mp_srcptr ep, mp_size_t en,
170 mp_srcptr mp, mp_size_t n, mp_ptr tp)
172 mp_limb_t ip[2], *mip;
173 int cnt;
174 mp_bitcnt_t ebi;
175 int windowsize, this_windowsize;
176 mp_limb_t expbits;
177 mp_ptr pp, this_pp;
178 long i;
179 TMP_DECL;
181 ASSERT (en > 1 || (en == 1 && ep[0] > 1));
182 ASSERT (n >= 1 && ((mp[0] & 1) != 0));
184 TMP_MARK;
186 MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
188 #if 0
189 if (bn < n)
191 /* Do the first few exponent bits without mod reductions,
192 until the result is greater than the mod argument. */
193 for (;;)
195 mpn_sqr (tp, this_pp, tn);
196 tn = tn * 2 - 1, tn += tp[tn] != 0;
197 if (getbit (ep, ebi) != 0)
198 mpn_mul (..., tp, tn, bp, bn);
199 ebi--;
202 #endif
204 windowsize = win_size (ebi);
206 #if WANT_REDC_2
207 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
209 mip = ip;
210 binvert_limb (mip[0], mp[0]);
211 mip[0] = -mip[0];
213 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
215 mip = ip;
216 mpn_binvert (mip, mp, 2, tp);
217 mip[0] = -mip[0]; mip[1] = ~mip[1];
219 #else
220 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
222 mip = ip;
223 binvert_limb (mip[0], mp[0]);
224 mip[0] = -mip[0];
226 #endif
227 else
229 mip = TMP_ALLOC_LIMBS (n);
230 mpn_binvert (mip, mp, n, tp);
233 pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
235 this_pp = pp;
236 redcify (this_pp, bp, bn, mp, n);
238 /* Store b^2 at rp. */
239 mpn_sqr (tp, this_pp, n);
240 #if WANT_REDC_2
241 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
242 MPN_REDC_1 (rp, tp, mp, n, mip[0]);
243 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
244 MPN_REDC_2 (rp, tp, mp, n, mip);
245 #else
246 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
247 MPN_REDC_1 (rp, tp, mp, n, mip[0]);
248 #endif
249 else
250 mpn_redc_n (rp, tp, mp, n, mip);
252 /* Precompute odd powers of b and put them in the temporary area at pp. */
253 for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
255 mpn_mul_n (tp, this_pp, rp, n);
256 this_pp += n;
257 #if WANT_REDC_2
258 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
259 MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
260 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
261 MPN_REDC_2 (this_pp, tp, mp, n, mip);
262 #else
263 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
264 MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
265 #endif
266 else
267 mpn_redc_n (this_pp, tp, mp, n, mip);
270 expbits = getbits (ep, ebi, windowsize);
271 if (ebi < windowsize)
272 ebi = 0;
273 else
274 ebi -= windowsize;
276 count_trailing_zeros (cnt, expbits);
277 ebi += cnt;
278 expbits >>= cnt;
280 MPN_COPY (rp, pp + n * (expbits >> 1), n);
282 #define INNERLOOP \
283 while (ebi != 0) \
285 while (getbit (ep, ebi) == 0) \
287 MPN_SQR (tp, rp, n); \
288 MPN_REDUCE (rp, tp, mp, n, mip); \
289 ebi--; \
290 if (ebi == 0) \
291 goto done; \
294 /* The next bit of the exponent is 1. Now extract the largest \
295 block of bits <= windowsize, and such that the least \
296 significant bit is 1. */ \
298 expbits = getbits (ep, ebi, windowsize); \
299 this_windowsize = windowsize; \
300 if (ebi < windowsize) \
302 this_windowsize -= windowsize - ebi; \
303 ebi = 0; \
305 else \
306 ebi -= windowsize; \
308 count_trailing_zeros (cnt, expbits); \
309 this_windowsize -= cnt; \
310 ebi += cnt; \
311 expbits >>= cnt; \
313 do \
315 MPN_SQR (tp, rp, n); \
316 MPN_REDUCE (rp, tp, mp, n, mip); \
317 this_windowsize--; \
319 while (this_windowsize != 0); \
321 MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n); \
322 MPN_REDUCE (rp, tp, mp, n, mip); \
326 #if WANT_REDC_2
327 if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
329 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
331 if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
332 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
334 #undef MPN_MUL_N
335 #undef MPN_SQR
336 #undef MPN_REDUCE
337 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
338 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
339 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
340 INNERLOOP;
342 else
344 #undef MPN_MUL_N
345 #undef MPN_SQR
346 #undef MPN_REDUCE
347 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
348 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
349 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
350 INNERLOOP;
353 else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
355 if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
356 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
358 #undef MPN_MUL_N
359 #undef MPN_SQR
360 #undef MPN_REDUCE
361 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
362 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
363 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
364 INNERLOOP;
366 else
368 #undef MPN_MUL_N
369 #undef MPN_SQR
370 #undef MPN_REDUCE
371 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
372 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
373 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
374 INNERLOOP;
377 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
379 #undef MPN_MUL_N
380 #undef MPN_SQR
381 #undef MPN_REDUCE
382 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
383 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
384 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
385 INNERLOOP;
387 else
389 #undef MPN_MUL_N
390 #undef MPN_SQR
391 #undef MPN_REDUCE
392 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
393 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
394 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
395 INNERLOOP;
398 else
400 if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
402 if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
403 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
405 #undef MPN_MUL_N
406 #undef MPN_SQR
407 #undef MPN_REDUCE
408 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
409 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
410 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
411 INNERLOOP;
413 else
415 #undef MPN_MUL_N
416 #undef MPN_SQR
417 #undef MPN_REDUCE
418 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
419 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
420 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
421 INNERLOOP;
424 else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
426 #undef MPN_MUL_N
427 #undef MPN_SQR
428 #undef MPN_REDUCE
429 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
430 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
431 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
432 INNERLOOP;
434 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
436 #undef MPN_MUL_N
437 #undef MPN_SQR
438 #undef MPN_REDUCE
439 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
440 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
441 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
442 INNERLOOP;
444 else
446 #undef MPN_MUL_N
447 #undef MPN_SQR
448 #undef MPN_REDUCE
449 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
450 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
451 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
452 INNERLOOP;
456 #else /* WANT_REDC_2 */
458 if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
460 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
462 if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
463 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
465 #undef MPN_MUL_N
466 #undef MPN_SQR
467 #undef MPN_REDUCE
468 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
469 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
470 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
471 INNERLOOP;
473 else
475 #undef MPN_MUL_N
476 #undef MPN_SQR
477 #undef MPN_REDUCE
478 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
479 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
480 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
481 INNERLOOP;
484 else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
486 if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
487 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
489 #undef MPN_MUL_N
490 #undef MPN_SQR
491 #undef MPN_REDUCE
492 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
493 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
494 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
495 INNERLOOP;
497 else
499 #undef MPN_MUL_N
500 #undef MPN_SQR
501 #undef MPN_REDUCE
502 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
503 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
504 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
505 INNERLOOP;
508 else
510 #undef MPN_MUL_N
511 #undef MPN_SQR
512 #undef MPN_REDUCE
513 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
514 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
515 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
516 INNERLOOP;
519 else
521 if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
523 if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
524 || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
526 #undef MPN_MUL_N
527 #undef MPN_SQR
528 #undef MPN_REDUCE
529 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
530 #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
531 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
532 INNERLOOP;
534 else
536 #undef MPN_MUL_N
537 #undef MPN_SQR
538 #undef MPN_REDUCE
539 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
540 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
541 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
542 INNERLOOP;
545 else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
547 #undef MPN_MUL_N
548 #undef MPN_SQR
549 #undef MPN_REDUCE
550 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
551 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
552 #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
553 INNERLOOP;
555 else
557 #undef MPN_MUL_N
558 #undef MPN_SQR
559 #undef MPN_REDUCE
560 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
561 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
562 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
563 INNERLOOP;
566 #endif /* WANT_REDC_2 */
568 done:
570 MPN_COPY (tp, rp, n);
571 MPN_ZERO (tp + n, n);
573 #if WANT_REDC_2
574 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
575 MPN_REDC_1 (rp, tp, mp, n, mip[0]);
576 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
577 MPN_REDC_2 (rp, tp, mp, n, mip);
578 #else
579 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
580 MPN_REDC_1 (rp, tp, mp, n, mip[0]);
581 #endif
582 else
583 mpn_redc_n (rp, tp, mp, n, mip);
585 if (mpn_cmp (rp, mp, n) >= 0)
586 mpn_sub_n (rp, rp, mp, n);
588 TMP_FREE;