/* top(1): Raise WARNS to 6 and fix warnings.
   [dragonfly.git] / contrib / gmp / mpz / powm.c
   blob 8f3ce97cc4a1eaa70969612427535de74c1337d1  */
/* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD.

Contributed by Paul Zimmermann.

Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2009
Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
24 #include "gmp.h"
25 #include "gmp-impl.h"
26 #include "longlong.h"
27 #ifdef BERKELEY_MP
28 #include "mp.h"
29 #endif
31 /* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
32 t is defined by (tp,mn). */
33 static void
34 reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn)
36 mp_ptr qp;
37 TMP_DECL;
39 TMP_MARK;
40 qp = TMP_ALLOC_LIMBS (an - mn + 1);
42 mpn_tdiv_qr (qp, tp, 0L, ap, an, mp, mn);
44 TMP_FREE;
#if REDUCE_EXPONENT
/* Return the group order of the ring mod m, i.e. Euler's totient phi(t),
   computed by trial division.  (Compiled only when REDUCE_EXPONENT is set;
   it is #undef'd below, so this is normally dead code.)  */
static mp_limb_t
phi (mp_limb_t t)
{
  mp_limb_t d, m, go;

  go = 1;

  /* Pull out the factors of 2 first; each one past the first contributes
     a factor 2 to phi.  */
  if (t % 2 == 0)
    {
      t = t / 2;
      while (t % 2 == 0)
	{
	  go *= 2;
	  t = t / 2;
	}
    }
  /* Trial-divide by each odd d; a first occurrence of d contributes d-1,
     repeated occurrences contribute d.  */
  for (d = 3;; d += 2)
    {
      m = d - 1;
      for (;;)
	{
	  unsigned long int q = t / d;
	  if (q < d)
	    {
	      /* No factor >= d*d remains: t is 1 or prime.  */
	      if (t <= 1)
		return go;
	      if (t == d)
		return go * m;
	      return go * (t - 1);
	    }
	  if (t != q * d)
	    break;
	  go *= m;
	  m = d;
	  t = q;
	}
    }
}
#endif
/* average number of calls to redc for an exponent of n bits
   with the sliding window algorithm of base 2^k: the optimal is
   obtained for the value of k which minimizes 2^(k-1)+n/(k+1):

   n\k    4     5     6     7     8
   128    156*  159   171   200   261
   256    309   307*  316   343   403
   512    617   607*  610   632   688
   1024   1231  1204  1195* 1207  1256
   2048   2461  2399  2366  2360* 2396
   4096   4918  4787  4707  4665* 4670
*/
/* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD.  In REDC
   each modular multiplication costs about 2*n^2 limb operations, whereas
   using usual reduction it costs 3*K(n), where K(n) is the cost of a
   multiplication using Karatsuba, and a division is assumed to cost 2*K(n),
   for example using Burnikel-Ziegler's algorithm.  This gives a theoretical
   threshold of a*SQR_KARATSUBA_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~
   2.66.  */
/* For now, also disable REDC when MOD is even, as the inverse can't handle
   that.  At some point, we might want to make the code faster for that case,
   perhaps using CRR.  */
114 #ifndef POWM_THRESHOLD
115 #define POWM_THRESHOLD ((8 * SQR_KARATSUBA_THRESHOLD) / 3)
116 #endif
118 #define HANDLE_NEGATIVE_EXPONENT 1
119 #undef REDUCE_EXPONENT
121 void
122 #ifndef BERKELEY_MP
123 mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
124 #else /* BERKELEY_MP */
125 pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
126 #endif /* BERKELEY_MP */
128 mp_ptr xp, tp, qp, gp, this_gp;
129 mp_srcptr bp, ep, mp;
130 mp_size_t bn, es, en, mn, xn;
131 mp_limb_t invm, c;
132 unsigned long int enb;
133 mp_size_t i, K, j, l, k;
134 int m_zero_cnt, e_zero_cnt;
135 int sh;
136 int use_redc;
137 #if HANDLE_NEGATIVE_EXPONENT
138 mpz_t new_b;
139 #endif
140 #if REDUCE_EXPONENT
141 mpz_t new_e;
142 #endif
143 TMP_DECL;
145 mp = PTR(m);
146 mn = ABSIZ (m);
147 if (mn == 0)
148 DIVIDE_BY_ZERO;
150 TMP_MARK;
152 es = SIZ (e);
153 if (es <= 0)
155 if (es == 0)
157 /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if
158 m equals 1. */
159 SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
160 PTR(r)[0] = 1;
161 TMP_FREE; /* we haven't really allocated anything here */
162 return;
164 #if HANDLE_NEGATIVE_EXPONENT
165 MPZ_TMP_INIT (new_b, mn + 1);
167 if (! mpz_invert (new_b, b, m))
168 DIVIDE_BY_ZERO;
169 b = new_b;
170 es = -es;
171 #else
172 DIVIDE_BY_ZERO;
173 #endif
175 en = es;
177 #if REDUCE_EXPONENT
178 /* Reduce exponent by dividing it by phi(m) when m small. */
179 if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150)
181 MPZ_TMP_INIT (new_e, 2);
182 mpz_mod_ui (new_e, e, phi (mp[0]));
183 e = new_e;
185 #endif
187 use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0;
188 if (use_redc)
190 /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
191 modlimb_invert (invm, mp[0]);
192 invm = -invm;
194 else
196 /* Normalize m (i.e. make its most significant bit set) as required by
197 division functions below. */
198 count_leading_zeros (m_zero_cnt, mp[mn - 1]);
199 m_zero_cnt -= GMP_NAIL_BITS;
200 if (m_zero_cnt != 0)
202 mp_ptr new_mp;
203 new_mp = TMP_ALLOC_LIMBS (mn);
204 mpn_lshift (new_mp, mp, mn, m_zero_cnt);
205 mp = new_mp;
209 /* Determine optimal value of k, the number of exponent bits we look at
210 at a time. */
211 count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]);
212 e_zero_cnt -= GMP_NAIL_BITS;
213 enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */
214 k = 1;
215 K = 2;
216 while (2 * enb > K * (2 + k * (3 + k)))
218 k++;
219 K *= 2;
220 if (k == 10) /* cap allocation */
221 break;
224 tp = TMP_ALLOC_LIMBS (2 * mn);
225 qp = TMP_ALLOC_LIMBS (mn + 1);
227 gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn);
229 /* Compute x*R^n where R=2^BITS_PER_MP_LIMB. */
230 bn = ABSIZ (b);
231 bp = PTR(b);
232 /* Handle |b| >= m by computing b mod m. FIXME: It is not strictly necessary
233 for speed or correctness to do this when b and m have the same number of
234 limbs, perhaps remove mpn_cmp call. */
235 if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0))
237 /* Reduce possibly huge base while moving it to gp[0]. Use a function
238 call to reduce, since we don't want the quotient allocation to
239 live until function return. */
240 if (use_redc)
242 reduce (tp + mn, bp, bn, mp, mn); /* b mod m */
243 MPN_ZERO (tp, mn);
244 mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormnalized! */
246 else
248 reduce (gp, bp, bn, mp, mn);
251 else
253 /* |b| < m. We pad out operands to become mn limbs, which simplifies
254 the rest of the function, but slows things down when |b| << m. */
255 if (use_redc)
257 MPN_ZERO (tp, mn);
258 MPN_COPY (tp + mn, bp, bn);
259 MPN_ZERO (tp + mn + bn, mn - bn);
260 mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn);
262 else
264 MPN_COPY (gp, bp, bn);
265 MPN_ZERO (gp + bn, mn - bn);
269 /* Compute xx^i for odd g < 2^i. */
271 xp = TMP_ALLOC_LIMBS (mn);
272 mpn_sqr_n (tp, gp, mn);
273 if (use_redc)
274 mpn_redc_1 (xp, tp, mp, mn, invm); /* xx = x^2*R^n */
275 else
276 mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
277 this_gp = gp;
278 for (i = 1; i < K / 2; i++)
280 mpn_mul_n (tp, this_gp, xp, mn);
281 this_gp += mn;
282 if (use_redc)
283 mpn_redc_1 (this_gp, tp, mp, mn, invm); /* g[i] = x^(2i+1)*R^n */
284 else
285 mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn);
288 /* Start the real stuff. */
289 ep = PTR (e);
290 i = en - 1; /* current index */
291 c = ep[i]; /* current limb */
292 sh = GMP_NUMB_BITS - e_zero_cnt; /* significant bits in ep[i] */
293 sh -= k; /* index of lower bit of ep[i] to take into account */
294 if (sh < 0)
295 { /* k-sh extra bits are needed */
296 if (i > 0)
298 i--;
299 c <<= (-sh);
300 sh += GMP_NUMB_BITS;
301 c |= ep[i] >> sh;
304 else
305 c >>= sh;
307 for (j = 0; c % 2 == 0; j++)
308 c >>= 1;
310 MPN_COPY (xp, gp + mn * (c >> 1), mn);
311 while (--j >= 0)
313 mpn_sqr_n (tp, xp, mn);
314 if (use_redc)
315 mpn_redc_1 (xp, tp, mp, mn, invm);
316 else
317 mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
320 while (i > 0 || sh > 0)
322 c = ep[i];
323 l = k; /* number of bits treated */
324 sh -= l;
325 if (sh < 0)
327 if (i > 0)
329 i--;
330 c <<= (-sh);
331 sh += GMP_NUMB_BITS;
332 c |= ep[i] >> sh;
334 else
336 l += sh; /* last chunk of bits from e; l < k */
339 else
340 c >>= sh;
341 c &= ((mp_limb_t) 1 << l) - 1;
343 /* This while loop implements the sliding window improvement--loop while
344 the most significant bit of c is zero, squaring xx as we go. */
345 while ((c >> (l - 1)) == 0 && (i > 0 || sh > 0))
347 mpn_sqr_n (tp, xp, mn);
348 if (use_redc)
349 mpn_redc_1 (xp, tp, mp, mn, invm);
350 else
351 mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
352 if (sh != 0)
354 sh--;
355 c = (c << 1) + ((ep[i] >> sh) & 1);
357 else
359 i--;
360 sh = GMP_NUMB_BITS - 1;
361 c = (c << 1) + (ep[i] >> sh);
365 /* Replace xx by xx^(2^l)*x^c. */
366 if (c != 0)
368 for (j = 0; c % 2 == 0; j++)
369 c >>= 1;
371 /* c0 = c * 2^j, i.e. xx^(2^l)*x^c = (A^(2^(l - j))*c)^(2^j) */
372 l -= j;
373 while (--l >= 0)
375 mpn_sqr_n (tp, xp, mn);
376 if (use_redc)
377 mpn_redc_1 (xp, tp, mp, mn, invm);
378 else
379 mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
381 mpn_mul_n (tp, xp, gp + mn * (c >> 1), mn);
382 if (use_redc)
383 mpn_redc_1 (xp, tp, mp, mn, invm);
384 else
385 mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
387 else
388 j = l; /* case c=0 */
389 while (--j >= 0)
391 mpn_sqr_n (tp, xp, mn);
392 if (use_redc)
393 mpn_redc_1 (xp, tp, mp, mn, invm);
394 else
395 mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
399 if (use_redc)
401 /* Convert back xx to xx/R^n. */
402 MPN_COPY (tp, xp, mn);
403 MPN_ZERO (tp + mn, mn);
404 mpn_redc_1 (xp, tp, mp, mn, invm);
405 if (mpn_cmp (xp, mp, mn) >= 0)
406 mpn_sub_n (xp, xp, mp, mn);
408 else
410 if (m_zero_cnt != 0)
412 mp_limb_t cy;
413 cy = mpn_lshift (tp, xp, mn, m_zero_cnt);
414 tp[mn] = cy;
415 mpn_tdiv_qr (qp, xp, 0L, tp, mn + (cy != 0), mp, mn);
416 mpn_rshift (xp, xp, mn, m_zero_cnt);
419 xn = mn;
420 MPN_NORMALIZE (xp, xn);
422 if ((ep[0] & 1) && SIZ(b) < 0 && xn != 0)
424 mp = PTR(m); /* want original, unnormalized m */
425 mpn_sub (xp, mp, mn, xp, xn);
426 xn = mn;
427 MPN_NORMALIZE (xp, xn);
429 MPZ_REALLOC (r, xn);
430 SIZ (r) = xn;
431 MPN_COPY (PTR(r), xp, xn);
433 __GMP_FREE_FUNC_LIMBS (gp, K / 2 * mn);
434 TMP_FREE;