beta-0.89.2
[luatex.git] / source / libs / gmp / gmp-src / mpz / powm_ui.c
blobe04bb0ad711f79eb17c47fedc60a038fccc9c19c
/* mpz_powm_ui(res,base,exp,mod) -- Set R to (B^E) mod M.

   Contributed to the GNU project by Torbjörn Granlund.

Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2008, 2009, 2011-2013
Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
35 #include "gmp.h"
36 #include "gmp-impl.h"
37 #include "longlong.h"
/* This code is very old, and should be rewritten to current GMP standard.  It
   is slower than mpz_powm for large exponents, but also for small exponents
   when the mod argument is small.

   As an intermediate solution, we now deflect to mpz_powm for exponents >= 20.

   b ^ e mod m   res
   0   0     0    ?
   0   e     0    ?
   0   0     m    ?
   0   e     m    0
   b   0     0    ?
   b   e     0    ?
   b   0     m    1 mod m
   b   e     m    b^e mod m  */
59 static void
60 mod (mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv, mp_ptr tp)
62 mp_ptr qp;
63 TMP_DECL;
64 TMP_MARK;
66 qp = tp;
68 if (dn == 1)
70 np[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
72 else if (dn == 2)
74 mpn_div_qr_2n_pi1 (qp, np, np, nn, dp[1], dp[0], dinv->inv32);
76 else if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD) ||
77 BELOW_THRESHOLD (nn - dn, DC_DIV_QR_THRESHOLD))
79 mpn_sbpi1_div_qr (qp, np, nn, dp, dn, dinv->inv32);
81 else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) || /* fast condition */
82 BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
83 (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
84 + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn) /* ...condition */
86 mpn_dcpi1_div_qr (qp, np, nn, dp, dn, dinv);
88 else
90 /* We need to allocate separate remainder area, since mpn_mu_div_qr does
91 not handle overlap between the numerator and remainder areas.
92 FIXME: Make it handle such overlap. */
93 mp_ptr rp = TMP_BALLOC_LIMBS (dn);
94 mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
95 mp_ptr scratch = TMP_BALLOC_LIMBS (itch);
96 mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
97 MPN_COPY (np, rp, dn);
100 TMP_FREE;
103 /* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
104 t is defined by (tp,mn). */
105 static void
106 reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn, gmp_pi1_t *dinv)
108 mp_ptr rp, scratch;
109 TMP_DECL;
110 TMP_MARK;
112 rp = TMP_ALLOC_LIMBS (an);
113 scratch = TMP_ALLOC_LIMBS (an - mn + 1);
114 MPN_COPY (rp, ap, an);
115 mod (rp, an, mp, mn, dinv, scratch);
116 MPN_COPY (tp, rp, mn);
118 TMP_FREE;
121 void
122 mpz_powm_ui (mpz_ptr r, mpz_srcptr b, unsigned long int el, mpz_srcptr m)
124 if (el < 20)
126 mp_ptr xp, tp, mp, bp, scratch;
127 mp_size_t xn, tn, mn, bn;
128 int m_zero_cnt;
129 int c;
130 mp_limb_t e, m2;
131 gmp_pi1_t dinv;
132 TMP_DECL;
134 mp = PTR(m);
135 mn = ABSIZ(m);
136 if (UNLIKELY (mn == 0))
137 DIVIDE_BY_ZERO;
139 if (el == 0)
141 /* Exponent is zero, result is 1 mod M, i.e., 1 or 0 depending on if
142 M equals 1. */
143 SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
144 PTR(r)[0] = 1;
145 return;
148 TMP_MARK;
150 /* Normalize m (i.e. make its most significant bit set) as required by
151 division functions below. */
152 count_leading_zeros (m_zero_cnt, mp[mn - 1]);
153 m_zero_cnt -= GMP_NAIL_BITS;
154 if (m_zero_cnt != 0)
156 mp_ptr new_mp = TMP_ALLOC_LIMBS (mn);
157 mpn_lshift (new_mp, mp, mn, m_zero_cnt);
158 mp = new_mp;
161 m2 = mn == 1 ? 0 : mp[mn - 2];
162 invert_pi1 (dinv, mp[mn - 1], m2);
164 bn = ABSIZ(b);
165 bp = PTR(b);
166 if (bn > mn)
168 /* Reduce possibly huge base. Use a function call to reduce, since we
169 don't want the quotient allocation to live until function return. */
170 mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
171 reduce (new_bp, bp, bn, mp, mn, &dinv);
172 bp = new_bp;
173 bn = mn;
174 /* Canonicalize the base, since we are potentially going to multiply with
175 it quite a few times. */
176 MPN_NORMALIZE (bp, bn);
179 if (bn == 0)
181 SIZ(r) = 0;
182 TMP_FREE;
183 return;
186 tp = TMP_ALLOC_LIMBS (2 * mn + 1);
187 xp = TMP_ALLOC_LIMBS (mn);
188 scratch = TMP_ALLOC_LIMBS (mn + 1);
190 MPN_COPY (xp, bp, bn);
191 xn = bn;
193 e = el;
194 count_leading_zeros (c, e);
195 e = (e << c) << 1; /* shift the exp bits to the left, lose msb */
196 c = GMP_LIMB_BITS - 1 - c;
198 if (c == 0)
200 /* If m is already normalized (high bit of high limb set), and b is
201 the same size, but a bigger value, and e==1, then there's no
202 modular reductions done and we can end up with a result out of
203 range at the end. */
204 if (xn == mn && mpn_cmp (xp, mp, mn) >= 0)
205 mpn_sub_n (xp, xp, mp, mn);
207 else
209 /* Main loop. */
212 mpn_sqr (tp, xp, xn);
213 tn = 2 * xn; tn -= tp[tn - 1] == 0;
214 if (tn < mn)
216 MPN_COPY (xp, tp, tn);
217 xn = tn;
219 else
221 mod (tp, tn, mp, mn, &dinv, scratch);
222 MPN_COPY (xp, tp, mn);
223 xn = mn;
226 if ((mp_limb_signed_t) e < 0)
228 mpn_mul (tp, xp, xn, bp, bn);
229 tn = xn + bn; tn -= tp[tn - 1] == 0;
230 if (tn < mn)
232 MPN_COPY (xp, tp, tn);
233 xn = tn;
235 else
237 mod (tp, tn, mp, mn, &dinv, scratch);
238 MPN_COPY (xp, tp, mn);
239 xn = mn;
242 e <<= 1;
243 c--;
245 while (c != 0);
248 /* We shifted m left m_zero_cnt steps. Adjust the result by reducing it
249 with the original M. */
250 if (m_zero_cnt != 0)
252 mp_limb_t cy;
253 cy = mpn_lshift (tp, xp, xn, m_zero_cnt);
254 tp[xn] = cy; xn += cy != 0;
256 if (xn < mn)
258 MPN_COPY (xp, tp, xn);
260 else
262 mod (tp, xn, mp, mn, &dinv, scratch);
263 MPN_COPY (xp, tp, mn);
264 xn = mn;
266 mpn_rshift (xp, xp, xn, m_zero_cnt);
268 MPN_NORMALIZE (xp, xn);
270 if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0)
272 mp = PTR(m); /* want original, unnormalized m */
273 mpn_sub (xp, mp, mn, xp, xn);
274 xn = mn;
275 MPN_NORMALIZE (xp, xn);
277 MPZ_REALLOC (r, xn);
278 SIZ (r) = xn;
279 MPN_COPY (PTR(r), xp, xn);
281 TMP_FREE;
283 else
285 /* For large exponents, fake an mpz_t exponent and deflect to the more
286 sophisticated mpz_powm. */
287 mpz_t e;
288 mp_limb_t ep[LIMBS_PER_ULONG];
289 MPZ_FAKE_UI (e, ep, el);
290 mpz_powm (r, b, e, m);