push 01c92aa04da32804c1907882688df6441b0d3929
[wine/hacks.git] / dlls / rsaenh / mpi.c
blobc6f083b6e2e42838e59e2d87448e85ce41be79f6
1 /*
2 * dlls/rsaenh/mpi.c
3 * Multi Precision Integer functions
5 * Copyright 2004 Michael Jung
6 * Based on public domain code by Tom St Denis (tomstdenis@iahu.ca)
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
24 * This file contains code from the LibTomCrypt cryptographic
25 * library written by Tom St Denis (tomstdenis@iahu.ca). LibTomCrypt
26 * is in the public domain. The code in this file is tailored to
27 * special requirements. Take a look at http://libtomcrypt.org for the
28 * original version.
31 #include <stdarg.h>
32 #include "tomcrypt.h"
34 /* table of first PRIME_SIZE primes */
35 static const mp_digit __prime_tab[];
37 /* Known optimal configurations
38 CPU /Compiler /MUL CUTOFF/SQR CUTOFF
39 -------------------------------------------------------------
40 Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-)
42 static const int KARATSUBA_MUL_CUTOFF = 88, /* Min. number of digits before Karatsuba multiplication is used. */
43 KARATSUBA_SQR_CUTOFF = 128; /* Min. number of digits before Karatsuba squaring is used. */
45 /* computes the modular inverse via binary extended euclidean algorithm,
46 * that is c = 1/a mod b
48 * Based on slow invmod except this is optimized for the case where b is
49 * odd as per HAC Note 14.64 on pp. 610
51 int
52 fast_mp_invmod (const mp_int * a, mp_int * b, mp_int * c)
54 mp_int x, y, u, v, B, D;
55 int res, neg;
57 /* 2. [modified] b must be odd */
58 if (mp_iseven (b) == 1) {
59 return MP_VAL;
62 /* init all our temps */
63 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
64 return res;
67 /* x == modulus, y == value to invert */
68 if ((res = mp_copy (b, &x)) != MP_OKAY) {
69 goto __ERR;
72 /* we need y = |a| */
73 if ((res = mp_abs (a, &y)) != MP_OKAY) {
74 goto __ERR;
77 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
78 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
79 goto __ERR;
81 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
82 goto __ERR;
84 mp_set (&D, 1);
86 top:
87 /* 4. while u is even do */
88 while (mp_iseven (&u) == 1) {
89 /* 4.1 u = u/2 */
90 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
91 goto __ERR;
93 /* 4.2 if B is odd then */
94 if (mp_isodd (&B) == 1) {
95 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
96 goto __ERR;
99 /* B = B/2 */
100 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
101 goto __ERR;
105 /* 5. while v is even do */
106 while (mp_iseven (&v) == 1) {
107 /* 5.1 v = v/2 */
108 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
109 goto __ERR;
111 /* 5.2 if D is odd then */
112 if (mp_isodd (&D) == 1) {
113 /* D = (D-x)/2 */
114 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
115 goto __ERR;
118 /* D = D/2 */
119 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
120 goto __ERR;
124 /* 6. if u >= v then */
125 if (mp_cmp (&u, &v) != MP_LT) {
126 /* u = u - v, B = B - D */
127 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
128 goto __ERR;
131 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
132 goto __ERR;
134 } else {
135 /* v - v - u, D = D - B */
136 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
137 goto __ERR;
140 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
141 goto __ERR;
145 /* if not zero goto step 4 */
146 if (mp_iszero (&u) == 0) {
147 goto top;
150 /* now a = C, b = D, gcd == g*v */
152 /* if v != 1 then there is no inverse */
153 if (mp_cmp_d (&v, 1) != MP_EQ) {
154 res = MP_VAL;
155 goto __ERR;
158 /* b is now the inverse */
159 neg = a->sign;
160 while (D.sign == MP_NEG) {
161 if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
162 goto __ERR;
165 mp_exch (&D, c);
166 c->sign = neg;
167 res = MP_OKAY;
169 __ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
170 return res;
173 /* computes xR**-1 == x (mod N) via Montgomery Reduction
175 * This is an optimized implementation of montgomery_reduce
176 * which uses the comba method to quickly calculate the columns of the
177 * reduction.
179 * Based on Algorithm 14.32 on pp.601 of HAC.
182 fast_mp_montgomery_reduce (mp_int * x, const mp_int * n, mp_digit rho)
184 int ix, res, olduse;
185 mp_word W[MP_WARRAY];
187 /* get old used count */
188 olduse = x->used;
190 /* grow a as required */
191 if (x->alloc < n->used + 1) {
192 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
193 return res;
197 /* first we have to get the digits of the input into
198 * an array of double precision words W[...]
201 register mp_word *_W;
202 register mp_digit *tmpx;
204 /* alias for the W[] array */
205 _W = W;
207 /* alias for the digits of x*/
208 tmpx = x->dp;
210 /* copy the digits of a into W[0..a->used-1] */
211 for (ix = 0; ix < x->used; ix++) {
212 *_W++ = *tmpx++;
215 /* zero the high words of W[a->used..m->used*2] */
216 for (; ix < n->used * 2 + 1; ix++) {
217 *_W++ = 0;
221 /* now we proceed to zero successive digits
222 * from the least significant upwards
224 for (ix = 0; ix < n->used; ix++) {
225 /* mu = ai * m' mod b
227 * We avoid a double precision multiplication (which isn't required)
228 * by casting the value down to a mp_digit. Note this requires
229 * that W[ix-1] have the carry cleared (see after the inner loop)
231 register mp_digit mu;
232 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
234 /* a = a + mu * m * b**i
236 * This is computed in place and on the fly. The multiplication
237 * by b**i is handled by offseting which columns the results
238 * are added to.
240 * Note the comba method normally doesn't handle carries in the
241 * inner loop In this case we fix the carry from the previous
242 * column since the Montgomery reduction requires digits of the
243 * result (so far) [see above] to work. This is
244 * handled by fixing up one carry after the inner loop. The
245 * carry fixups are done in order so after these loops the
246 * first m->used words of W[] have the carries fixed
249 register int iy;
250 register mp_digit *tmpn;
251 register mp_word *_W;
253 /* alias for the digits of the modulus */
254 tmpn = n->dp;
256 /* Alias for the columns set by an offset of ix */
257 _W = W + ix;
259 /* inner loop */
260 for (iy = 0; iy < n->used; iy++) {
261 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
265 /* now fix carry for next digit, W[ix+1] */
266 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
269 /* now we have to propagate the carries and
270 * shift the words downward [all those least
271 * significant digits we zeroed].
274 register mp_digit *tmpx;
275 register mp_word *_W, *_W1;
277 /* nox fix rest of carries */
279 /* alias for current word */
280 _W1 = W + ix;
282 /* alias for next word, where the carry goes */
283 _W = W + ++ix;
285 for (; ix <= n->used * 2 + 1; ix++) {
286 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
289 /* copy out, A = A/b**n
291 * The result is A/b**n but instead of converting from an
292 * array of mp_word to mp_digit than calling mp_rshd
293 * we just copy them in the right order
296 /* alias for destination word */
297 tmpx = x->dp;
299 /* alias for shifted double precision result */
300 _W = W + n->used;
302 for (ix = 0; ix < n->used + 1; ix++) {
303 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
306 /* zero oldused digits, if the input a was larger than
307 * m->used+1 we'll have to clear the digits
309 for (; ix < olduse; ix++) {
310 *tmpx++ = 0;
314 /* set the max used and clamp */
315 x->used = n->used + 1;
316 mp_clamp (x);
318 /* if A >= m then A = A - m */
319 if (mp_cmp_mag (x, n) != MP_LT) {
320 return s_mp_sub (x, n, x);
322 return MP_OKAY;
325 /* Fast (comba) multiplier
327 * This is the fast column-array [comba] multiplier. It is
328 * designed to compute the columns of the product first
329 * then handle the carries afterwards. This has the effect
330 * of making the nested loops that compute the columns very
331 * simple and schedulable on super-scalar processors.
333 * This has been modified to produce a variable number of
334 * digits of output so if say only a half-product is required
335 * you don't have to compute the upper half (a feature
336 * required for fast Barrett reduction).
338 * Based on Algorithm 14.12 on pp.595 of HAC.
342 fast_s_mp_mul_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
344 int olduse, res, pa, ix, iz;
345 mp_digit W[MP_WARRAY];
346 register mp_word _W;
348 /* grow the destination as required */
349 if (c->alloc < digs) {
350 if ((res = mp_grow (c, digs)) != MP_OKAY) {
351 return res;
355 /* number of output digits to produce */
356 pa = MIN(digs, a->used + b->used);
358 /* clear the carry */
359 _W = 0;
360 for (ix = 0; ix <= pa; ix++) {
361 int tx, ty;
362 int iy;
363 mp_digit *tmpx, *tmpy;
365 /* get offsets into the two bignums */
366 ty = MIN(b->used-1, ix);
367 tx = ix - ty;
369 /* setup temp aliases */
370 tmpx = a->dp + tx;
371 tmpy = b->dp + ty;
373 /* this is the number of times the loop will iterrate, essentially its
374 while (tx++ < a->used && ty-- >= 0) { ... }
376 iy = MIN(a->used-tx, ty+1);
378 /* execute loop */
379 for (iz = 0; iz < iy; ++iz) {
380 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
383 /* store term */
384 W[ix] = ((mp_digit)_W) & MP_MASK;
386 /* make next carry */
387 _W = _W >> ((mp_word)DIGIT_BIT);
390 /* setup dest */
391 olduse = c->used;
392 c->used = digs;
395 register mp_digit *tmpc;
396 tmpc = c->dp;
397 for (ix = 0; ix < digs; ix++) {
398 /* now extract the previous digit [below the carry] */
399 *tmpc++ = W[ix];
402 /* clear unused digits [that existed in the old copy of c] */
403 for (; ix < olduse; ix++) {
404 *tmpc++ = 0;
407 mp_clamp (c);
408 return MP_OKAY;
411 /* this is a modified version of fast_s_mul_digs that only produces
412 * output digits *above* digs. See the comments for fast_s_mul_digs
413 * to see how it works.
415 * This is used in the Barrett reduction since for one of the multiplications
416 * only the higher digits were needed. This essentially halves the work.
418 * Based on Algorithm 14.12 on pp.595 of HAC.
421 fast_s_mp_mul_high_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
423 int olduse, res, pa, ix, iz;
424 mp_digit W[MP_WARRAY];
425 mp_word _W;
427 /* grow the destination as required */
428 pa = a->used + b->used;
429 if (c->alloc < pa) {
430 if ((res = mp_grow (c, pa)) != MP_OKAY) {
431 return res;
435 /* number of output digits to produce */
436 pa = a->used + b->used;
437 _W = 0;
438 for (ix = digs; ix <= pa; ix++) {
439 int tx, ty, iy;
440 mp_digit *tmpx, *tmpy;
442 /* get offsets into the two bignums */
443 ty = MIN(b->used-1, ix);
444 tx = ix - ty;
446 /* setup temp aliases */
447 tmpx = a->dp + tx;
448 tmpy = b->dp + ty;
450 /* this is the number of times the loop will iterrate, essentially its
451 while (tx++ < a->used && ty-- >= 0) { ... }
453 iy = MIN(a->used-tx, ty+1);
455 /* execute loop */
456 for (iz = 0; iz < iy; iz++) {
457 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
460 /* store term */
461 W[ix] = ((mp_digit)_W) & MP_MASK;
463 /* make next carry */
464 _W = _W >> ((mp_word)DIGIT_BIT);
467 /* setup dest */
468 olduse = c->used;
469 c->used = pa;
472 register mp_digit *tmpc;
474 tmpc = c->dp + digs;
475 for (ix = digs; ix <= pa; ix++) {
476 /* now extract the previous digit [below the carry] */
477 *tmpc++ = W[ix];
480 /* clear unused digits [that existed in the old copy of c] */
481 for (; ix < olduse; ix++) {
482 *tmpc++ = 0;
485 mp_clamp (c);
486 return MP_OKAY;
489 /* fast squaring
491 * This is the comba method where the columns of the product
492 * are computed first then the carries are computed. This
493 * has the effect of making a very simple inner loop that
494 * is executed the most
496 * W2 represents the outer products and W the inner.
498 * A further optimizations is made because the inner
499 * products are of the form "A * B * 2". The *2 part does
500 * not need to be computed until the end which is good
501 * because 64-bit shifts are slow!
503 * Based on Algorithm 14.16 on pp.597 of HAC.
506 /* the jist of squaring...
508 you do like mult except the offset of the tmpx [one that starts closer to zero]
509 can't equal the offset of tmpy. So basically you set up iy like before then you min it with
510 (ty-tx) so that it never happens. You double all those you add in the inner loop
512 After that loop you do the squares and add them in.
514 Remove W2 and don't memset W
518 int fast_s_mp_sqr (const mp_int * a, mp_int * b)
520 int olduse, res, pa, ix, iz;
521 mp_digit W[MP_WARRAY], *tmpx;
522 mp_word W1;
524 /* grow the destination as required */
525 pa = a->used + a->used;
526 if (b->alloc < pa) {
527 if ((res = mp_grow (b, pa)) != MP_OKAY) {
528 return res;
532 /* number of output digits to produce */
533 W1 = 0;
534 for (ix = 0; ix <= pa; ix++) {
535 int tx, ty, iy;
536 mp_word _W;
537 mp_digit *tmpy;
539 /* clear counter */
540 _W = 0;
542 /* get offsets into the two bignums */
543 ty = MIN(a->used-1, ix);
544 tx = ix - ty;
546 /* setup temp aliases */
547 tmpx = a->dp + tx;
548 tmpy = a->dp + ty;
550 /* this is the number of times the loop will iterrate, essentially its
551 while (tx++ < a->used && ty-- >= 0) { ... }
553 iy = MIN(a->used-tx, ty+1);
555 /* now for squaring tx can never equal ty
556 * we halve the distance since they approach at a rate of 2x
557 * and we have to round because odd cases need to be executed
559 iy = MIN(iy, (ty-tx+1)>>1);
561 /* execute loop */
562 for (iz = 0; iz < iy; iz++) {
563 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
566 /* double the inner product and add carry */
567 _W = _W + _W + W1;
569 /* even columns have the square term in them */
570 if ((ix&1) == 0) {
571 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
574 /* store it */
575 W[ix] = _W;
577 /* make next carry */
578 W1 = _W >> ((mp_word)DIGIT_BIT);
581 /* setup dest */
582 olduse = b->used;
583 b->used = a->used+a->used;
586 mp_digit *tmpb;
587 tmpb = b->dp;
588 for (ix = 0; ix < pa; ix++) {
589 *tmpb++ = W[ix] & MP_MASK;
592 /* clear unused digits [that existed in the old copy of c] */
593 for (; ix < olduse; ix++) {
594 *tmpb++ = 0;
597 mp_clamp (b);
598 return MP_OKAY;
601 /* computes a = 2**b
603 * Simple algorithm which zeroes the int, grows it then just sets one bit
604 * as required.
607 mp_2expt (mp_int * a, int b)
609 int res;
611 /* zero a as per default */
612 mp_zero (a);
614 /* grow a to accommodate the single bit */
615 if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
616 return res;
619 /* set the used count of where the bit will go */
620 a->used = b / DIGIT_BIT + 1;
622 /* put the single bit in its place */
623 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
625 return MP_OKAY;
628 /* b = |a|
630 * Simple function copies the input and fixes the sign to positive
633 mp_abs (const mp_int * a, mp_int * b)
635 int res;
637 /* copy a to b */
638 if (a != b) {
639 if ((res = mp_copy (a, b)) != MP_OKAY) {
640 return res;
644 /* force the sign of b to positive */
645 b->sign = MP_ZPOS;
647 return MP_OKAY;
650 /* high level addition (handles signs) */
651 int mp_add (mp_int * a, mp_int * b, mp_int * c)
653 int sa, sb, res;
655 /* get sign of both inputs */
656 sa = a->sign;
657 sb = b->sign;
659 /* handle two cases, not four */
660 if (sa == sb) {
661 /* both positive or both negative */
662 /* add their magnitudes, copy the sign */
663 c->sign = sa;
664 res = s_mp_add (a, b, c);
665 } else {
666 /* one positive, the other negative */
667 /* subtract the one with the greater magnitude from */
668 /* the one of the lesser magnitude. The result gets */
669 /* the sign of the one with the greater magnitude. */
670 if (mp_cmp_mag (a, b) == MP_LT) {
671 c->sign = sb;
672 res = s_mp_sub (b, a, c);
673 } else {
674 c->sign = sa;
675 res = s_mp_sub (a, b, c);
678 return res;
682 /* single digit addition */
684 mp_add_d (mp_int * a, mp_digit b, mp_int * c)
686 int res, ix, oldused;
687 mp_digit *tmpa, *tmpc, mu;
689 /* grow c as required */
690 if (c->alloc < a->used + 1) {
691 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
692 return res;
696 /* if a is negative and |a| >= b, call c = |a| - b */
697 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
698 /* temporarily fix sign of a */
699 a->sign = MP_ZPOS;
701 /* c = |a| - b */
702 res = mp_sub_d(a, b, c);
704 /* fix sign */
705 a->sign = c->sign = MP_NEG;
707 return res;
710 /* old number of used digits in c */
711 oldused = c->used;
713 /* sign always positive */
714 c->sign = MP_ZPOS;
716 /* source alias */
717 tmpa = a->dp;
719 /* destination alias */
720 tmpc = c->dp;
722 /* if a is positive */
723 if (a->sign == MP_ZPOS) {
724 /* add digit, after this we're propagating
725 * the carry.
727 *tmpc = *tmpa++ + b;
728 mu = *tmpc >> DIGIT_BIT;
729 *tmpc++ &= MP_MASK;
731 /* now handle rest of the digits */
732 for (ix = 1; ix < a->used; ix++) {
733 *tmpc = *tmpa++ + mu;
734 mu = *tmpc >> DIGIT_BIT;
735 *tmpc++ &= MP_MASK;
737 /* set final carry */
738 ix++;
739 *tmpc++ = mu;
741 /* setup size */
742 c->used = a->used + 1;
743 } else {
744 /* a was negative and |a| < b */
745 c->used = 1;
747 /* the result is a single digit */
748 if (a->used == 1) {
749 *tmpc++ = b - a->dp[0];
750 } else {
751 *tmpc++ = b;
754 /* setup count so the clearing of oldused
755 * can fall through correctly
757 ix = 1;
760 /* now zero to oldused */
761 while (ix++ < oldused) {
762 *tmpc++ = 0;
764 mp_clamp(c);
766 return MP_OKAY;
769 /* trim unused digits
771 * This is used to ensure that leading zero digits are
772 * trimed and the leading "used" digit will be non-zero
773 * Typically very fast. Also fixes the sign if there
774 * are no more leading digits
776 void
777 mp_clamp (mp_int * a)
779 /* decrease used while the most significant digit is
780 * zero.
782 while (a->used > 0 && a->dp[a->used - 1] == 0) {
783 --(a->used);
786 /* reset the sign flag if used == 0 */
787 if (a->used == 0) {
788 a->sign = MP_ZPOS;
792 /* clear one (frees) */
793 void
794 mp_clear (mp_int * a)
796 int i;
798 /* only do anything if a hasn't been freed previously */
799 if (a->dp != NULL) {
800 /* first zero the digits */
801 for (i = 0; i < a->used; i++) {
802 a->dp[i] = 0;
805 /* free ram */
806 free(a->dp);
808 /* reset members to make debugging easier */
809 a->dp = NULL;
810 a->alloc = a->used = 0;
811 a->sign = MP_ZPOS;
816 void mp_clear_multi(mp_int *mp, ...)
818 mp_int* next_mp = mp;
819 va_list args;
820 va_start(args, mp);
821 while (next_mp != NULL) {
822 mp_clear(next_mp);
823 next_mp = va_arg(args, mp_int*);
825 va_end(args);
828 /* compare two ints (signed)*/
830 mp_cmp (const mp_int * a, const mp_int * b)
832 /* compare based on sign */
833 if (a->sign != b->sign) {
834 if (a->sign == MP_NEG) {
835 return MP_LT;
836 } else {
837 return MP_GT;
841 /* compare digits */
842 if (a->sign == MP_NEG) {
843 /* if negative compare opposite direction */
844 return mp_cmp_mag(b, a);
845 } else {
846 return mp_cmp_mag(a, b);
850 /* compare a digit */
851 int mp_cmp_d(const mp_int * a, mp_digit b)
853 /* compare based on sign */
854 if (a->sign == MP_NEG) {
855 return MP_LT;
858 /* compare based on magnitude */
859 if (a->used > 1) {
860 return MP_GT;
863 /* compare the only digit of a to b */
864 if (a->dp[0] > b) {
865 return MP_GT;
866 } else if (a->dp[0] < b) {
867 return MP_LT;
868 } else {
869 return MP_EQ;
873 /* compare maginitude of two ints (unsigned) */
874 int mp_cmp_mag (const mp_int * a, const mp_int * b)
876 int n;
877 mp_digit *tmpa, *tmpb;
879 /* compare based on # of non-zero digits */
880 if (a->used > b->used) {
881 return MP_GT;
884 if (a->used < b->used) {
885 return MP_LT;
888 /* alias for a */
889 tmpa = a->dp + (a->used - 1);
891 /* alias for b */
892 tmpb = b->dp + (a->used - 1);
894 /* compare based on digits */
895 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
896 if (*tmpa > *tmpb) {
897 return MP_GT;
900 if (*tmpa < *tmpb) {
901 return MP_LT;
904 return MP_EQ;
907 static const int lnz[16] = {
908 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
911 /* Counts the number of lsbs which are zero before the first zero bit */
912 int mp_cnt_lsb(const mp_int *a)
914 int x;
915 mp_digit q, qq;
917 /* easy out */
918 if (mp_iszero(a) == 1) {
919 return 0;
922 /* scan lower digits until non-zero */
923 for (x = 0; x < a->used && a->dp[x] == 0; x++);
924 q = a->dp[x];
925 x *= DIGIT_BIT;
927 /* now scan this digit until a 1 is found */
928 if ((q & 1) == 0) {
929 do {
930 qq = q & 15;
931 x += lnz[qq];
932 q >>= 4;
933 } while (qq == 0);
935 return x;
938 /* copy, b = a */
940 mp_copy (const mp_int * a, mp_int * b)
942 int res, n;
944 /* if dst == src do nothing */
945 if (a == b) {
946 return MP_OKAY;
949 /* grow dest */
950 if (b->alloc < a->used) {
951 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
952 return res;
956 /* zero b and copy the parameters over */
958 register mp_digit *tmpa, *tmpb;
960 /* pointer aliases */
962 /* source */
963 tmpa = a->dp;
965 /* destination */
966 tmpb = b->dp;
968 /* copy all the digits */
969 for (n = 0; n < a->used; n++) {
970 *tmpb++ = *tmpa++;
973 /* clear high digits */
974 for (; n < b->used; n++) {
975 *tmpb++ = 0;
979 /* copy used count and sign */
980 b->used = a->used;
981 b->sign = a->sign;
982 return MP_OKAY;
985 /* returns the number of bits in an int */
987 mp_count_bits (const mp_int * a)
989 int r;
990 mp_digit q;
992 /* shortcut */
993 if (a->used == 0) {
994 return 0;
997 /* get number of digits and add that */
998 r = (a->used - 1) * DIGIT_BIT;
1000 /* take the last digit and count the bits in it */
1001 q = a->dp[a->used - 1];
1002 while (q > ((mp_digit) 0)) {
1003 ++r;
1004 q >>= ((mp_digit) 1);
1006 return r;
1009 /* integer signed division.
1010 * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
1011 * HAC pp.598 Algorithm 14.20
1013 * Note that the description in HAC is horribly
1014 * incomplete. For example, it doesn't consider
1015 * the case where digits are removed from 'x' in
1016 * the inner loop. It also doesn't consider the
1017 * case that y has fewer than three digits, etc..
1019 * The overall algorithm is as described as
1020 * 14.20 from HAC but fixed to treat these cases.
1022 int mp_div (const mp_int * a, const mp_int * b, mp_int * c, mp_int * d)
1024 mp_int q, x, y, t1, t2;
1025 int res, n, t, i, norm, neg;
1027 /* is divisor zero ? */
1028 if (mp_iszero (b) == 1) {
1029 return MP_VAL;
1032 /* if a < b then q=0, r = a */
1033 if (mp_cmp_mag (a, b) == MP_LT) {
1034 if (d != NULL) {
1035 res = mp_copy (a, d);
1036 } else {
1037 res = MP_OKAY;
1039 if (c != NULL) {
1040 mp_zero (c);
1042 return res;
1045 if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
1046 return res;
1048 q.used = a->used + 2;
1050 if ((res = mp_init (&t1)) != MP_OKAY) {
1051 goto __Q;
1054 if ((res = mp_init (&t2)) != MP_OKAY) {
1055 goto __T1;
1058 if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
1059 goto __T2;
1062 if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
1063 goto __X;
1066 /* fix the sign */
1067 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
1068 x.sign = y.sign = MP_ZPOS;
1070 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1071 norm = mp_count_bits(&y) % DIGIT_BIT;
1072 if (norm < (int)(DIGIT_BIT-1)) {
1073 norm = (DIGIT_BIT-1) - norm;
1074 if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
1075 goto __Y;
1077 if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
1078 goto __Y;
1080 } else {
1081 norm = 0;
1084 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
1085 n = x.used - 1;
1086 t = y.used - 1;
1088 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1089 if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
1090 goto __Y;
1093 while (mp_cmp (&x, &y) != MP_LT) {
1094 ++(q.dp[n - t]);
1095 if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
1096 goto __Y;
1100 /* reset y by shifting it back down */
1101 mp_rshd (&y, n - t);
1103 /* step 3. for i from n down to (t + 1) */
1104 for (i = n; i >= (t + 1); i--) {
1105 if (i > x.used) {
1106 continue;
1109 /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1110 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1111 if (x.dp[i] == y.dp[t]) {
1112 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
1113 } else {
1114 mp_word tmp;
1115 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
1116 tmp |= ((mp_word) x.dp[i - 1]);
1117 tmp /= ((mp_word) y.dp[t]);
1118 if (tmp > (mp_word) MP_MASK)
1119 tmp = MP_MASK;
1120 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
1123 /* while (q{i-t-1} * (yt * b + y{t-1})) >
1124 xi * b**2 + xi-1 * b + xi-2
1126 do q{i-t-1} -= 1;
1128 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
1129 do {
1130 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
1132 /* find left hand */
1133 mp_zero (&t1);
1134 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
1135 t1.dp[1] = y.dp[t];
1136 t1.used = 2;
1137 if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1138 goto __Y;
1141 /* find right hand */
1142 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
1143 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
1144 t2.dp[2] = x.dp[i];
1145 t2.used = 3;
1146 } while (mp_cmp_mag(&t1, &t2) == MP_GT);
1148 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1149 if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1150 goto __Y;
1153 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1154 goto __Y;
1157 if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
1158 goto __Y;
1161 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1162 if (x.sign == MP_NEG) {
1163 if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
1164 goto __Y;
1166 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1167 goto __Y;
1169 if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
1170 goto __Y;
1173 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
1177 /* now q is the quotient and x is the remainder
1178 * [which we have to normalize]
1181 /* get sign before writing to c */
1182 x.sign = x.used == 0 ? MP_ZPOS : a->sign;
1184 if (c != NULL) {
1185 mp_clamp (&q);
1186 mp_exch (&q, c);
1187 c->sign = neg;
1190 if (d != NULL) {
1191 mp_div_2d (&x, norm, &x, NULL);
1192 mp_exch (&x, d);
1195 res = MP_OKAY;
1197 __Y:mp_clear (&y);
1198 __X:mp_clear (&x);
1199 __T2:mp_clear (&t2);
1200 __T1:mp_clear (&t1);
1201 __Q:mp_clear (&q);
1202 return res;
1205 /* b = a/2 */
1206 int mp_div_2(const mp_int * a, mp_int * b)
1208 int x, res, oldused;
1210 /* copy */
1211 if (b->alloc < a->used) {
1212 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1213 return res;
1217 oldused = b->used;
1218 b->used = a->used;
1220 register mp_digit r, rr, *tmpa, *tmpb;
1222 /* source alias */
1223 tmpa = a->dp + b->used - 1;
1225 /* dest alias */
1226 tmpb = b->dp + b->used - 1;
1228 /* carry */
1229 r = 0;
1230 for (x = b->used - 1; x >= 0; x--) {
1231 /* get the carry for the next iteration */
1232 rr = *tmpa & 1;
1234 /* shift the current digit, add in carry and store */
1235 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1237 /* forward carry to next iteration */
1238 r = rr;
1241 /* zero excess digits */
1242 tmpb = b->dp + b->used;
1243 for (x = b->used; x < oldused; x++) {
1244 *tmpb++ = 0;
1247 b->sign = a->sign;
1248 mp_clamp (b);
1249 return MP_OKAY;
1252 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
1253 int mp_div_2d (const mp_int * a, int b, mp_int * c, mp_int * d)
1255 mp_digit D, r, rr;
1256 int x, res;
1257 mp_int t;
1260 /* if the shift count is <= 0 then we do no work */
1261 if (b <= 0) {
1262 res = mp_copy (a, c);
1263 if (d != NULL) {
1264 mp_zero (d);
1266 return res;
1269 if ((res = mp_init (&t)) != MP_OKAY) {
1270 return res;
1273 /* get the remainder */
1274 if (d != NULL) {
1275 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
1276 mp_clear (&t);
1277 return res;
1281 /* copy */
1282 if ((res = mp_copy (a, c)) != MP_OKAY) {
1283 mp_clear (&t);
1284 return res;
1287 /* shift by as many digits in the bit count */
1288 if (b >= (int)DIGIT_BIT) {
1289 mp_rshd (c, b / DIGIT_BIT);
1292 /* shift any bit count < DIGIT_BIT */
1293 D = (mp_digit) (b % DIGIT_BIT);
1294 if (D != 0) {
1295 register mp_digit *tmpc, mask, shift;
1297 /* mask */
1298 mask = (((mp_digit)1) << D) - 1;
1300 /* shift for lsb */
1301 shift = DIGIT_BIT - D;
1303 /* alias */
1304 tmpc = c->dp + (c->used - 1);
1306 /* carry */
1307 r = 0;
1308 for (x = c->used - 1; x >= 0; x--) {
1309 /* get the lower bits of this word in a temp */
1310 rr = *tmpc & mask;
1312 /* shift the current word and mix in the carry bits from the previous word */
1313 *tmpc = (*tmpc >> D) | (r << shift);
1314 --tmpc;
1316 /* set the carry to the carry bits of the current word found above */
1317 r = rr;
1320 mp_clamp (c);
1321 if (d != NULL) {
1322 mp_exch (&t, d);
1324 mp_clear (&t);
1325 return MP_OKAY;
1328 static int s_is_power_of_two(mp_digit b, int *p)
1330 int x;
1332 for (x = 1; x < DIGIT_BIT; x++) {
1333 if (b == (((mp_digit)1)<<x)) {
1334 *p = x;
1335 return 1;
1338 return 0;
1341 /* single digit division (based on routine from MPI) */
1342 int mp_div_d (const mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
1344 mp_int q;
1345 mp_word w;
1346 mp_digit t;
1347 int res, ix;
1349 /* cannot divide by zero */
1350 if (b == 0) {
1351 return MP_VAL;
1354 /* quick outs */
1355 if (b == 1 || mp_iszero(a) == 1) {
1356 if (d != NULL) {
1357 *d = 0;
1359 if (c != NULL) {
1360 return mp_copy(a, c);
1362 return MP_OKAY;
1365 /* power of two ? */
1366 if (s_is_power_of_two(b, &ix) == 1) {
1367 if (d != NULL) {
1368 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
1370 if (c != NULL) {
1371 return mp_div_2d(a, ix, c, NULL);
1373 return MP_OKAY;
1376 /* no easy answer [c'est la vie]. Just division */
1377 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
1378 return res;
1381 q.used = a->used;
1382 q.sign = a->sign;
1383 w = 0;
1384 for (ix = a->used - 1; ix >= 0; ix--) {
1385 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
1387 if (w >= b) {
1388 t = (mp_digit)(w / b);
1389 w -= ((mp_word)t) * ((mp_word)b);
1390 } else {
1391 t = 0;
1393 q.dp[ix] = (mp_digit)t;
1396 if (d != NULL) {
1397 *d = (mp_digit)w;
1400 if (c != NULL) {
1401 mp_clamp(&q);
1402 mp_exch(&q, c);
1404 mp_clear(&q);
1406 return res;
1409 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
1411 * Based on algorithm from the paper
1413 * "Generating Efficient Primes for Discrete Log Cryptosystems"
1414 * Chae Hoon Lim, Pil Loong Lee,
1415 * POSTECH Information Research Laboratories
1417 * The modulus must be of a special format [see manual]
1419 * Has been modified to use algorithm 7.10 from the LTM book instead
1421 * Input x must be in the range 0 <= x <= (n-1)**2
1424 mp_dr_reduce (mp_int * x, const mp_int * n, mp_digit k)
1426 int err, i, m;
1427 mp_word r;
1428 mp_digit mu, *tmpx1, *tmpx2;
1430 /* m = digits in modulus */
1431 m = n->used;
1433 /* ensure that "x" has at least 2m digits */
1434 if (x->alloc < m + m) {
1435 if ((err = mp_grow (x, m + m)) != MP_OKAY) {
1436 return err;
1440 /* top of loop, this is where the code resumes if
1441 * another reduction pass is required.
1443 top:
1444 /* aliases for digits */
1445 /* alias for lower half of x */
1446 tmpx1 = x->dp;
1448 /* alias for upper half of x, or x/B**m */
1449 tmpx2 = x->dp + m;
1451 /* set carry to zero */
1452 mu = 0;
1454 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
1455 for (i = 0; i < m; i++) {
1456 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
1457 *tmpx1++ = (mp_digit)(r & MP_MASK);
1458 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
1461 /* set final carry */
1462 *tmpx1++ = mu;
1464 /* zero words above m */
1465 for (i = m + 1; i < x->used; i++) {
1466 *tmpx1++ = 0;
1469 /* clamp, sub and return */
1470 mp_clamp (x);
1472 /* if x >= n then subtract and reduce again
1473 * Each successive "recursion" makes the input smaller and smaller.
1475 if (mp_cmp_mag (x, n) != MP_LT) {
1476 s_mp_sub(x, n, x);
1477 goto top;
1479 return MP_OKAY;
1482 /* determines the setup value */
1483 void mp_dr_setup(const mp_int *a, mp_digit *d)
1485 /* the casts are required if DIGIT_BIT is one less than
1486 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
1488 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
1489 ((mp_word)a->dp[0]));
1492 /* swap the elements of two integers, for cases where you can't simply swap the
1493 * mp_int pointers around
1495 void
1496 mp_exch (mp_int * a, mp_int * b)
1498 mp_int t;
1500 t = *a;
1501 *a = *b;
1502 *b = t;
1505 /* this is a shell function that calls either the normal or Montgomery
1506 * exptmod functions. Originally the call to the montgomery code was
1507 * embedded in the normal function but that wasted a lot of stack space
1508 * for nothing (since 99% of the time the Montgomery code would be called)
1510 int mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y)
1512 int dr;
1514 /* modulus P must be positive */
1515 if (P->sign == MP_NEG) {
1516 return MP_VAL;
1519 /* if exponent X is negative we have to recurse */
1520 if (X->sign == MP_NEG) {
1521 mp_int tmpG, tmpX;
1522 int err;
1524 /* first compute 1/G mod P */
1525 if ((err = mp_init(&tmpG)) != MP_OKAY) {
1526 return err;
1528 if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
1529 mp_clear(&tmpG);
1530 return err;
1533 /* now get |X| */
1534 if ((err = mp_init(&tmpX)) != MP_OKAY) {
1535 mp_clear(&tmpG);
1536 return err;
1538 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
1539 mp_clear_multi(&tmpG, &tmpX, NULL);
1540 return err;
1543 /* and now compute (1/G)**|X| instead of G**X [X < 0] */
1544 err = mp_exptmod(&tmpG, &tmpX, P, Y);
1545 mp_clear_multi(&tmpG, &tmpX, NULL);
1546 return err;
1549 dr = 0;
1551 /* if the modulus is odd or dr != 0 use the fast method */
1552 if (mp_isodd (P) == 1 || dr != 0) {
1553 return mp_exptmod_fast (G, X, P, Y, dr);
1554 } else {
1555 /* otherwise use the generic Barrett reduction technique */
1556 return s_mp_exptmod (G, X, P, Y);
1560 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1562 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
1563 * The value of k changes based on the size of the exponent.
1565 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1569 mp_exptmod_fast (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y, int redmode)
1571 mp_int M[256], res;
1572 mp_digit buf, mp;
1573 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1575 /* use a pointer to the reduction algorithm. This allows us to use
1576 * one of many reduction algorithms without modding the guts of
1577 * the code with if statements everywhere.
1579 int (*redux)(mp_int*,const mp_int*,mp_digit);
1581 /* find window size */
1582 x = mp_count_bits (X);
1583 if (x <= 7) {
1584 winsize = 2;
1585 } else if (x <= 36) {
1586 winsize = 3;
1587 } else if (x <= 140) {
1588 winsize = 4;
1589 } else if (x <= 450) {
1590 winsize = 5;
1591 } else if (x <= 1303) {
1592 winsize = 6;
1593 } else if (x <= 3529) {
1594 winsize = 7;
1595 } else {
1596 winsize = 8;
1599 /* init M array */
1600 /* init first cell */
1601 if ((err = mp_init(&M[1])) != MP_OKAY) {
1602 return err;
1605 /* now init the second half of the array */
1606 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1607 if ((err = mp_init(&M[x])) != MP_OKAY) {
1608 for (y = 1<<(winsize-1); y < x; y++) {
1609 mp_clear (&M[y]);
1611 mp_clear(&M[1]);
1612 return err;
1616 /* determine and setup reduction code */
1617 if (redmode == 0) {
1618 /* now setup montgomery */
1619 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
1620 goto __M;
1623 /* automatically pick the comba one if available (saves quite a few calls/ifs) */
1624 if (((P->used * 2 + 1) < MP_WARRAY) &&
1625 P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
1626 redux = fast_mp_montgomery_reduce;
1627 } else {
1628 /* use slower baseline Montgomery method */
1629 redux = mp_montgomery_reduce;
1631 } else if (redmode == 1) {
1632 /* setup DR reduction for moduli of the form B**k - b */
1633 mp_dr_setup(P, &mp);
1634 redux = mp_dr_reduce;
1635 } else {
1636 /* setup DR reduction for moduli of the form 2**k - b */
1637 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
1638 goto __M;
1640 redux = mp_reduce_2k;
1643 /* setup result */
1644 if ((err = mp_init (&res)) != MP_OKAY) {
1645 goto __M;
1648 /* create M table
1652 * The first half of the table is not computed though accept for M[0] and M[1]
1655 if (redmode == 0) {
1656 /* now we need R mod m */
1657 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
1658 goto __RES;
1661 /* now set M[1] to G * R mod m */
1662 if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
1663 goto __RES;
1665 } else {
1666 mp_set(&res, 1);
1667 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
1668 goto __RES;
1672 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
1673 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
1674 goto __RES;
1677 for (x = 0; x < (winsize - 1); x++) {
1678 if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
1679 goto __RES;
1681 if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
1682 goto __RES;
1686 /* create upper table */
1687 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
1688 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
1689 goto __RES;
1691 if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
1692 goto __RES;
1696 /* set initial mode and bit cnt */
1697 mode = 0;
1698 bitcnt = 1;
1699 buf = 0;
1700 digidx = X->used - 1;
1701 bitcpy = 0;
1702 bitbuf = 0;
1704 for (;;) {
1705 /* grab next digit as required */
1706 if (--bitcnt == 0) {
1707 /* if digidx == -1 we are out of digits so break */
1708 if (digidx == -1) {
1709 break;
1711 /* read next digit and reset bitcnt */
1712 buf = X->dp[digidx--];
1713 bitcnt = (int)DIGIT_BIT;
1716 /* grab the next msb from the exponent */
1717 y = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
1718 buf <<= (mp_digit)1;
1720 /* if the bit is zero and mode == 0 then we ignore it
1721 * These represent the leading zero bits before the first 1 bit
1722 * in the exponent. Technically this opt is not required but it
1723 * does lower the # of trivial squaring/reductions used
1725 if (mode == 0 && y == 0) {
1726 continue;
1729 /* if the bit is zero and mode == 1 then we square */
1730 if (mode == 1 && y == 0) {
1731 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1732 goto __RES;
1734 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1735 goto __RES;
1737 continue;
1740 /* else we add it to the window */
1741 bitbuf |= (y << (winsize - ++bitcpy));
1742 mode = 2;
1744 if (bitcpy == winsize) {
1745 /* ok window is filled so square as required and multiply */
1746 /* square first */
1747 for (x = 0; x < winsize; x++) {
1748 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1749 goto __RES;
1751 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1752 goto __RES;
1756 /* then multiply */
1757 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
1758 goto __RES;
1760 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1761 goto __RES;
1764 /* empty window and reset */
1765 bitcpy = 0;
1766 bitbuf = 0;
1767 mode = 1;
1771 /* if bits remain then square/multiply */
1772 if (mode == 2 && bitcpy > 0) {
1773 /* square then multiply if the bit is set */
1774 for (x = 0; x < bitcpy; x++) {
1775 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1776 goto __RES;
1778 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1779 goto __RES;
1782 /* get next bit of the window */
1783 bitbuf <<= 1;
1784 if ((bitbuf & (1 << winsize)) != 0) {
1785 /* then multiply */
1786 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
1787 goto __RES;
1789 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1790 goto __RES;
1796 if (redmode == 0) {
1797 /* fixup result if Montgomery reduction is used
1798 * recall that any value in a Montgomery system is
1799 * actually multiplied by R mod n. So we have
1800 * to reduce one more time to cancel out the factor
1801 * of R.
1803 if ((err = redux(&res, P, mp)) != MP_OKAY) {
1804 goto __RES;
1808 /* swap res with Y */
1809 mp_exch (&res, Y);
1810 err = MP_OKAY;
1811 __RES:mp_clear (&res);
1812 __M:
1813 mp_clear(&M[1]);
1814 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1815 mp_clear (&M[x]);
1817 return err;
1820 /* Greatest Common Divisor using the binary method */
1821 int mp_gcd (const mp_int * a, const mp_int * b, mp_int * c)
1823 mp_int u, v;
1824 int k, u_lsb, v_lsb, res;
1826 /* either zero than gcd is the largest */
1827 if (mp_iszero (a) == 1 && mp_iszero (b) == 0) {
1828 return mp_abs (b, c);
1830 if (mp_iszero (a) == 0 && mp_iszero (b) == 1) {
1831 return mp_abs (a, c);
1834 /* optimized. At this point if a == 0 then
1835 * b must equal zero too
1837 if (mp_iszero (a) == 1) {
1838 mp_zero(c);
1839 return MP_OKAY;
1842 /* get copies of a and b we can modify */
1843 if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
1844 return res;
1847 if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
1848 goto __U;
1851 /* must be positive for the remainder of the algorithm */
1852 u.sign = v.sign = MP_ZPOS;
1854 /* B1. Find the common power of two for u and v */
1855 u_lsb = mp_cnt_lsb(&u);
1856 v_lsb = mp_cnt_lsb(&v);
1857 k = MIN(u_lsb, v_lsb);
1859 if (k > 0) {
1860 /* divide the power of two out */
1861 if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
1862 goto __V;
1865 if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
1866 goto __V;
1870 /* divide any remaining factors of two out */
1871 if (u_lsb != k) {
1872 if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
1873 goto __V;
1877 if (v_lsb != k) {
1878 if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
1879 goto __V;
1883 while (mp_iszero(&v) == 0) {
1884 /* make sure v is the largest */
1885 if (mp_cmp_mag(&u, &v) == MP_GT) {
1886 /* swap u and v to make sure v is >= u */
1887 mp_exch(&u, &v);
1890 /* subtract smallest from largest */
1891 if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
1892 goto __V;
1895 /* Divide out all factors of two */
1896 if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
1897 goto __V;
1901 /* multiply by 2**k which we divided out at the beginning */
1902 if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
1903 goto __V;
1905 c->sign = MP_ZPOS;
1906 res = MP_OKAY;
1907 __V:mp_clear (&u);
1908 __U:mp_clear (&v);
1909 return res;
1912 /* get the lower 32-bits of an mp_int */
1913 unsigned long mp_get_int(const mp_int * a)
1915 int i;
1916 unsigned long res;
1918 if (a->used == 0) {
1919 return 0;
1922 /* get number of digits of the lsb we have to read */
1923 i = MIN(a->used,(int)((sizeof(unsigned long)*CHAR_BIT+DIGIT_BIT-1)/DIGIT_BIT))-1;
1925 /* get most significant digit of result */
1926 res = DIGIT(a,i);
1928 while (--i >= 0) {
1929 res = (res << DIGIT_BIT) | DIGIT(a,i);
1932 /* force result to 32-bits always so it is consistent on non 32-bit platforms */
1933 return res & 0xFFFFFFFFUL;
1936 /* grow as required */
1937 int mp_grow (mp_int * a, int size)
1939 int i;
1940 mp_digit *tmp;
1942 /* if the alloc size is smaller alloc more ram */
1943 if (a->alloc < size) {
1944 /* ensure there are always at least MP_PREC digits extra on top */
1945 size += (MP_PREC * 2) - (size % MP_PREC);
1947 /* reallocate the array a->dp
1949 * We store the return in a temporary variable
1950 * in case the operation failed we don't want
1951 * to overwrite the dp member of a.
1953 tmp = realloc (a->dp, sizeof (mp_digit) * size);
1954 if (tmp == NULL) {
1955 /* reallocation failed but "a" is still valid [can be freed] */
1956 return MP_MEM;
1959 /* reallocation succeeded so set a->dp */
1960 a->dp = tmp;
1962 /* zero excess digits */
1963 i = a->alloc;
1964 a->alloc = size;
1965 for (; i < a->alloc; i++) {
1966 a->dp[i] = 0;
1969 return MP_OKAY;
1972 /* init a new mp_int */
1973 int mp_init (mp_int * a)
1975 int i;
1977 /* allocate memory required and clear it */
1978 a->dp = malloc (sizeof (mp_digit) * MP_PREC);
1979 if (a->dp == NULL) {
1980 return MP_MEM;
1983 /* set the digits to zero */
1984 for (i = 0; i < MP_PREC; i++) {
1985 a->dp[i] = 0;
1988 /* set the used to zero, allocated digits to the default precision
1989 * and sign to positive */
1990 a->used = 0;
1991 a->alloc = MP_PREC;
1992 a->sign = MP_ZPOS;
1994 return MP_OKAY;
1997 /* creates "a" then copies b into it */
1998 int mp_init_copy (mp_int * a, const mp_int * b)
2000 int res;
2002 if ((res = mp_init (a)) != MP_OKAY) {
2003 return res;
2005 return mp_copy (b, a);
2008 int mp_init_multi(mp_int *mp, ...)
2010 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */
2011 int n = 0; /* Number of ok inits */
2012 mp_int* cur_arg = mp;
2013 va_list args;
2015 va_start(args, mp); /* init args to next argument from caller */
2016 while (cur_arg != NULL) {
2017 if (mp_init(cur_arg) != MP_OKAY) {
2018 /* Oops - error! Back-track and mp_clear what we already
2019 succeeded in init-ing, then return error.
2021 va_list clean_args;
2023 /* end the current list */
2024 va_end(args);
2026 /* now start cleaning up */
2027 cur_arg = mp;
2028 va_start(clean_args, mp);
2029 while (n--) {
2030 mp_clear(cur_arg);
2031 cur_arg = va_arg(clean_args, mp_int*);
2033 va_end(clean_args);
2034 res = MP_MEM;
2035 break;
2037 n++;
2038 cur_arg = va_arg(args, mp_int*);
2040 va_end(args);
2041 return res; /* Assumed ok, if error flagged above. */
2044 /* init an mp_init for a given size */
2045 int mp_init_size (mp_int * a, int size)
2047 int x;
2049 /* pad size so there are always extra digits */
2050 size += (MP_PREC * 2) - (size % MP_PREC);
2052 /* alloc mem */
2053 a->dp = malloc (sizeof (mp_digit) * size);
2054 if (a->dp == NULL) {
2055 return MP_MEM;
2058 /* set the members */
2059 a->used = 0;
2060 a->alloc = size;
2061 a->sign = MP_ZPOS;
2063 /* zero the digits */
2064 for (x = 0; x < size; x++) {
2065 a->dp[x] = 0;
2068 return MP_OKAY;
2071 /* hac 14.61, pp608 */
2072 int mp_invmod (const mp_int * a, mp_int * b, mp_int * c)
2074 /* b cannot be negative */
2075 if (b->sign == MP_NEG || mp_iszero(b) == 1) {
2076 return MP_VAL;
2079 /* if the modulus is odd we can use a faster routine instead */
2080 if (mp_isodd (b) == 1) {
2081 return fast_mp_invmod (a, b, c);
2084 return mp_invmod_slow(a, b, c);
2087 /* hac 14.61, pp608 */
2088 int mp_invmod_slow (const mp_int * a, mp_int * b, mp_int * c)
2090 mp_int x, y, u, v, A, B, C, D;
2091 int res;
2093 /* b cannot be negative */
2094 if (b->sign == MP_NEG || mp_iszero(b) == 1) {
2095 return MP_VAL;
2098 /* init temps */
2099 if ((res = mp_init_multi(&x, &y, &u, &v,
2100 &A, &B, &C, &D, NULL)) != MP_OKAY) {
2101 return res;
2104 /* x = a, y = b */
2105 if ((res = mp_copy (a, &x)) != MP_OKAY) {
2106 goto __ERR;
2108 if ((res = mp_copy (b, &y)) != MP_OKAY) {
2109 goto __ERR;
2112 /* 2. [modified] if x,y are both even then return an error! */
2113 if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
2114 res = MP_VAL;
2115 goto __ERR;
2118 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
2119 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
2120 goto __ERR;
2122 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
2123 goto __ERR;
2125 mp_set (&A, 1);
2126 mp_set (&D, 1);
2128 top:
2129 /* 4. while u is even do */
2130 while (mp_iseven (&u) == 1) {
2131 /* 4.1 u = u/2 */
2132 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
2133 goto __ERR;
2135 /* 4.2 if A or B is odd then */
2136 if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
2137 /* A = (A+y)/2, B = (B-x)/2 */
2138 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
2139 goto __ERR;
2141 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
2142 goto __ERR;
2145 /* A = A/2, B = B/2 */
2146 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
2147 goto __ERR;
2149 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
2150 goto __ERR;
2154 /* 5. while v is even do */
2155 while (mp_iseven (&v) == 1) {
2156 /* 5.1 v = v/2 */
2157 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
2158 goto __ERR;
2160 /* 5.2 if C or D is odd then */
2161 if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
2162 /* C = (C+y)/2, D = (D-x)/2 */
2163 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
2164 goto __ERR;
2166 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
2167 goto __ERR;
2170 /* C = C/2, D = D/2 */
2171 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
2172 goto __ERR;
2174 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
2175 goto __ERR;
2179 /* 6. if u >= v then */
2180 if (mp_cmp (&u, &v) != MP_LT) {
2181 /* u = u - v, A = A - C, B = B - D */
2182 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
2183 goto __ERR;
2186 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
2187 goto __ERR;
2190 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
2191 goto __ERR;
2193 } else {
2194 /* v - v - u, C = C - A, D = D - B */
2195 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
2196 goto __ERR;
2199 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
2200 goto __ERR;
2203 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
2204 goto __ERR;
2208 /* if not zero goto step 4 */
2209 if (mp_iszero (&u) == 0)
2210 goto top;
2212 /* now a = C, b = D, gcd == g*v */
2214 /* if v != 1 then there is no inverse */
2215 if (mp_cmp_d (&v, 1) != MP_EQ) {
2216 res = MP_VAL;
2217 goto __ERR;
2220 /* if its too low */
2221 while (mp_cmp_d(&C, 0) == MP_LT) {
2222 if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
2223 goto __ERR;
2227 /* too big */
2228 while (mp_cmp_mag(&C, b) != MP_LT) {
2229 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
2230 goto __ERR;
2234 /* C is now the inverse */
2235 mp_exch (&C, c);
2236 res = MP_OKAY;
2237 __ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
2238 return res;
2241 /* c = |a| * |b| using Karatsuba Multiplication using
2242 * three half size multiplications
2244 * Let B represent the radix [e.g. 2**DIGIT_BIT] and
2245 * let n represent half of the number of digits in
2246 * the min(a,b)
2248 * a = a1 * B**n + a0
2249 * b = b1 * B**n + b0
2251 * Then, a * b =>
2252 a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0
2254 * Note that a1b1 and a0b0 are used twice and only need to be
2255 * computed once. So in total three half size (half # of
2256 * digit) multiplications are performed, a0b0, a1b1 and
2257 * (a1-b1)(a0-b0)
2259 * Note that a multiplication of half the digits requires
2260 * 1/4th the number of single precision multiplications so in
2261 * total after one call 25% of the single precision multiplications
2262 * are saved. Note also that the call to mp_mul can end up back
2263 * in this function if the a0, a1, b0, or b1 are above the threshold.
2264 * This is known as divide-and-conquer and leads to the famous
2265 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
2266 * the standard O(N**2) that the baseline/comba methods use.
2267 * Generally though the overhead of this method doesn't pay off
2268 * until a certain size (N ~ 80) is reached.
2270 int mp_karatsuba_mul (const mp_int * a, const mp_int * b, mp_int * c)
2272 mp_int x0, x1, y0, y1, t1, x0y0, x1y1;
2273 int B, err;
2275 /* default the return code to an error */
2276 err = MP_MEM;
2278 /* min # of digits */
2279 B = MIN (a->used, b->used);
2281 /* now divide in two */
2282 B = B >> 1;
2284 /* init copy all the temps */
2285 if (mp_init_size (&x0, B) != MP_OKAY)
2286 goto ERR;
2287 if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2288 goto X0;
2289 if (mp_init_size (&y0, B) != MP_OKAY)
2290 goto X1;
2291 if (mp_init_size (&y1, b->used - B) != MP_OKAY)
2292 goto Y0;
2294 /* init temps */
2295 if (mp_init_size (&t1, B * 2) != MP_OKAY)
2296 goto Y1;
2297 if (mp_init_size (&x0y0, B * 2) != MP_OKAY)
2298 goto T1;
2299 if (mp_init_size (&x1y1, B * 2) != MP_OKAY)
2300 goto X0Y0;
2302 /* now shift the digits */
2303 x0.used = y0.used = B;
2304 x1.used = a->used - B;
2305 y1.used = b->used - B;
2308 register int x;
2309 register mp_digit *tmpa, *tmpb, *tmpx, *tmpy;
2311 /* we copy the digits directly instead of using higher level functions
2312 * since we also need to shift the digits
2314 tmpa = a->dp;
2315 tmpb = b->dp;
2317 tmpx = x0.dp;
2318 tmpy = y0.dp;
2319 for (x = 0; x < B; x++) {
2320 *tmpx++ = *tmpa++;
2321 *tmpy++ = *tmpb++;
2324 tmpx = x1.dp;
2325 for (x = B; x < a->used; x++) {
2326 *tmpx++ = *tmpa++;
2329 tmpy = y1.dp;
2330 for (x = B; x < b->used; x++) {
2331 *tmpy++ = *tmpb++;
2335 /* only need to clamp the lower words since by definition the
2336 * upper words x1/y1 must have a known number of digits
2338 mp_clamp (&x0);
2339 mp_clamp (&y0);
2341 /* now calc the products x0y0 and x1y1 */
2342 /* after this x0 is no longer required, free temp [x0==t2]! */
2343 if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
2344 goto X1Y1; /* x0y0 = x0*y0 */
2345 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
2346 goto X1Y1; /* x1y1 = x1*y1 */
2348 /* now calc x1-x0 and y1-y0 */
2349 if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
2350 goto X1Y1; /* t1 = x1 - x0 */
2351 if (mp_sub (&y1, &y0, &x0) != MP_OKAY)
2352 goto X1Y1; /* t2 = y1 - y0 */
2353 if (mp_mul (&t1, &x0, &t1) != MP_OKAY)
2354 goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */
2356 /* add x0y0 */
2357 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY)
2358 goto X1Y1; /* t2 = x0y0 + x1y1 */
2359 if (mp_sub (&x0, &t1, &t1) != MP_OKAY)
2360 goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
2362 /* shift by B */
2363 if (mp_lshd (&t1, B) != MP_OKAY)
2364 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
2365 if (mp_lshd (&x1y1, B * 2) != MP_OKAY)
2366 goto X1Y1; /* x1y1 = x1y1 << 2*B */
2368 if (mp_add (&x0y0, &t1, &t1) != MP_OKAY)
2369 goto X1Y1; /* t1 = x0y0 + t1 */
2370 if (mp_add (&t1, &x1y1, c) != MP_OKAY)
2371 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */
2373 /* Algorithm succeeded set the return code to MP_OKAY */
2374 err = MP_OKAY;
2376 X1Y1:mp_clear (&x1y1);
2377 X0Y0:mp_clear (&x0y0);
2378 T1:mp_clear (&t1);
2379 Y1:mp_clear (&y1);
2380 Y0:mp_clear (&y0);
2381 X1:mp_clear (&x1);
2382 X0:mp_clear (&x0);
2383 ERR:
2384 return err;
2387 /* Karatsuba squaring, computes b = a*a using three
2388 * half size squarings
2390 * See comments of karatsuba_mul for details. It
2391 * is essentially the same algorithm but merely
2392 * tuned to perform recursive squarings.
2394 int mp_karatsuba_sqr (const mp_int * a, mp_int * b)
2396 mp_int x0, x1, t1, t2, x0x0, x1x1;
2397 int B, err;
2399 err = MP_MEM;
2401 /* min # of digits */
2402 B = a->used;
2404 /* now divide in two */
2405 B = B >> 1;
2407 /* init copy all the temps */
2408 if (mp_init_size (&x0, B) != MP_OKAY)
2409 goto ERR;
2410 if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2411 goto X0;
2413 /* init temps */
2414 if (mp_init_size (&t1, a->used * 2) != MP_OKAY)
2415 goto X1;
2416 if (mp_init_size (&t2, a->used * 2) != MP_OKAY)
2417 goto T1;
2418 if (mp_init_size (&x0x0, B * 2) != MP_OKAY)
2419 goto T2;
2420 if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY)
2421 goto X0X0;
2424 register int x;
2425 register mp_digit *dst, *src;
2427 src = a->dp;
2429 /* now shift the digits */
2430 dst = x0.dp;
2431 for (x = 0; x < B; x++) {
2432 *dst++ = *src++;
2435 dst = x1.dp;
2436 for (x = B; x < a->used; x++) {
2437 *dst++ = *src++;
2441 x0.used = B;
2442 x1.used = a->used - B;
2444 mp_clamp (&x0);
2446 /* now calc the products x0*x0 and x1*x1 */
2447 if (mp_sqr (&x0, &x0x0) != MP_OKAY)
2448 goto X1X1; /* x0x0 = x0*x0 */
2449 if (mp_sqr (&x1, &x1x1) != MP_OKAY)
2450 goto X1X1; /* x1x1 = x1*x1 */
2452 /* now calc (x1-x0)**2 */
2453 if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
2454 goto X1X1; /* t1 = x1 - x0 */
2455 if (mp_sqr (&t1, &t1) != MP_OKAY)
2456 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */
2458 /* add x0y0 */
2459 if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
2460 goto X1X1; /* t2 = x0x0 + x1x1 */
2461 if (mp_sub (&t2, &t1, &t1) != MP_OKAY)
2462 goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */
2464 /* shift by B */
2465 if (mp_lshd (&t1, B) != MP_OKAY)
2466 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
2467 if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
2468 goto X1X1; /* x1x1 = x1x1 << 2*B */
2470 if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
2471 goto X1X1; /* t1 = x0x0 + t1 */
2472 if (mp_add (&t1, &x1x1, b) != MP_OKAY)
2473 goto X1X1; /* t1 = x0x0 + t1 + x1x1 */
2475 err = MP_OKAY;
2477 X1X1:mp_clear (&x1x1);
2478 X0X0:mp_clear (&x0x0);
2479 T2:mp_clear (&t2);
2480 T1:mp_clear (&t1);
2481 X1:mp_clear (&x1);
2482 X0:mp_clear (&x0);
2483 ERR:
2484 return err;
2487 /* computes least common multiple as |a*b|/(a, b) */
2488 int mp_lcm (const mp_int * a, const mp_int * b, mp_int * c)
2490 int res;
2491 mp_int t1, t2;
2494 if ((res = mp_init_multi (&t1, &t2, NULL)) != MP_OKAY) {
2495 return res;
2498 /* t1 = get the GCD of the two inputs */
2499 if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
2500 goto __T;
2503 /* divide the smallest by the GCD */
2504 if (mp_cmp_mag(a, b) == MP_LT) {
2505 /* store quotient in t2 such that t2 * b is the LCM */
2506 if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
2507 goto __T;
2509 res = mp_mul(b, &t2, c);
2510 } else {
2511 /* store quotient in t2 such that t2 * a is the LCM */
2512 if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
2513 goto __T;
2515 res = mp_mul(a, &t2, c);
2518 /* fix the sign to positive */
2519 c->sign = MP_ZPOS;
2521 __T:
2522 mp_clear_multi (&t1, &t2, NULL);
2523 return res;
2526 /* shift left a certain amount of digits */
2527 int mp_lshd (mp_int * a, int b)
2529 int x, res;
2531 /* if its less than zero return */
2532 if (b <= 0) {
2533 return MP_OKAY;
2536 /* grow to fit the new digits */
2537 if (a->alloc < a->used + b) {
2538 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
2539 return res;
2544 register mp_digit *top, *bottom;
2546 /* increment the used by the shift amount then copy upwards */
2547 a->used += b;
2549 /* top */
2550 top = a->dp + a->used - 1;
2552 /* base */
2553 bottom = a->dp + a->used - 1 - b;
2555 /* much like mp_rshd this is implemented using a sliding window
2556 * except the window goes the otherway around. Copying from
2557 * the bottom to the top. see bn_mp_rshd.c for more info.
2559 for (x = a->used - 1; x >= b; x--) {
2560 *top-- = *bottom--;
2563 /* zero the lower digits */
2564 top = a->dp;
2565 for (x = 0; x < b; x++) {
2566 *top++ = 0;
2569 return MP_OKAY;
2572 /* c = a mod b, 0 <= c < b */
2574 mp_mod (const mp_int * a, mp_int * b, mp_int * c)
2576 mp_int t;
2577 int res;
2579 if ((res = mp_init (&t)) != MP_OKAY) {
2580 return res;
2583 if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
2584 mp_clear (&t);
2585 return res;
2588 if (t.sign != b->sign) {
2589 res = mp_add (b, &t, c);
2590 } else {
2591 res = MP_OKAY;
2592 mp_exch (&t, c);
2595 mp_clear (&t);
2596 return res;
2599 /* calc a value mod 2**b */
2601 mp_mod_2d (const mp_int * a, int b, mp_int * c)
2603 int x, res;
2605 /* if b is <= 0 then zero the int */
2606 if (b <= 0) {
2607 mp_zero (c);
2608 return MP_OKAY;
2611 /* if the modulus is larger than the value than return */
2612 if (b > (int) (a->used * DIGIT_BIT)) {
2613 res = mp_copy (a, c);
2614 return res;
2617 /* copy */
2618 if ((res = mp_copy (a, c)) != MP_OKAY) {
2619 return res;
2622 /* zero digits above the last digit of the modulus */
2623 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
2624 c->dp[x] = 0;
2626 /* clear the digit that is not completely outside/inside the modulus */
2627 c->dp[b / DIGIT_BIT] &=
2628 (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
2629 mp_clamp (c);
2630 return MP_OKAY;
2634 mp_mod_d (const mp_int * a, mp_digit b, mp_digit * c)
2636 return mp_div_d(a, b, NULL, c);
2640 * shifts with subtractions when the result is greater than b.
2642 * The method is slightly modified to shift B unconditionally up to just under
2643 * the leading bit of b. This saves a lot of multiple precision shifting.
2645 int mp_montgomery_calc_normalization (mp_int * a, const mp_int * b)
2647 int x, bits, res;
2649 /* how many bits of last digit does b use */
2650 bits = mp_count_bits (b) % DIGIT_BIT;
2653 if (b->used > 1) {
2654 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
2655 return res;
2657 } else {
2658 mp_set(a, 1);
2659 bits = 1;
2663 /* now compute C = A * B mod b */
2664 for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
2665 if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
2666 return res;
2668 if (mp_cmp_mag (a, b) != MP_LT) {
2669 if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
2670 return res;
2675 return MP_OKAY;
2678 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
2680 mp_montgomery_reduce (mp_int * x, const mp_int * n, mp_digit rho)
2682 int ix, res, digs;
2683 mp_digit mu;
2685 /* can the fast reduction [comba] method be used?
2687 * Note that unlike in mul you're safely allowed *less*
2688 * than the available columns [255 per default] since carries
2689 * are fixed up in the inner loop.
2691 digs = n->used * 2 + 1;
2692 if ((digs < MP_WARRAY) &&
2693 n->used <
2694 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2695 return fast_mp_montgomery_reduce (x, n, rho);
2698 /* grow the input as required */
2699 if (x->alloc < digs) {
2700 if ((res = mp_grow (x, digs)) != MP_OKAY) {
2701 return res;
2704 x->used = digs;
2706 for (ix = 0; ix < n->used; ix++) {
2707 /* mu = ai * rho mod b
2709 * The value of rho must be precalculated via
2710 * montgomery_setup() such that
2711 * it equals -1/n0 mod b this allows the
2712 * following inner loop to reduce the
2713 * input one digit at a time
2715 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
2717 /* a = a + mu * m * b**i */
2719 register int iy;
2720 register mp_digit *tmpn, *tmpx, u;
2721 register mp_word r;
2723 /* alias for digits of the modulus */
2724 tmpn = n->dp;
2726 /* alias for the digits of x [the input] */
2727 tmpx = x->dp + ix;
2729 /* set the carry to zero */
2730 u = 0;
2732 /* Multiply and add in place */
2733 for (iy = 0; iy < n->used; iy++) {
2734 /* compute product and sum */
2735 r = ((mp_word)mu) * ((mp_word)*tmpn++) +
2736 ((mp_word) u) + ((mp_word) * tmpx);
2738 /* get carry */
2739 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2741 /* fix digit */
2742 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
2744 /* At this point the ix'th digit of x should be zero */
2747 /* propagate carries upwards as required*/
2748 while (u) {
2749 *tmpx += u;
2750 u = *tmpx >> DIGIT_BIT;
2751 *tmpx++ &= MP_MASK;
2756 /* at this point the n.used'th least
2757 * significant digits of x are all zero
2758 * which means we can shift x to the
2759 * right by n.used digits and the
2760 * residue is unchanged.
2763 /* x = x/b**n.used */
2764 mp_clamp(x);
2765 mp_rshd (x, n->used);
2767 /* if x >= n then x = x - n */
2768 if (mp_cmp_mag (x, n) != MP_LT) {
2769 return s_mp_sub (x, n, x);
2772 return MP_OKAY;
2775 /* setups the montgomery reduction stuff */
2777 mp_montgomery_setup (const mp_int * n, mp_digit * rho)
2779 mp_digit x, b;
2781 /* fast inversion mod 2**k
2783 * Based on the fact that
2785 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
2786 * => 2*X*A - X*X*A*A = 1
2787 * => 2*(1) - (1) = 1
2789 b = n->dp[0];
2791 if ((b & 1) == 0) {
2792 return MP_VAL;
2795 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
2796 x *= 2 - b * x; /* here x*a==1 mod 2**8 */
2797 x *= 2 - b * x; /* here x*a==1 mod 2**16 */
2798 x *= 2 - b * x; /* here x*a==1 mod 2**32 */
2800 /* rho = -1/m mod b */
2801 *rho = (((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
2803 return MP_OKAY;
2806 /* high level multiplication (handles sign) */
2807 int mp_mul (const mp_int * a, const mp_int * b, mp_int * c)
2809 int res, neg;
2810 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
2812 /* use Karatsuba? */
2813 if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
2814 res = mp_karatsuba_mul (a, b, c);
2815 } else
2817 /* can we use the fast multiplier?
2819 * The fast multiplier can be used if the output will
2820 * have less than MP_WARRAY digits and the number of
2821 * digits won't affect carry propagation
2823 int digs = a->used + b->used + 1;
2825 if ((digs < MP_WARRAY) &&
2826 MIN(a->used, b->used) <=
2827 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2828 res = fast_s_mp_mul_digs (a, b, c, digs);
2829 } else
2830 res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
2832 c->sign = (c->used > 0) ? neg : MP_ZPOS;
2833 return res;
2836 /* b = a*2 */
2837 int mp_mul_2(const mp_int * a, mp_int * b)
2839 int x, res, oldused;
2841 /* grow to accommodate result */
2842 if (b->alloc < a->used + 1) {
2843 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
2844 return res;
2848 oldused = b->used;
2849 b->used = a->used;
2852 register mp_digit r, rr, *tmpa, *tmpb;
2854 /* alias for source */
2855 tmpa = a->dp;
2857 /* alias for dest */
2858 tmpb = b->dp;
2860 /* carry */
2861 r = 0;
2862 for (x = 0; x < a->used; x++) {
2864 /* get what will be the *next* carry bit from the
2865 * MSB of the current digit
2867 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
2869 /* now shift up this digit, add in the carry [from the previous] */
2870 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
2872 /* copy the carry that would be from the source
2873 * digit into the next iteration
2875 r = rr;
2878 /* new leading digit? */
2879 if (r != 0) {
2880 /* add a MSB which is always 1 at this point */
2881 *tmpb = 1;
2882 ++(b->used);
2885 /* now zero any excess digits on the destination
2886 * that we didn't write to
2888 tmpb = b->dp + b->used;
2889 for (x = b->used; x < oldused; x++) {
2890 *tmpb++ = 0;
2893 b->sign = a->sign;
2894 return MP_OKAY;
2897 /* shift left by a certain bit count */
2898 int mp_mul_2d (const mp_int * a, int b, mp_int * c)
2900 mp_digit d;
2901 int res;
2903 /* copy */
2904 if (a != c) {
2905 if ((res = mp_copy (a, c)) != MP_OKAY) {
2906 return res;
2910 if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
2911 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
2912 return res;
2916 /* shift by as many digits in the bit count */
2917 if (b >= (int)DIGIT_BIT) {
2918 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
2919 return res;
2923 /* shift any bit count < DIGIT_BIT */
2924 d = (mp_digit) (b % DIGIT_BIT);
2925 if (d != 0) {
2926 register mp_digit *tmpc, shift, mask, r, rr;
2927 register int x;
2929 /* bitmask for carries */
2930 mask = (((mp_digit)1) << d) - 1;
2932 /* shift for msbs */
2933 shift = DIGIT_BIT - d;
2935 /* alias */
2936 tmpc = c->dp;
2938 /* carry */
2939 r = 0;
2940 for (x = 0; x < c->used; x++) {
2941 /* get the higher bits of the current word */
2942 rr = (*tmpc >> shift) & mask;
2944 /* shift the current word and OR in the carry */
2945 *tmpc = ((*tmpc << d) | r) & MP_MASK;
2946 ++tmpc;
2948 /* set the carry to the carry bits of the current word */
2949 r = rr;
2952 /* set final carry */
2953 if (r != 0) {
2954 c->dp[(c->used)++] = r;
2957 mp_clamp (c);
2958 return MP_OKAY;
2961 /* multiply by a digit */
2963 mp_mul_d (const mp_int * a, mp_digit b, mp_int * c)
2965 mp_digit u, *tmpa, *tmpc;
2966 mp_word r;
2967 int ix, res, olduse;
2969 /* make sure c is big enough to hold a*b */
2970 if (c->alloc < a->used + 1) {
2971 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
2972 return res;
2976 /* get the original destinations used count */
2977 olduse = c->used;
2979 /* set the sign */
2980 c->sign = a->sign;
2982 /* alias for a->dp [source] */
2983 tmpa = a->dp;
2985 /* alias for c->dp [dest] */
2986 tmpc = c->dp;
2988 /* zero carry */
2989 u = 0;
2991 /* compute columns */
2992 for (ix = 0; ix < a->used; ix++) {
2993 /* compute product and carry sum for this term */
2994 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
2996 /* mask off higher bits to get a single digit */
2997 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
2999 /* send carry into next iteration */
3000 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3003 /* store final carry [if any] */
3004 *tmpc++ = u;
3006 /* now zero digits above the top */
3007 while (ix++ < olduse) {
3008 *tmpc++ = 0;
3011 /* set used count */
3012 c->used = a->used + 1;
3013 mp_clamp(c);
3015 return MP_OKAY;
3018 /* d = a * b (mod c) */
3020 mp_mulmod (const mp_int * a, const mp_int * b, mp_int * c, mp_int * d)
3022 int res;
3023 mp_int t;
3025 if ((res = mp_init (&t)) != MP_OKAY) {
3026 return res;
3029 if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
3030 mp_clear (&t);
3031 return res;
3033 res = mp_mod (&t, c, d);
3034 mp_clear (&t);
3035 return res;
3038 /* determines if an integers is divisible by one
3039 * of the first PRIME_SIZE primes or not
3041 * sets result to 0 if not, 1 if yes
3043 int mp_prime_is_divisible (const mp_int * a, int *result)
3045 int err, ix;
3046 mp_digit res;
3048 /* default to not */
3049 *result = MP_NO;
3051 for (ix = 0; ix < PRIME_SIZE; ix++) {
3052 /* what is a mod __prime_tab[ix] */
3053 if ((err = mp_mod_d (a, __prime_tab[ix], &res)) != MP_OKAY) {
3054 return err;
3057 /* is the residue zero? */
3058 if (res == 0) {
3059 *result = MP_YES;
3060 return MP_OKAY;
3064 return MP_OKAY;
3067 /* performs a variable number of rounds of Miller-Rabin
3069 * Probability of error after t rounds is no more than
3072 * Sets result to 1 if probably prime, 0 otherwise
3074 int mp_prime_is_prime (mp_int * a, int t, int *result)
3076 mp_int b;
3077 int ix, err, res;
3079 /* default to no */
3080 *result = MP_NO;
3082 /* valid value of t? */
3083 if (t <= 0 || t > PRIME_SIZE) {
3084 return MP_VAL;
3087 /* is the input equal to one of the primes in the table? */
3088 for (ix = 0; ix < PRIME_SIZE; ix++) {
3089 if (mp_cmp_d(a, __prime_tab[ix]) == MP_EQ) {
3090 *result = 1;
3091 return MP_OKAY;
3095 /* first perform trial division */
3096 if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
3097 return err;
3100 /* return if it was trivially divisible */
3101 if (res == MP_YES) {
3102 return MP_OKAY;
3105 /* now perform the miller-rabin rounds */
3106 if ((err = mp_init (&b)) != MP_OKAY) {
3107 return err;
3110 for (ix = 0; ix < t; ix++) {
3111 /* set the prime */
3112 mp_set (&b, __prime_tab[ix]);
3114 if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
3115 goto __B;
3118 if (res == MP_NO) {
3119 goto __B;
3123 /* passed the test */
3124 *result = MP_YES;
3125 __B:mp_clear (&b);
3126 return err;
3129 /* Miller-Rabin test of "a" to the base of "b" as described in
3130 * HAC pp. 139 Algorithm 4.24
3132 * Sets result to 0 if definitely composite or 1 if probably prime.
3133 * Randomly the chance of error is no more than 1/4 and often
3134 * very much lower.
3136 int mp_prime_miller_rabin (mp_int * a, const mp_int * b, int *result)
3138 mp_int n1, y, r;
3139 int s, j, err;
3141 /* default */
3142 *result = MP_NO;
3144 /* ensure b > 1 */
3145 if (mp_cmp_d(b, 1) != MP_GT) {
3146 return MP_VAL;
3149 /* get n1 = a - 1 */
3150 if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
3151 return err;
3153 if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
3154 goto __N1;
3157 /* set 2**s * r = n1 */
3158 if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
3159 goto __N1;
3162 /* count the number of least significant bits
3163 * which are zero
3165 s = mp_cnt_lsb(&r);
3167 /* now divide n - 1 by 2**s */
3168 if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
3169 goto __R;
3172 /* compute y = b**r mod a */
3173 if ((err = mp_init (&y)) != MP_OKAY) {
3174 goto __R;
3176 if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
3177 goto __Y;
3180 /* if y != 1 and y != n1 do */
3181 if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
3182 j = 1;
3183 /* while j <= s-1 and y != n1 */
3184 while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
3185 if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
3186 goto __Y;
3189 /* if y == 1 then composite */
3190 if (mp_cmp_d (&y, 1) == MP_EQ) {
3191 goto __Y;
3194 ++j;
3197 /* if y != n1 then composite */
3198 if (mp_cmp (&y, &n1) != MP_EQ) {
3199 goto __Y;
3203 /* probably prime now */
3204 *result = MP_YES;
3205 __Y:mp_clear (&y);
3206 __R:mp_clear (&r);
3207 __N1:mp_clear (&n1);
3208 return err;
3211 static const struct {
3212 int k, t;
3213 } sizes[] = {
3214 { 128, 28 },
3215 { 256, 16 },
3216 { 384, 10 },
3217 { 512, 7 },
3218 { 640, 6 },
3219 { 768, 5 },
3220 { 896, 4 },
3221 { 1024, 4 }
3224 /* returns # of RM trials required for a given bit size */
3225 int mp_prime_rabin_miller_trials(int size)
3227 int x;
3229 for (x = 0; x < (int)(sizeof(sizes)/(sizeof(sizes[0]))); x++) {
3230 if (sizes[x].k == size) {
3231 return sizes[x].t;
3232 } else if (sizes[x].k > size) {
3233 return (x == 0) ? sizes[0].t : sizes[x - 1].t;
3236 return sizes[x-1].t + 1;
3239 /* makes a truly random prime of a given size (bits),
3241 * Flags are as follows:
3243 * LTM_PRIME_BBS - make prime congruent to 3 mod 4
3244 * LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS)
3245 * LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero
3246 * LTM_PRIME_2MSB_ON - make the 2nd highest bit one
3248 * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can
3249 * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself
3250 * so it can be NULL
3254 /* This is possibly the mother of all prime generation functions, muahahahahaha! */
3255 int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat)
3257 unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb;
3258 int res, err, bsize, maskOR_msb_offset;
3260 /* sanity check the input */
3261 if (size <= 1 || t <= 0) {
3262 return MP_VAL;
3265 /* LTM_PRIME_SAFE implies LTM_PRIME_BBS */
3266 if (flags & LTM_PRIME_SAFE) {
3267 flags |= LTM_PRIME_BBS;
3270 /* calc the byte size */
3271 bsize = (size>>3)+((size&7)?1:0);
3273 /* we need a buffer of bsize bytes */
3274 tmp = malloc(bsize);
3275 if (tmp == NULL) {
3276 return MP_MEM;
3279 /* calc the maskAND value for the MSbyte*/
3280 maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7)));
3282 /* calc the maskOR_msb */
3283 maskOR_msb = 0;
3284 maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;
3285 if (flags & LTM_PRIME_2MSB_ON) {
3286 maskOR_msb |= 1 << ((size - 2) & 7);
3287 } else if (flags & LTM_PRIME_2MSB_OFF) {
3288 maskAND &= ~(1 << ((size - 2) & 7));
3291 /* get the maskOR_lsb */
3292 maskOR_lsb = 0;
3293 if (flags & LTM_PRIME_BBS) {
3294 maskOR_lsb |= 3;
3297 do {
3298 /* read the bytes */
3299 if (cb(tmp, bsize, dat) != bsize) {
3300 err = MP_VAL;
3301 goto error;
3304 /* work over the MSbyte */
3305 tmp[0] &= maskAND;
3306 tmp[0] |= 1 << ((size - 1) & 7);
3308 /* mix in the maskORs */
3309 tmp[maskOR_msb_offset] |= maskOR_msb;
3310 tmp[bsize-1] |= maskOR_lsb;
3312 /* read it in */
3313 if ((err = mp_read_unsigned_bin(a, tmp, bsize)) != MP_OKAY) { goto error; }
3315 /* is it prime? */
3316 if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; }
3317 if (res == MP_NO) {
3318 continue;
3321 if (flags & LTM_PRIME_SAFE) {
3322 /* see if (a-1)/2 is prime */
3323 if ((err = mp_sub_d(a, 1, a)) != MP_OKAY) { goto error; }
3324 if ((err = mp_div_2(a, a)) != MP_OKAY) { goto error; }
3326 /* is it prime? */
3327 if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; }
3329 } while (res == MP_NO);
3331 if (flags & LTM_PRIME_SAFE) {
3332 /* restore a to the original value */
3333 if ((err = mp_mul_2(a, a)) != MP_OKAY) { goto error; }
3334 if ((err = mp_add_d(a, 1, a)) != MP_OKAY) { goto error; }
3337 err = MP_OKAY;
3338 error:
3339 free(tmp);
3340 return err;
3343 /* reads an unsigned char array, assumes the msb is stored first [big endian] */
3345 mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
3347 int res;
3349 /* make sure there are at least two digits */
3350 if (a->alloc < 2) {
3351 if ((res = mp_grow(a, 2)) != MP_OKAY) {
3352 return res;
3356 /* zero the int */
3357 mp_zero (a);
3359 /* read the bytes in */
3360 while (c-- > 0) {
3361 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
3362 return res;
3365 a->dp[0] |= *b++;
3366 a->used += 1;
3368 mp_clamp (a);
3369 return MP_OKAY;
3372 /* reduces x mod m, assumes 0 < x < m**2, mu is
3373 * precomputed via mp_reduce_setup.
3374 * From HAC pp.604 Algorithm 14.42
3377 mp_reduce (mp_int * x, const mp_int * m, const mp_int * mu)
3379 mp_int q;
3380 int res, um = m->used;
3382 /* q = x */
3383 if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
3384 return res;
3387 /* q1 = x / b**(k-1) */
3388 mp_rshd (&q, um - 1);
3390 /* according to HAC this optimization is ok */
3391 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
3392 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
3393 goto CLEANUP;
3395 } else {
3396 if ((res = s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) {
3397 goto CLEANUP;
3401 /* q3 = q2 / b**(k+1) */
3402 mp_rshd (&q, um + 1);
3404 /* x = x mod b**(k+1), quick (no division) */
3405 if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
3406 goto CLEANUP;
3409 /* q = q * m mod b**(k+1), quick (no division) */
3410 if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
3411 goto CLEANUP;
3414 /* x = x - q */
3415 if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
3416 goto CLEANUP;
3419 /* If x < 0, add b**(k+1) to it */
3420 if (mp_cmp_d (x, 0) == MP_LT) {
3421 mp_set (&q, 1);
3422 if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
3423 goto CLEANUP;
3424 if ((res = mp_add (x, &q, x)) != MP_OKAY)
3425 goto CLEANUP;
3428 /* Back off if it's too big */
3429 while (mp_cmp (x, m) != MP_LT) {
3430 if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
3431 goto CLEANUP;
3435 CLEANUP:
3436 mp_clear (&q);
3438 return res;
3441 /* reduces a modulo n where n is of the form 2**p - d */
3443 mp_reduce_2k(mp_int *a, const mp_int *n, mp_digit d)
3445 mp_int q;
3446 int p, res;
3448 if ((res = mp_init(&q)) != MP_OKAY) {
3449 return res;
3452 p = mp_count_bits(n);
3453 top:
3454 /* q = a/2**p, a = a mod 2**p */
3455 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
3456 goto ERR;
3459 if (d != 1) {
3460 /* q = q * d */
3461 if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
3462 goto ERR;
3466 /* a = a + q */
3467 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
3468 goto ERR;
3471 if (mp_cmp_mag(a, n) != MP_LT) {
3472 s_mp_sub(a, n, a);
3473 goto top;
3476 ERR:
3477 mp_clear(&q);
3478 return res;
3481 /* determines the setup value */
3482 int
3483 mp_reduce_2k_setup(const mp_int *a, mp_digit *d)
3485 int res, p;
3486 mp_int tmp;
3488 if ((res = mp_init(&tmp)) != MP_OKAY) {
3489 return res;
3492 p = mp_count_bits(a);
3493 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
3494 mp_clear(&tmp);
3495 return res;
3498 if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
3499 mp_clear(&tmp);
3500 return res;
3503 *d = tmp.dp[0];
3504 mp_clear(&tmp);
3505 return MP_OKAY;
3508 /* pre-calculate the value required for Barrett reduction
3509 * For a given modulus "b" it calulates the value required in "a"
3511 int mp_reduce_setup (mp_int * a, const mp_int * b)
3513 int res;
3515 if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
3516 return res;
3518 return mp_div (a, b, a, NULL);
3521 /* shift right a certain amount of digits */
3522 void mp_rshd (mp_int * a, int b)
3524 int x;
3526 /* if b <= 0 then ignore it */
3527 if (b <= 0) {
3528 return;
3531 /* if b > used then simply zero it and return */
3532 if (a->used <= b) {
3533 mp_zero (a);
3534 return;
3538 register mp_digit *bottom, *top;
3540 /* shift the digits down */
3542 /* bottom */
3543 bottom = a->dp;
3545 /* top [offset into digits] */
3546 top = a->dp + b;
3548 /* this is implemented as a sliding window where
3549 * the window is b-digits long and digits from
3550 * the top of the window are copied to the bottom
3552 * e.g.
3554 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
3555 /\ | ---->
3556 \-------------------/ ---->
3558 for (x = 0; x < (a->used - b); x++) {
3559 *bottom++ = *top++;
3562 /* zero the top digits */
3563 for (; x < a->used; x++) {
3564 *bottom++ = 0;
3568 /* remove excess digits */
3569 a->used -= b;
3572 /* set to a digit */
3573 void mp_set (mp_int * a, mp_digit b)
3575 mp_zero (a);
3576 a->dp[0] = b & MP_MASK;
3577 a->used = (a->dp[0] != 0) ? 1 : 0;
3580 /* set a 32-bit const */
3581 int mp_set_int (mp_int * a, unsigned long b)
3583 int x, res;
3585 mp_zero (a);
3587 /* set four bits at a time */
3588 for (x = 0; x < 8; x++) {
3589 /* shift the number up four bits */
3590 if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
3591 return res;
3594 /* OR in the top four bits of the source */
3595 a->dp[0] |= (b >> 28) & 15;
3597 /* shift the source up to the next four bits */
3598 b <<= 4;
3600 /* ensure that digits are not clamped off */
3601 a->used += 1;
3603 mp_clamp (a);
3604 return MP_OKAY;
3607 /* shrink a bignum */
3608 int mp_shrink (mp_int * a)
3610 mp_digit *tmp;
3611 if (a->alloc != a->used && a->used > 0) {
3612 if ((tmp = realloc (a->dp, sizeof (mp_digit) * a->used)) == NULL) {
3613 return MP_MEM;
3615 a->dp = tmp;
3616 a->alloc = a->used;
3618 return MP_OKAY;
3621 /* get the size for an signed equivalent */
3622 int mp_signed_bin_size (const mp_int * a)
3624 return 1 + mp_unsigned_bin_size (a);
3627 /* computes b = a*a */
3629 mp_sqr (const mp_int * a, mp_int * b)
3631 int res;
3633 if (a->used >= KARATSUBA_SQR_CUTOFF) {
3634 res = mp_karatsuba_sqr (a, b);
3635 } else
3637 /* can we use the fast comba multiplier? */
3638 if ((a->used * 2 + 1) < MP_WARRAY &&
3639 a->used <
3640 (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
3641 res = fast_s_mp_sqr (a, b);
3642 } else
3643 res = s_mp_sqr (a, b);
3645 b->sign = MP_ZPOS;
3646 return res;
3649 /* c = a * a (mod b) */
3651 mp_sqrmod (const mp_int * a, mp_int * b, mp_int * c)
3653 int res;
3654 mp_int t;
3656 if ((res = mp_init (&t)) != MP_OKAY) {
3657 return res;
3660 if ((res = mp_sqr (a, &t)) != MP_OKAY) {
3661 mp_clear (&t);
3662 return res;
3664 res = mp_mod (&t, b, c);
3665 mp_clear (&t);
3666 return res;
3669 /* high level subtraction (handles signs) */
3671 mp_sub (mp_int * a, mp_int * b, mp_int * c)
3673 int sa, sb, res;
3675 sa = a->sign;
3676 sb = b->sign;
3678 if (sa != sb) {
3679 /* subtract a negative from a positive, OR */
3680 /* subtract a positive from a negative. */
3681 /* In either case, ADD their magnitudes, */
3682 /* and use the sign of the first number. */
3683 c->sign = sa;
3684 res = s_mp_add (a, b, c);
3685 } else {
3686 /* subtract a positive from a positive, OR */
3687 /* subtract a negative from a negative. */
3688 /* First, take the difference between their */
3689 /* magnitudes, then... */
3690 if (mp_cmp_mag (a, b) != MP_LT) {
3691 /* Copy the sign from the first */
3692 c->sign = sa;
3693 /* The first has a larger or equal magnitude */
3694 res = s_mp_sub (a, b, c);
3695 } else {
3696 /* The result has the *opposite* sign from */
3697 /* the first number. */
3698 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
3699 /* The second has a larger magnitude */
3700 res = s_mp_sub (b, a, c);
3703 return res;
3706 /* single digit subtraction */
3708 mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
3710 mp_digit *tmpa, *tmpc, mu;
3711 int res, ix, oldused;
3713 /* grow c as required */
3714 if (c->alloc < a->used + 1) {
3715 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
3716 return res;
3720 /* if a is negative just do an unsigned
3721 * addition [with fudged signs]
3723 if (a->sign == MP_NEG) {
3724 a->sign = MP_ZPOS;
3725 res = mp_add_d(a, b, c);
3726 a->sign = c->sign = MP_NEG;
3727 return res;
3730 /* setup regs */
3731 oldused = c->used;
3732 tmpa = a->dp;
3733 tmpc = c->dp;
3735 /* if a <= b simply fix the single digit */
3736 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
3737 if (a->used == 1) {
3738 *tmpc++ = b - *tmpa;
3739 } else {
3740 *tmpc++ = b;
3742 ix = 1;
3744 /* negative/1digit */
3745 c->sign = MP_NEG;
3746 c->used = 1;
3747 } else {
3748 /* positive/size */
3749 c->sign = MP_ZPOS;
3750 c->used = a->used;
3752 /* subtract first digit */
3753 *tmpc = *tmpa++ - b;
3754 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3755 *tmpc++ &= MP_MASK;
3757 /* handle rest of the digits */
3758 for (ix = 1; ix < a->used; ix++) {
3759 *tmpc = *tmpa++ - mu;
3760 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3761 *tmpc++ &= MP_MASK;
3765 /* zero excess digits */
3766 while (ix++ < oldused) {
3767 *tmpc++ = 0;
3769 mp_clamp(c);
3770 return MP_OKAY;
3773 /* store in unsigned [big endian] format */
3775 mp_to_unsigned_bin (const mp_int * a, unsigned char *b)
3777 int x, res;
3778 mp_int t;
3780 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
3781 return res;
3784 x = 0;
3785 while (mp_iszero (&t) == 0) {
3786 b[x++] = (unsigned char) (t.dp[0] & 255);
3787 if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
3788 mp_clear (&t);
3789 return res;
3792 bn_reverse (b, x);
3793 mp_clear (&t);
3794 return MP_OKAY;
3797 /* get the size for an unsigned equivalent */
3799 mp_unsigned_bin_size (const mp_int * a)
3801 int size = mp_count_bits (a);
3802 return (size / 8 + ((size & 7) != 0 ? 1 : 0));
3805 /* set to zero */
3806 void
3807 mp_zero (mp_int * a)
3809 a->sign = MP_ZPOS;
3810 a->used = 0;
3811 memset (a->dp, 0, sizeof (mp_digit) * a->alloc);
3814 static const mp_digit __prime_tab[] = {
3815 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
3816 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
3817 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
3818 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
3819 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
3820 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
3821 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
3822 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
3824 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
3825 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
3826 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
3827 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
3828 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
3829 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
3830 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
3831 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
3833 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
3834 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
3835 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
3836 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
3837 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
3838 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
3839 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
3840 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
3842 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
3843 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
3844 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
3845 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
3846 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
3847 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
3848 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
3849 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
3852 /* reverse an array, used for radix code */
3853 void
3854 bn_reverse (unsigned char *s, int len)
3856 int ix, iy;
3857 unsigned char t;
3859 ix = 0;
3860 iy = len - 1;
3861 while (ix < iy) {
3862 t = s[ix];
3863 s[ix] = s[iy];
3864 s[iy] = t;
3865 ++ix;
3866 --iy;
3870 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
3872 s_mp_add (mp_int * a, mp_int * b, mp_int * c)
3874 mp_int *x;
3875 int olduse, res, min, max;
3877 /* find sizes, we let |a| <= |b| which means we have to sort
3878 * them. "x" will point to the input with the most digits
3880 if (a->used > b->used) {
3881 min = b->used;
3882 max = a->used;
3883 x = a;
3884 } else {
3885 min = a->used;
3886 max = b->used;
3887 x = b;
3890 /* init result */
3891 if (c->alloc < max + 1) {
3892 if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
3893 return res;
3897 /* get old used digit count and set new one */
3898 olduse = c->used;
3899 c->used = max + 1;
3902 register mp_digit u, *tmpa, *tmpb, *tmpc;
3903 register int i;
3905 /* alias for digit pointers */
3907 /* first input */
3908 tmpa = a->dp;
3910 /* second input */
3911 tmpb = b->dp;
3913 /* destination */
3914 tmpc = c->dp;
3916 /* zero the carry */
3917 u = 0;
3918 for (i = 0; i < min; i++) {
3919 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
3920 *tmpc = *tmpa++ + *tmpb++ + u;
3922 /* U = carry bit of T[i] */
3923 u = *tmpc >> ((mp_digit)DIGIT_BIT);
3925 /* take away carry bit from T[i] */
3926 *tmpc++ &= MP_MASK;
3929 /* now copy higher words if any, that is in A+B
3930 * if A or B has more digits add those in
3932 if (min != max) {
3933 for (; i < max; i++) {
3934 /* T[i] = X[i] + U */
3935 *tmpc = x->dp[i] + u;
3937 /* U = carry bit of T[i] */
3938 u = *tmpc >> ((mp_digit)DIGIT_BIT);
3940 /* take away carry bit from T[i] */
3941 *tmpc++ &= MP_MASK;
3945 /* add carry */
3946 *tmpc++ = u;
3948 /* clear digits above oldused */
3949 for (i = c->used; i < olduse; i++) {
3950 *tmpc++ = 0;
3954 mp_clamp (c);
3955 return MP_OKAY;
3958 int s_mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y)
3960 mp_int M[256], res, mu;
3961 mp_digit buf;
3962 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
3964 /* find window size */
3965 x = mp_count_bits (X);
3966 if (x <= 7) {
3967 winsize = 2;
3968 } else if (x <= 36) {
3969 winsize = 3;
3970 } else if (x <= 140) {
3971 winsize = 4;
3972 } else if (x <= 450) {
3973 winsize = 5;
3974 } else if (x <= 1303) {
3975 winsize = 6;
3976 } else if (x <= 3529) {
3977 winsize = 7;
3978 } else {
3979 winsize = 8;
3982 /* init M array */
3983 /* init first cell */
3984 if ((err = mp_init(&M[1])) != MP_OKAY) {
3985 return err;
3988 /* now init the second half of the array */
3989 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3990 if ((err = mp_init(&M[x])) != MP_OKAY) {
3991 for (y = 1<<(winsize-1); y < x; y++) {
3992 mp_clear (&M[y]);
3994 mp_clear(&M[1]);
3995 return err;
3999 /* create mu, used for Barrett reduction */
4000 if ((err = mp_init (&mu)) != MP_OKAY) {
4001 goto __M;
4003 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
4004 goto __MU;
4007 /* create M table
4009 * The M table contains powers of the base,
4010 * e.g. M[x] = G**x mod P
4012 * The first half of the table is not
4013 * computed though accept for M[0] and M[1]
4015 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
4016 goto __MU;
4019 /* compute the value at M[1<<(winsize-1)] by squaring
4020 * M[1] (winsize-1) times
4022 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
4023 goto __MU;
4026 for (x = 0; x < (winsize - 1); x++) {
4027 if ((err = mp_sqr (&M[1 << (winsize - 1)],
4028 &M[1 << (winsize - 1)])) != MP_OKAY) {
4029 goto __MU;
4031 if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
4032 goto __MU;
4036 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
4037 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
4039 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
4040 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
4041 goto __MU;
4043 if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
4044 goto __MU;
4048 /* setup result */
4049 if ((err = mp_init (&res)) != MP_OKAY) {
4050 goto __MU;
4052 mp_set (&res, 1);
4054 /* set initial mode and bit cnt */
4055 mode = 0;
4056 bitcnt = 1;
4057 buf = 0;
4058 digidx = X->used - 1;
4059 bitcpy = 0;
4060 bitbuf = 0;
4062 for (;;) {
4063 /* grab next digit as required */
4064 if (--bitcnt == 0) {
4065 /* if digidx == -1 we are out of digits */
4066 if (digidx == -1) {
4067 break;
4069 /* read next digit and reset the bitcnt */
4070 buf = X->dp[digidx--];
4071 bitcnt = (int) DIGIT_BIT;
4074 /* grab the next msb from the exponent */
4075 y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
4076 buf <<= (mp_digit)1;
4078 /* if the bit is zero and mode == 0 then we ignore it
4079 * These represent the leading zero bits before the first 1 bit
4080 * in the exponent. Technically this opt is not required but it
4081 * does lower the # of trivial squaring/reductions used
4083 if (mode == 0 && y == 0) {
4084 continue;
4087 /* if the bit is zero and mode == 1 then we square */
4088 if (mode == 1 && y == 0) {
4089 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4090 goto __RES;
4092 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4093 goto __RES;
4095 continue;
4098 /* else we add it to the window */
4099 bitbuf |= (y << (winsize - ++bitcpy));
4100 mode = 2;
4102 if (bitcpy == winsize) {
4103 /* ok window is filled so square as required and multiply */
4104 /* square first */
4105 for (x = 0; x < winsize; x++) {
4106 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4107 goto __RES;
4109 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4110 goto __RES;
4114 /* then multiply */
4115 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
4116 goto __RES;
4118 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4119 goto __RES;
4122 /* empty window and reset */
4123 bitcpy = 0;
4124 bitbuf = 0;
4125 mode = 1;
4129 /* if bits remain then square/multiply */
4130 if (mode == 2 && bitcpy > 0) {
4131 /* square then multiply if the bit is set */
4132 for (x = 0; x < bitcpy; x++) {
4133 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4134 goto __RES;
4136 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4137 goto __RES;
4140 bitbuf <<= 1;
4141 if ((bitbuf & (1 << winsize)) != 0) {
4142 /* then multiply */
4143 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
4144 goto __RES;
4146 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4147 goto __RES;
4153 mp_exch (&res, Y);
4154 err = MP_OKAY;
4155 __RES:mp_clear (&res);
4156 __MU:mp_clear (&mu);
4157 __M:
4158 mp_clear(&M[1]);
4159 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4160 mp_clear (&M[x]);
4162 return err;
4165 /* multiplies |a| * |b| and only computes up to digs digits of result
4166 * HAC pp. 595, Algorithm 14.12 Modified so you can control how
4167 * many digits of output are created.
4170 s_mp_mul_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
4172 mp_int t;
4173 int res, pa, pb, ix, iy;
4174 mp_digit u;
4175 mp_word r;
4176 mp_digit tmpx, *tmpt, *tmpy;
4178 /* can we use the fast multiplier? */
4179 if (((digs) < MP_WARRAY) &&
4180 MIN (a->used, b->used) <
4181 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4182 return fast_s_mp_mul_digs (a, b, c, digs);
4185 if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
4186 return res;
4188 t.used = digs;
4190 /* compute the digits of the product directly */
4191 pa = a->used;
4192 for (ix = 0; ix < pa; ix++) {
4193 /* set the carry to zero */
4194 u = 0;
4196 /* limit ourselves to making digs digits of output */
4197 pb = MIN (b->used, digs - ix);
4199 /* setup some aliases */
4200 /* copy of the digit from a used within the nested loop */
4201 tmpx = a->dp[ix];
4203 /* an alias for the destination shifted ix places */
4204 tmpt = t.dp + ix;
4206 /* an alias for the digits of b */
4207 tmpy = b->dp;
4209 /* compute the columns of the output and propagate the carry */
4210 for (iy = 0; iy < pb; iy++) {
4211 /* compute the column as a mp_word */
4212 r = ((mp_word)*tmpt) +
4213 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
4214 ((mp_word) u);
4216 /* the new column is the lower part of the result */
4217 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4219 /* get the carry word from the result */
4220 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
4222 /* set carry if it is placed below digs */
4223 if (ix + iy < digs) {
4224 *tmpt = u;
4228 mp_clamp (&t);
4229 mp_exch (&t, c);
4231 mp_clear (&t);
4232 return MP_OKAY;
4235 /* multiplies |a| * |b| and does not compute the lower digs digits
4236 * [meant to get the higher part of the product]
4239 s_mp_mul_high_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
4241 mp_int t;
4242 int res, pa, pb, ix, iy;
4243 mp_digit u;
4244 mp_word r;
4245 mp_digit tmpx, *tmpt, *tmpy;
4247 /* can we use the fast multiplier? */
4248 if (((a->used + b->used + 1) < MP_WARRAY)
4249 && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4250 return fast_s_mp_mul_high_digs (a, b, c, digs);
4253 if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
4254 return res;
4256 t.used = a->used + b->used + 1;
4258 pa = a->used;
4259 pb = b->used;
4260 for (ix = 0; ix < pa; ix++) {
4261 /* clear the carry */
4262 u = 0;
4264 /* left hand side of A[ix] * B[iy] */
4265 tmpx = a->dp[ix];
4267 /* alias to the address of where the digits will be stored */
4268 tmpt = &(t.dp[digs]);
4270 /* alias for where to read the right hand side from */
4271 tmpy = b->dp + (digs - ix);
4273 for (iy = digs - ix; iy < pb; iy++) {
4274 /* calculate the double precision result */
4275 r = ((mp_word)*tmpt) +
4276 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
4277 ((mp_word) u);
4279 /* get the lower part */
4280 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4282 /* carry the carry */
4283 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
4285 *tmpt = u;
4287 mp_clamp (&t);
4288 mp_exch (&t, c);
4289 mp_clear (&t);
4290 return MP_OKAY;
4293 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
4295 s_mp_sqr (const mp_int * a, mp_int * b)
4297 mp_int t;
4298 int res, ix, iy, pa;
4299 mp_word r;
4300 mp_digit u, tmpx, *tmpt;
4302 pa = a->used;
4303 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
4304 return res;
4307 /* default used is maximum possible size */
4308 t.used = 2*pa + 1;
4310 for (ix = 0; ix < pa; ix++) {
4311 /* first calculate the digit at 2*ix */
4312 /* calculate double precision result */
4313 r = ((mp_word) t.dp[2*ix]) +
4314 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
4316 /* store lower part in result */
4317 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
4319 /* get the carry */
4320 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4322 /* left hand side of A[ix] * A[iy] */
4323 tmpx = a->dp[ix];
4325 /* alias for where to store the results */
4326 tmpt = t.dp + (2*ix + 1);
4328 for (iy = ix + 1; iy < pa; iy++) {
4329 /* first calculate the product */
4330 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
4332 /* now calculate the double precision result, note we use
4333 * addition instead of *2 since it's easier to optimize
4335 r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
4337 /* store lower part */
4338 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4340 /* get carry */
4341 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4343 /* propagate upwards */
4344 while (u != ((mp_digit) 0)) {
4345 r = ((mp_word) *tmpt) + ((mp_word) u);
4346 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4347 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4351 mp_clamp (&t);
4352 mp_exch (&t, b);
4353 mp_clear (&t);
4354 return MP_OKAY;
4357 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
4359 s_mp_sub (const mp_int * a, const mp_int * b, mp_int * c)
4361 int olduse, res, min, max;
4363 /* find sizes */
4364 min = b->used;
4365 max = a->used;
4367 /* init result */
4368 if (c->alloc < max) {
4369 if ((res = mp_grow (c, max)) != MP_OKAY) {
4370 return res;
4373 olduse = c->used;
4374 c->used = max;
4377 register mp_digit u, *tmpa, *tmpb, *tmpc;
4378 register int i;
4380 /* alias for digit pointers */
4381 tmpa = a->dp;
4382 tmpb = b->dp;
4383 tmpc = c->dp;
4385 /* set carry to zero */
4386 u = 0;
4387 for (i = 0; i < min; i++) {
4388 /* T[i] = A[i] - B[i] - U */
4389 *tmpc = *tmpa++ - *tmpb++ - u;
4391 /* U = carry bit of T[i]
4392 * Note this saves performing an AND operation since
4393 * if a carry does occur it will propagate all the way to the
4394 * MSB. As a result a single shift is enough to get the carry
4396 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
4398 /* Clear carry from T[i] */
4399 *tmpc++ &= MP_MASK;
4402 /* now copy higher words if any, e.g. if A has more digits than B */
4403 for (; i < max; i++) {
4404 /* T[i] = A[i] - U */
4405 *tmpc = *tmpa++ - u;
4407 /* U = carry bit of T[i] */
4408 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
4410 /* Clear carry from T[i] */
4411 *tmpc++ &= MP_MASK;
4414 /* clear digits above used (since we may not have grown result above) */
4415 for (i = c->used; i < olduse; i++) {
4416 *tmpc++ = 0;
4420 mp_clamp (c);
4421 return MP_OKAY;