user32/tests: Compare against ASCII string first since it may not be a valid Unicode...
[wine/multimedia.git] / dlls / rsaenh / mpi.c
bloba7408a4243ce7622d8697c0e246f195e208cdeaa
1 /*
2 * dlls/rsaenh/mpi.c
3 * Multi Precision Integer functions
5 * Copyright 2004 Michael Jung
6 * Based on public domain code by Tom St Denis (tomstdenis@iahu.ca)
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
24 * This file contains code from the LibTomCrypt cryptographic
25 * library written by Tom St Denis (tomstdenis@iahu.ca). LibTomCrypt
26 * is in the public domain. The code in this file is tailored to
27 * special requirements. Take a look at http://libtomcrypt.org for the
28 * original version.
31 #include <stdarg.h>
32 #include "tomcrypt.h"
34 /* Known optimal configurations
35 CPU /Compiler /MUL CUTOFF/SQR CUTOFF
36 -------------------------------------------------------------
37 Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-)
39 static const int KARATSUBA_MUL_CUTOFF = 88, /* Min. number of digits before Karatsuba multiplication is used. */
40 KARATSUBA_SQR_CUTOFF = 128; /* Min. number of digits before Karatsuba squaring is used. */
42 /* computes the modular inverse via binary extended euclidean algorithm,
43 * that is c = 1/a mod b
45 * Based on slow invmod except this is optimized for the case where b is
46 * odd as per HAC Note 14.64 on pp. 610
48 int
49 fast_mp_invmod (const mp_int * a, mp_int * b, mp_int * c)
51 mp_int x, y, u, v, B, D;
52 int res, neg;
54 /* 2. [modified] b must be odd */
55 if (mp_iseven (b) == 1) {
56 return MP_VAL;
59 /* init all our temps */
60 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
61 return res;
64 /* x == modulus, y == value to invert */
65 if ((res = mp_copy (b, &x)) != MP_OKAY) {
66 goto __ERR;
69 /* we need y = |a| */
70 if ((res = mp_abs (a, &y)) != MP_OKAY) {
71 goto __ERR;
74 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
75 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
76 goto __ERR;
78 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
79 goto __ERR;
81 mp_set (&D, 1);
83 top:
84 /* 4. while u is even do */
85 while (mp_iseven (&u) == 1) {
86 /* 4.1 u = u/2 */
87 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
88 goto __ERR;
90 /* 4.2 if B is odd then */
91 if (mp_isodd (&B) == 1) {
92 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
93 goto __ERR;
96 /* B = B/2 */
97 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
98 goto __ERR;
102 /* 5. while v is even do */
103 while (mp_iseven (&v) == 1) {
104 /* 5.1 v = v/2 */
105 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
106 goto __ERR;
108 /* 5.2 if D is odd then */
109 if (mp_isodd (&D) == 1) {
110 /* D = (D-x)/2 */
111 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
112 goto __ERR;
115 /* D = D/2 */
116 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
117 goto __ERR;
121 /* 6. if u >= v then */
122 if (mp_cmp (&u, &v) != MP_LT) {
123 /* u = u - v, B = B - D */
124 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
125 goto __ERR;
128 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
129 goto __ERR;
131 } else {
132 /* v - v - u, D = D - B */
133 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
134 goto __ERR;
137 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
138 goto __ERR;
142 /* if not zero goto step 4 */
143 if (mp_iszero (&u) == 0) {
144 goto top;
147 /* now a = C, b = D, gcd == g*v */
149 /* if v != 1 then there is no inverse */
150 if (mp_cmp_d (&v, 1) != MP_EQ) {
151 res = MP_VAL;
152 goto __ERR;
155 /* b is now the inverse */
156 neg = a->sign;
157 while (D.sign == MP_NEG) {
158 if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
159 goto __ERR;
162 mp_exch (&D, c);
163 c->sign = neg;
164 res = MP_OKAY;
166 __ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
167 return res;
170 /* computes xR**-1 == x (mod N) via Montgomery Reduction
172 * This is an optimized implementation of montgomery_reduce
173 * which uses the comba method to quickly calculate the columns of the
174 * reduction.
176 * Based on Algorithm 14.32 on pp.601 of HAC.
179 fast_mp_montgomery_reduce (mp_int * x, const mp_int * n, mp_digit rho)
181 int ix, res, olduse;
182 mp_word W[MP_WARRAY];
184 /* get old used count */
185 olduse = x->used;
187 /* grow a as required */
188 if (x->alloc < n->used + 1) {
189 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
190 return res;
194 /* first we have to get the digits of the input into
195 * an array of double precision words W[...]
198 register mp_word *_W;
199 register mp_digit *tmpx;
201 /* alias for the W[] array */
202 _W = W;
204 /* alias for the digits of x*/
205 tmpx = x->dp;
207 /* copy the digits of a into W[0..a->used-1] */
208 for (ix = 0; ix < x->used; ix++) {
209 *_W++ = *tmpx++;
212 /* zero the high words of W[a->used..m->used*2] */
213 for (; ix < n->used * 2 + 1; ix++) {
214 *_W++ = 0;
218 /* now we proceed to zero successive digits
219 * from the least significant upwards
221 for (ix = 0; ix < n->used; ix++) {
222 /* mu = ai * m' mod b
224 * We avoid a double precision multiplication (which isn't required)
225 * by casting the value down to a mp_digit. Note this requires
226 * that W[ix-1] have the carry cleared (see after the inner loop)
228 register mp_digit mu;
229 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
231 /* a = a + mu * m * b**i
233 * This is computed in place and on the fly. The multiplication
234 * by b**i is handled by offseting which columns the results
235 * are added to.
237 * Note the comba method normally doesn't handle carries in the
238 * inner loop In this case we fix the carry from the previous
239 * column since the Montgomery reduction requires digits of the
240 * result (so far) [see above] to work. This is
241 * handled by fixing up one carry after the inner loop. The
242 * carry fixups are done in order so after these loops the
243 * first m->used words of W[] have the carries fixed
246 register int iy;
247 register mp_digit *tmpn;
248 register mp_word *_W;
250 /* alias for the digits of the modulus */
251 tmpn = n->dp;
253 /* Alias for the columns set by an offset of ix */
254 _W = W + ix;
256 /* inner loop */
257 for (iy = 0; iy < n->used; iy++) {
258 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
262 /* now fix carry for next digit, W[ix+1] */
263 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
266 /* now we have to propagate the carries and
267 * shift the words downward [all those least
268 * significant digits we zeroed].
271 register mp_digit *tmpx;
272 register mp_word *_W, *_W1;
274 /* nox fix rest of carries */
276 /* alias for current word */
277 _W1 = W + ix;
279 /* alias for next word, where the carry goes */
280 _W = W + ++ix;
282 for (; ix <= n->used * 2 + 1; ix++) {
283 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
286 /* copy out, A = A/b**n
288 * The result is A/b**n but instead of converting from an
289 * array of mp_word to mp_digit than calling mp_rshd
290 * we just copy them in the right order
293 /* alias for destination word */
294 tmpx = x->dp;
296 /* alias for shifted double precision result */
297 _W = W + n->used;
299 for (ix = 0; ix < n->used + 1; ix++) {
300 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
303 /* zero oldused digits, if the input a was larger than
304 * m->used+1 we'll have to clear the digits
306 for (; ix < olduse; ix++) {
307 *tmpx++ = 0;
311 /* set the max used and clamp */
312 x->used = n->used + 1;
313 mp_clamp (x);
315 /* if A >= m then A = A - m */
316 if (mp_cmp_mag (x, n) != MP_LT) {
317 return s_mp_sub (x, n, x);
319 return MP_OKAY;
322 /* Fast (comba) multiplier
324 * This is the fast column-array [comba] multiplier. It is
325 * designed to compute the columns of the product first
326 * then handle the carries afterwards. This has the effect
327 * of making the nested loops that compute the columns very
328 * simple and schedulable on super-scalar processors.
330 * This has been modified to produce a variable number of
331 * digits of output so if say only a half-product is required
332 * you don't have to compute the upper half (a feature
333 * required for fast Barrett reduction).
335 * Based on Algorithm 14.12 on pp.595 of HAC.
339 fast_s_mp_mul_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
341 int olduse, res, pa, ix, iz;
342 mp_digit W[MP_WARRAY];
343 register mp_word _W;
345 /* grow the destination as required */
346 if (c->alloc < digs) {
347 if ((res = mp_grow (c, digs)) != MP_OKAY) {
348 return res;
352 /* number of output digits to produce */
353 pa = MIN(digs, a->used + b->used);
355 /* clear the carry */
356 _W = 0;
357 for (ix = 0; ix <= pa; ix++) {
358 int tx, ty;
359 int iy;
360 mp_digit *tmpx, *tmpy;
362 /* get offsets into the two bignums */
363 ty = MIN(b->used-1, ix);
364 tx = ix - ty;
366 /* setup temp aliases */
367 tmpx = a->dp + tx;
368 tmpy = b->dp + ty;
370 /* this is the number of times the loop will iterrate, essentially its
371 while (tx++ < a->used && ty-- >= 0) { ... }
373 iy = MIN(a->used-tx, ty+1);
375 /* execute loop */
376 for (iz = 0; iz < iy; ++iz) {
377 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
380 /* store term */
381 W[ix] = ((mp_digit)_W) & MP_MASK;
383 /* make next carry */
384 _W = _W >> ((mp_word)DIGIT_BIT);
387 /* setup dest */
388 olduse = c->used;
389 c->used = digs;
392 register mp_digit *tmpc;
393 tmpc = c->dp;
394 for (ix = 0; ix < digs; ix++) {
395 /* now extract the previous digit [below the carry] */
396 *tmpc++ = W[ix];
399 /* clear unused digits [that existed in the old copy of c] */
400 for (; ix < olduse; ix++) {
401 *tmpc++ = 0;
404 mp_clamp (c);
405 return MP_OKAY;
408 /* this is a modified version of fast_s_mul_digs that only produces
409 * output digits *above* digs. See the comments for fast_s_mul_digs
410 * to see how it works.
412 * This is used in the Barrett reduction since for one of the multiplications
413 * only the higher digits were needed. This essentially halves the work.
415 * Based on Algorithm 14.12 on pp.595 of HAC.
418 fast_s_mp_mul_high_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
420 int olduse, res, pa, ix, iz;
421 mp_digit W[MP_WARRAY];
422 mp_word _W;
424 /* grow the destination as required */
425 pa = a->used + b->used;
426 if (c->alloc < pa) {
427 if ((res = mp_grow (c, pa)) != MP_OKAY) {
428 return res;
432 /* number of output digits to produce */
433 pa = a->used + b->used;
434 _W = 0;
435 for (ix = digs; ix <= pa; ix++) {
436 int tx, ty, iy;
437 mp_digit *tmpx, *tmpy;
439 /* get offsets into the two bignums */
440 ty = MIN(b->used-1, ix);
441 tx = ix - ty;
443 /* setup temp aliases */
444 tmpx = a->dp + tx;
445 tmpy = b->dp + ty;
447 /* this is the number of times the loop will iterrate, essentially its
448 while (tx++ < a->used && ty-- >= 0) { ... }
450 iy = MIN(a->used-tx, ty+1);
452 /* execute loop */
453 for (iz = 0; iz < iy; iz++) {
454 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
457 /* store term */
458 W[ix] = ((mp_digit)_W) & MP_MASK;
460 /* make next carry */
461 _W = _W >> ((mp_word)DIGIT_BIT);
464 /* setup dest */
465 olduse = c->used;
466 c->used = pa;
469 register mp_digit *tmpc;
471 tmpc = c->dp + digs;
472 for (ix = digs; ix <= pa; ix++) {
473 /* now extract the previous digit [below the carry] */
474 *tmpc++ = W[ix];
477 /* clear unused digits [that existed in the old copy of c] */
478 for (; ix < olduse; ix++) {
479 *tmpc++ = 0;
482 mp_clamp (c);
483 return MP_OKAY;
486 /* fast squaring
488 * This is the comba method where the columns of the product
489 * are computed first then the carries are computed. This
490 * has the effect of making a very simple inner loop that
491 * is executed the most
493 * W2 represents the outer products and W the inner.
495 * A further optimizations is made because the inner
496 * products are of the form "A * B * 2". The *2 part does
497 * not need to be computed until the end which is good
498 * because 64-bit shifts are slow!
500 * Based on Algorithm 14.16 on pp.597 of HAC.
503 /* the jist of squaring...
505 you do like mult except the offset of the tmpx [one that starts closer to zero]
506 can't equal the offset of tmpy. So basically you set up iy like before then you min it with
507 (ty-tx) so that it never happens. You double all those you add in the inner loop
509 After that loop you do the squares and add them in.
511 Remove W2 and don't memset W
515 int fast_s_mp_sqr (const mp_int * a, mp_int * b)
517 int olduse, res, pa, ix, iz;
518 mp_digit W[MP_WARRAY], *tmpx;
519 mp_word W1;
521 /* grow the destination as required */
522 pa = a->used + a->used;
523 if (b->alloc < pa) {
524 if ((res = mp_grow (b, pa)) != MP_OKAY) {
525 return res;
529 /* number of output digits to produce */
530 W1 = 0;
531 for (ix = 0; ix <= pa; ix++) {
532 int tx, ty, iy;
533 mp_word _W;
534 mp_digit *tmpy;
536 /* clear counter */
537 _W = 0;
539 /* get offsets into the two bignums */
540 ty = MIN(a->used-1, ix);
541 tx = ix - ty;
543 /* setup temp aliases */
544 tmpx = a->dp + tx;
545 tmpy = a->dp + ty;
547 /* this is the number of times the loop will iterrate, essentially its
548 while (tx++ < a->used && ty-- >= 0) { ... }
550 iy = MIN(a->used-tx, ty+1);
552 /* now for squaring tx can never equal ty
553 * we halve the distance since they approach at a rate of 2x
554 * and we have to round because odd cases need to be executed
556 iy = MIN(iy, (ty-tx+1)>>1);
558 /* execute loop */
559 for (iz = 0; iz < iy; iz++) {
560 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
563 /* double the inner product and add carry */
564 _W = _W + _W + W1;
566 /* even columns have the square term in them */
567 if ((ix&1) == 0) {
568 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
571 /* store it */
572 W[ix] = _W;
574 /* make next carry */
575 W1 = _W >> ((mp_word)DIGIT_BIT);
578 /* setup dest */
579 olduse = b->used;
580 b->used = a->used+a->used;
583 mp_digit *tmpb;
584 tmpb = b->dp;
585 for (ix = 0; ix < pa; ix++) {
586 *tmpb++ = W[ix] & MP_MASK;
589 /* clear unused digits [that existed in the old copy of c] */
590 for (; ix < olduse; ix++) {
591 *tmpb++ = 0;
594 mp_clamp (b);
595 return MP_OKAY;
598 /* computes a = 2**b
600 * Simple algorithm which zeroes the int, grows it then just sets one bit
601 * as required.
604 mp_2expt (mp_int * a, int b)
606 int res;
608 /* zero a as per default */
609 mp_zero (a);
611 /* grow a to accommodate the single bit */
612 if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
613 return res;
616 /* set the used count of where the bit will go */
617 a->used = b / DIGIT_BIT + 1;
619 /* put the single bit in its place */
620 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
622 return MP_OKAY;
625 /* b = |a|
627 * Simple function copies the input and fixes the sign to positive
630 mp_abs (const mp_int * a, mp_int * b)
632 int res;
634 /* copy a to b */
635 if (a != b) {
636 if ((res = mp_copy (a, b)) != MP_OKAY) {
637 return res;
641 /* force the sign of b to positive */
642 b->sign = MP_ZPOS;
644 return MP_OKAY;
647 /* high level addition (handles signs) */
648 int mp_add (mp_int * a, mp_int * b, mp_int * c)
650 int sa, sb, res;
652 /* get sign of both inputs */
653 sa = a->sign;
654 sb = b->sign;
656 /* handle two cases, not four */
657 if (sa == sb) {
658 /* both positive or both negative */
659 /* add their magnitudes, copy the sign */
660 c->sign = sa;
661 res = s_mp_add (a, b, c);
662 } else {
663 /* one positive, the other negative */
664 /* subtract the one with the greater magnitude from */
665 /* the one of the lesser magnitude. The result gets */
666 /* the sign of the one with the greater magnitude. */
667 if (mp_cmp_mag (a, b) == MP_LT) {
668 c->sign = sb;
669 res = s_mp_sub (b, a, c);
670 } else {
671 c->sign = sa;
672 res = s_mp_sub (a, b, c);
675 return res;
679 /* single digit addition */
681 mp_add_d (mp_int * a, mp_digit b, mp_int * c)
683 int res, ix, oldused;
684 mp_digit *tmpa, *tmpc, mu;
686 /* grow c as required */
687 if (c->alloc < a->used + 1) {
688 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
689 return res;
693 /* if a is negative and |a| >= b, call c = |a| - b */
694 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
695 /* temporarily fix sign of a */
696 a->sign = MP_ZPOS;
698 /* c = |a| - b */
699 res = mp_sub_d(a, b, c);
701 /* fix sign */
702 a->sign = c->sign = MP_NEG;
704 return res;
707 /* old number of used digits in c */
708 oldused = c->used;
710 /* sign always positive */
711 c->sign = MP_ZPOS;
713 /* source alias */
714 tmpa = a->dp;
716 /* destination alias */
717 tmpc = c->dp;
719 /* if a is positive */
720 if (a->sign == MP_ZPOS) {
721 /* add digit, after this we're propagating
722 * the carry.
724 *tmpc = *tmpa++ + b;
725 mu = *tmpc >> DIGIT_BIT;
726 *tmpc++ &= MP_MASK;
728 /* now handle rest of the digits */
729 for (ix = 1; ix < a->used; ix++) {
730 *tmpc = *tmpa++ + mu;
731 mu = *tmpc >> DIGIT_BIT;
732 *tmpc++ &= MP_MASK;
734 /* set final carry */
735 ix++;
736 *tmpc++ = mu;
738 /* setup size */
739 c->used = a->used + 1;
740 } else {
741 /* a was negative and |a| < b */
742 c->used = 1;
744 /* the result is a single digit */
745 if (a->used == 1) {
746 *tmpc++ = b - a->dp[0];
747 } else {
748 *tmpc++ = b;
751 /* setup count so the clearing of oldused
752 * can fall through correctly
754 ix = 1;
757 /* now zero to oldused */
758 while (ix++ < oldused) {
759 *tmpc++ = 0;
761 mp_clamp(c);
763 return MP_OKAY;
766 /* trim unused digits
768 * This is used to ensure that leading zero digits are
769 * trimed and the leading "used" digit will be non-zero
770 * Typically very fast. Also fixes the sign if there
771 * are no more leading digits
773 void
774 mp_clamp (mp_int * a)
776 /* decrease used while the most significant digit is
777 * zero.
779 while (a->used > 0 && a->dp[a->used - 1] == 0) {
780 --(a->used);
783 /* reset the sign flag if used == 0 */
784 if (a->used == 0) {
785 a->sign = MP_ZPOS;
789 /* clear one (frees) */
790 void
791 mp_clear (mp_int * a)
793 int i;
795 /* only do anything if a hasn't been freed previously */
796 if (a->dp != NULL) {
797 /* first zero the digits */
798 for (i = 0; i < a->used; i++) {
799 a->dp[i] = 0;
802 /* free ram */
803 free(a->dp);
805 /* reset members to make debugging easier */
806 a->dp = NULL;
807 a->alloc = a->used = 0;
808 a->sign = MP_ZPOS;
813 void mp_clear_multi(mp_int *mp, ...)
815 mp_int* next_mp = mp;
816 va_list args;
817 va_start(args, mp);
818 while (next_mp != NULL) {
819 mp_clear(next_mp);
820 next_mp = va_arg(args, mp_int*);
822 va_end(args);
825 /* compare two ints (signed)*/
827 mp_cmp (const mp_int * a, const mp_int * b)
829 /* compare based on sign */
830 if (a->sign != b->sign) {
831 if (a->sign == MP_NEG) {
832 return MP_LT;
833 } else {
834 return MP_GT;
838 /* compare digits */
839 if (a->sign == MP_NEG) {
840 /* if negative compare opposite direction */
841 return mp_cmp_mag(b, a);
842 } else {
843 return mp_cmp_mag(a, b);
847 /* compare a digit */
848 int mp_cmp_d(const mp_int * a, mp_digit b)
850 /* compare based on sign */
851 if (a->sign == MP_NEG) {
852 return MP_LT;
855 /* compare based on magnitude */
856 if (a->used > 1) {
857 return MP_GT;
860 /* compare the only digit of a to b */
861 if (a->dp[0] > b) {
862 return MP_GT;
863 } else if (a->dp[0] < b) {
864 return MP_LT;
865 } else {
866 return MP_EQ;
870 /* compare maginitude of two ints (unsigned) */
871 int mp_cmp_mag (const mp_int * a, const mp_int * b)
873 int n;
874 mp_digit *tmpa, *tmpb;
876 /* compare based on # of non-zero digits */
877 if (a->used > b->used) {
878 return MP_GT;
881 if (a->used < b->used) {
882 return MP_LT;
885 /* alias for a */
886 tmpa = a->dp + (a->used - 1);
888 /* alias for b */
889 tmpb = b->dp + (a->used - 1);
891 /* compare based on digits */
892 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
893 if (*tmpa > *tmpb) {
894 return MP_GT;
897 if (*tmpa < *tmpb) {
898 return MP_LT;
901 return MP_EQ;
904 static const int lnz[16] = {
905 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
908 /* Counts the number of lsbs which are zero before the first zero bit */
909 int mp_cnt_lsb(const mp_int *a)
911 int x;
912 mp_digit q, qq;
914 /* easy out */
915 if (mp_iszero(a) == 1) {
916 return 0;
919 /* scan lower digits until non-zero */
920 for (x = 0; x < a->used && a->dp[x] == 0; x++);
921 q = a->dp[x];
922 x *= DIGIT_BIT;
924 /* now scan this digit until a 1 is found */
925 if ((q & 1) == 0) {
926 do {
927 qq = q & 15;
928 x += lnz[qq];
929 q >>= 4;
930 } while (qq == 0);
932 return x;
935 /* copy, b = a */
937 mp_copy (const mp_int * a, mp_int * b)
939 int res, n;
941 /* if dst == src do nothing */
942 if (a == b) {
943 return MP_OKAY;
946 /* grow dest */
947 if (b->alloc < a->used) {
948 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
949 return res;
953 /* zero b and copy the parameters over */
955 register mp_digit *tmpa, *tmpb;
957 /* pointer aliases */
959 /* source */
960 tmpa = a->dp;
962 /* destination */
963 tmpb = b->dp;
965 /* copy all the digits */
966 for (n = 0; n < a->used; n++) {
967 *tmpb++ = *tmpa++;
970 /* clear high digits */
971 for (; n < b->used; n++) {
972 *tmpb++ = 0;
976 /* copy used count and sign */
977 b->used = a->used;
978 b->sign = a->sign;
979 return MP_OKAY;
982 /* returns the number of bits in an int */
984 mp_count_bits (const mp_int * a)
986 int r;
987 mp_digit q;
989 /* shortcut */
990 if (a->used == 0) {
991 return 0;
994 /* get number of digits and add that */
995 r = (a->used - 1) * DIGIT_BIT;
997 /* take the last digit and count the bits in it */
998 q = a->dp[a->used - 1];
999 while (q > ((mp_digit) 0)) {
1000 ++r;
1001 q >>= ((mp_digit) 1);
1003 return r;
1006 /* integer signed division.
1007 * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
1008 * HAC pp.598 Algorithm 14.20
1010 * Note that the description in HAC is horribly
1011 * incomplete. For example, it doesn't consider
1012 * the case where digits are removed from 'x' in
1013 * the inner loop. It also doesn't consider the
1014 * case that y has fewer than three digits, etc..
1016 * The overall algorithm is as described as
1017 * 14.20 from HAC but fixed to treat these cases.
1019 int mp_div (const mp_int * a, const mp_int * b, mp_int * c, mp_int * d)
1021 mp_int q, x, y, t1, t2;
1022 int res, n, t, i, norm, neg;
1024 /* is divisor zero ? */
1025 if (mp_iszero (b) == 1) {
1026 return MP_VAL;
1029 /* if a < b then q=0, r = a */
1030 if (mp_cmp_mag (a, b) == MP_LT) {
1031 if (d != NULL) {
1032 res = mp_copy (a, d);
1033 } else {
1034 res = MP_OKAY;
1036 if (c != NULL) {
1037 mp_zero (c);
1039 return res;
1042 if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
1043 return res;
1045 q.used = a->used + 2;
1047 if ((res = mp_init (&t1)) != MP_OKAY) {
1048 goto __Q;
1051 if ((res = mp_init (&t2)) != MP_OKAY) {
1052 goto __T1;
1055 if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
1056 goto __T2;
1059 if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
1060 goto __X;
1063 /* fix the sign */
1064 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
1065 x.sign = y.sign = MP_ZPOS;
1067 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1068 norm = mp_count_bits(&y) % DIGIT_BIT;
1069 if (norm < (int)(DIGIT_BIT-1)) {
1070 norm = (DIGIT_BIT-1) - norm;
1071 if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
1072 goto __Y;
1074 if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
1075 goto __Y;
1077 } else {
1078 norm = 0;
1081 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
1082 n = x.used - 1;
1083 t = y.used - 1;
1085 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1086 if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
1087 goto __Y;
1090 while (mp_cmp (&x, &y) != MP_LT) {
1091 ++(q.dp[n - t]);
1092 if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
1093 goto __Y;
1097 /* reset y by shifting it back down */
1098 mp_rshd (&y, n - t);
1100 /* step 3. for i from n down to (t + 1) */
1101 for (i = n; i >= (t + 1); i--) {
1102 if (i > x.used) {
1103 continue;
1106 /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1107 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1108 if (x.dp[i] == y.dp[t]) {
1109 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
1110 } else {
1111 mp_word tmp;
1112 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
1113 tmp |= ((mp_word) x.dp[i - 1]);
1114 tmp /= ((mp_word) y.dp[t]);
1115 if (tmp > (mp_word) MP_MASK)
1116 tmp = MP_MASK;
1117 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
1120 /* while (q{i-t-1} * (yt * b + y{t-1})) >
1121 xi * b**2 + xi-1 * b + xi-2
1123 do q{i-t-1} -= 1;
1125 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
1126 do {
1127 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
1129 /* find left hand */
1130 mp_zero (&t1);
1131 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
1132 t1.dp[1] = y.dp[t];
1133 t1.used = 2;
1134 if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1135 goto __Y;
1138 /* find right hand */
1139 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
1140 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
1141 t2.dp[2] = x.dp[i];
1142 t2.used = 3;
1143 } while (mp_cmp_mag(&t1, &t2) == MP_GT);
1145 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1146 if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1147 goto __Y;
1150 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1151 goto __Y;
1154 if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
1155 goto __Y;
1158 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1159 if (x.sign == MP_NEG) {
1160 if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
1161 goto __Y;
1163 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1164 goto __Y;
1166 if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
1167 goto __Y;
1170 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
1174 /* now q is the quotient and x is the remainder
1175 * [which we have to normalize]
1178 /* get sign before writing to c */
1179 x.sign = x.used == 0 ? MP_ZPOS : a->sign;
1181 if (c != NULL) {
1182 mp_clamp (&q);
1183 mp_exch (&q, c);
1184 c->sign = neg;
1187 if (d != NULL) {
1188 mp_div_2d (&x, norm, &x, NULL);
1189 mp_exch (&x, d);
1192 res = MP_OKAY;
1194 __Y:mp_clear (&y);
1195 __X:mp_clear (&x);
1196 __T2:mp_clear (&t2);
1197 __T1:mp_clear (&t1);
1198 __Q:mp_clear (&q);
1199 return res;
1202 /* b = a/2 */
1203 int mp_div_2(const mp_int * a, mp_int * b)
1205 int x, res, oldused;
1207 /* copy */
1208 if (b->alloc < a->used) {
1209 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1210 return res;
1214 oldused = b->used;
1215 b->used = a->used;
1217 register mp_digit r, rr, *tmpa, *tmpb;
1219 /* source alias */
1220 tmpa = a->dp + b->used - 1;
1222 /* dest alias */
1223 tmpb = b->dp + b->used - 1;
1225 /* carry */
1226 r = 0;
1227 for (x = b->used - 1; x >= 0; x--) {
1228 /* get the carry for the next iteration */
1229 rr = *tmpa & 1;
1231 /* shift the current digit, add in carry and store */
1232 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1234 /* forward carry to next iteration */
1235 r = rr;
1238 /* zero excess digits */
1239 tmpb = b->dp + b->used;
1240 for (x = b->used; x < oldused; x++) {
1241 *tmpb++ = 0;
1244 b->sign = a->sign;
1245 mp_clamp (b);
1246 return MP_OKAY;
1249 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
1250 int mp_div_2d (const mp_int * a, int b, mp_int * c, mp_int * d)
1252 mp_digit D, r, rr;
1253 int x, res;
1254 mp_int t;
1257 /* if the shift count is <= 0 then we do no work */
1258 if (b <= 0) {
1259 res = mp_copy (a, c);
1260 if (d != NULL) {
1261 mp_zero (d);
1263 return res;
1266 if ((res = mp_init (&t)) != MP_OKAY) {
1267 return res;
1270 /* get the remainder */
1271 if (d != NULL) {
1272 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
1273 mp_clear (&t);
1274 return res;
1278 /* copy */
1279 if ((res = mp_copy (a, c)) != MP_OKAY) {
1280 mp_clear (&t);
1281 return res;
1284 /* shift by as many digits in the bit count */
1285 if (b >= (int)DIGIT_BIT) {
1286 mp_rshd (c, b / DIGIT_BIT);
1289 /* shift any bit count < DIGIT_BIT */
1290 D = (mp_digit) (b % DIGIT_BIT);
1291 if (D != 0) {
1292 register mp_digit *tmpc, mask, shift;
1294 /* mask */
1295 mask = (((mp_digit)1) << D) - 1;
1297 /* shift for lsb */
1298 shift = DIGIT_BIT - D;
1300 /* alias */
1301 tmpc = c->dp + (c->used - 1);
1303 /* carry */
1304 r = 0;
1305 for (x = c->used - 1; x >= 0; x--) {
1306 /* get the lower bits of this word in a temp */
1307 rr = *tmpc & mask;
1309 /* shift the current word and mix in the carry bits from the previous word */
1310 *tmpc = (*tmpc >> D) | (r << shift);
1311 --tmpc;
1313 /* set the carry to the carry bits of the current word found above */
1314 r = rr;
1317 mp_clamp (c);
1318 if (d != NULL) {
1319 mp_exch (&t, d);
1321 mp_clear (&t);
1322 return MP_OKAY;
1325 static int s_is_power_of_two(mp_digit b, int *p)
1327 int x;
1329 for (x = 1; x < DIGIT_BIT; x++) {
1330 if (b == (((mp_digit)1)<<x)) {
1331 *p = x;
1332 return 1;
1335 return 0;
1338 /* single digit division (based on routine from MPI) */
1339 int mp_div_d (const mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
1341 mp_int q;
1342 mp_word w;
1343 mp_digit t;
1344 int res, ix;
1346 /* cannot divide by zero */
1347 if (b == 0) {
1348 return MP_VAL;
1351 /* quick outs */
1352 if (b == 1 || mp_iszero(a) == 1) {
1353 if (d != NULL) {
1354 *d = 0;
1356 if (c != NULL) {
1357 return mp_copy(a, c);
1359 return MP_OKAY;
1362 /* power of two ? */
1363 if (s_is_power_of_two(b, &ix) == 1) {
1364 if (d != NULL) {
1365 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
1367 if (c != NULL) {
1368 return mp_div_2d(a, ix, c, NULL);
1370 return MP_OKAY;
1373 /* no easy answer [c'est la vie]. Just division */
1374 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
1375 return res;
1378 q.used = a->used;
1379 q.sign = a->sign;
1380 w = 0;
1381 for (ix = a->used - 1; ix >= 0; ix--) {
1382 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
1384 if (w >= b) {
1385 t = (mp_digit)(w / b);
1386 w -= ((mp_word)t) * ((mp_word)b);
1387 } else {
1388 t = 0;
1390 q.dp[ix] = (mp_digit)t;
1393 if (d != NULL) {
1394 *d = (mp_digit)w;
1397 if (c != NULL) {
1398 mp_clamp(&q);
1399 mp_exch(&q, c);
1401 mp_clear(&q);
1403 return res;
1406 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
1408 * Based on algorithm from the paper
1410 * "Generating Efficient Primes for Discrete Log Cryptosystems"
1411 * Chae Hoon Lim, Pil Loong Lee,
1412 * POSTECH Information Research Laboratories
1414 * The modulus must be of a special format [see manual]
1416 * Has been modified to use algorithm 7.10 from the LTM book instead
1418 * Input x must be in the range 0 <= x <= (n-1)**2
1421 mp_dr_reduce (mp_int * x, const mp_int * n, mp_digit k)
1423 int err, i, m;
1424 mp_word r;
1425 mp_digit mu, *tmpx1, *tmpx2;
1427 /* m = digits in modulus */
1428 m = n->used;
1430 /* ensure that "x" has at least 2m digits */
1431 if (x->alloc < m + m) {
1432 if ((err = mp_grow (x, m + m)) != MP_OKAY) {
1433 return err;
1437 /* top of loop, this is where the code resumes if
1438 * another reduction pass is required.
1440 top:
1441 /* aliases for digits */
1442 /* alias for lower half of x */
1443 tmpx1 = x->dp;
1445 /* alias for upper half of x, or x/B**m */
1446 tmpx2 = x->dp + m;
1448 /* set carry to zero */
1449 mu = 0;
1451 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
1452 for (i = 0; i < m; i++) {
1453 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
1454 *tmpx1++ = (mp_digit)(r & MP_MASK);
1455 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
1458 /* set final carry */
1459 *tmpx1++ = mu;
1461 /* zero words above m */
1462 for (i = m + 1; i < x->used; i++) {
1463 *tmpx1++ = 0;
1466 /* clamp, sub and return */
1467 mp_clamp (x);
1469 /* if x >= n then subtract and reduce again
1470 * Each successive "recursion" makes the input smaller and smaller.
1472 if (mp_cmp_mag (x, n) != MP_LT) {
1473 s_mp_sub(x, n, x);
1474 goto top;
1476 return MP_OKAY;
1479 /* determines the setup value */
1480 void mp_dr_setup(const mp_int *a, mp_digit *d)
1482 /* the casts are required if DIGIT_BIT is one less than
1483 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
1485 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
1486 ((mp_word)a->dp[0]));
1489 /* swap the elements of two integers, for cases where you can't simply swap the
1490 * mp_int pointers around
1492 void
1493 mp_exch (mp_int * a, mp_int * b)
1495 mp_int t;
1497 t = *a;
1498 *a = *b;
1499 *b = t;
1502 /* this is a shell function that calls either the normal or Montgomery
1503 * exptmod functions. Originally the call to the montgomery code was
1504 * embedded in the normal function but that wasted a lot of stack space
1505 * for nothing (since 99% of the time the Montgomery code would be called)
1507 int mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y)
1509 int dr;
1511 /* modulus P must be positive */
1512 if (P->sign == MP_NEG) {
1513 return MP_VAL;
1516 /* if exponent X is negative we have to recurse */
1517 if (X->sign == MP_NEG) {
1518 mp_int tmpG, tmpX;
1519 int err;
1521 /* first compute 1/G mod P */
1522 if ((err = mp_init(&tmpG)) != MP_OKAY) {
1523 return err;
1525 if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
1526 mp_clear(&tmpG);
1527 return err;
1530 /* now get |X| */
1531 if ((err = mp_init(&tmpX)) != MP_OKAY) {
1532 mp_clear(&tmpG);
1533 return err;
1535 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
1536 mp_clear_multi(&tmpG, &tmpX, NULL);
1537 return err;
1540 /* and now compute (1/G)**|X| instead of G**X [X < 0] */
1541 err = mp_exptmod(&tmpG, &tmpX, P, Y);
1542 mp_clear_multi(&tmpG, &tmpX, NULL);
1543 return err;
1546 dr = 0;
1548 /* if the modulus is odd or dr != 0 use the fast method */
1549 if (mp_isodd (P) == 1 || dr != 0) {
1550 return mp_exptmod_fast (G, X, P, Y, dr);
1551 } else {
1552 /* otherwise use the generic Barrett reduction technique */
1553 return s_mp_exptmod (G, X, P, Y);
1557 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1559 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
1560 * The value of k changes based on the size of the exponent.
1562 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1566 mp_exptmod_fast (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y, int redmode)
1568 mp_int M[256], res;
1569 mp_digit buf, mp;
1570 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1572 /* use a pointer to the reduction algorithm. This allows us to use
1573 * one of many reduction algorithms without modding the guts of
1574 * the code with if statements everywhere.
1576 int (*redux)(mp_int*,const mp_int*,mp_digit);
1578 /* find window size */
1579 x = mp_count_bits (X);
1580 if (x <= 7) {
1581 winsize = 2;
1582 } else if (x <= 36) {
1583 winsize = 3;
1584 } else if (x <= 140) {
1585 winsize = 4;
1586 } else if (x <= 450) {
1587 winsize = 5;
1588 } else if (x <= 1303) {
1589 winsize = 6;
1590 } else if (x <= 3529) {
1591 winsize = 7;
1592 } else {
1593 winsize = 8;
1596 /* init M array */
1597 /* init first cell */
1598 if ((err = mp_init(&M[1])) != MP_OKAY) {
1599 return err;
1602 /* now init the second half of the array */
1603 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1604 if ((err = mp_init(&M[x])) != MP_OKAY) {
1605 for (y = 1<<(winsize-1); y < x; y++) {
1606 mp_clear (&M[y]);
1608 mp_clear(&M[1]);
1609 return err;
1613 /* determine and setup reduction code */
1614 if (redmode == 0) {
1615 /* now setup montgomery */
1616 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
1617 goto __M;
1620 /* automatically pick the comba one if available (saves quite a few calls/ifs) */
1621 if (((P->used * 2 + 1) < MP_WARRAY) &&
1622 P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
1623 redux = fast_mp_montgomery_reduce;
1624 } else {
1625 /* use slower baseline Montgomery method */
1626 redux = mp_montgomery_reduce;
1628 } else if (redmode == 1) {
1629 /* setup DR reduction for moduli of the form B**k - b */
1630 mp_dr_setup(P, &mp);
1631 redux = mp_dr_reduce;
1632 } else {
1633 /* setup DR reduction for moduli of the form 2**k - b */
1634 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
1635 goto __M;
1637 redux = mp_reduce_2k;
1640 /* setup result */
1641 if ((err = mp_init (&res)) != MP_OKAY) {
1642 goto __M;
1645 /* create M table
1649 * The first half of the table is not computed though accept for M[0] and M[1]
1652 if (redmode == 0) {
1653 /* now we need R mod m */
1654 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
1655 goto __RES;
1658 /* now set M[1] to G * R mod m */
1659 if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
1660 goto __RES;
1662 } else {
1663 mp_set(&res, 1);
1664 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
1665 goto __RES;
1669 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
1670 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
1671 goto __RES;
1674 for (x = 0; x < (winsize - 1); x++) {
1675 if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
1676 goto __RES;
1678 if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
1679 goto __RES;
1683 /* create upper table */
1684 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
1685 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
1686 goto __RES;
1688 if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
1689 goto __RES;
1693 /* set initial mode and bit cnt */
1694 mode = 0;
1695 bitcnt = 1;
1696 buf = 0;
1697 digidx = X->used - 1;
1698 bitcpy = 0;
1699 bitbuf = 0;
1701 for (;;) {
1702 /* grab next digit as required */
1703 if (--bitcnt == 0) {
1704 /* if digidx == -1 we are out of digits so break */
1705 if (digidx == -1) {
1706 break;
1708 /* read next digit and reset bitcnt */
1709 buf = X->dp[digidx--];
1710 bitcnt = (int)DIGIT_BIT;
1713 /* grab the next msb from the exponent */
1714 y = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
1715 buf <<= (mp_digit)1;
1717 /* if the bit is zero and mode == 0 then we ignore it
1718 * These represent the leading zero bits before the first 1 bit
1719 * in the exponent. Technically this opt is not required but it
1720 * does lower the # of trivial squaring/reductions used
1722 if (mode == 0 && y == 0) {
1723 continue;
1726 /* if the bit is zero and mode == 1 then we square */
1727 if (mode == 1 && y == 0) {
1728 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1729 goto __RES;
1731 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1732 goto __RES;
1734 continue;
1737 /* else we add it to the window */
1738 bitbuf |= (y << (winsize - ++bitcpy));
1739 mode = 2;
1741 if (bitcpy == winsize) {
1742 /* ok window is filled so square as required and multiply */
1743 /* square first */
1744 for (x = 0; x < winsize; x++) {
1745 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1746 goto __RES;
1748 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1749 goto __RES;
1753 /* then multiply */
1754 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
1755 goto __RES;
1757 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1758 goto __RES;
1761 /* empty window and reset */
1762 bitcpy = 0;
1763 bitbuf = 0;
1764 mode = 1;
1768 /* if bits remain then square/multiply */
1769 if (mode == 2 && bitcpy > 0) {
1770 /* square then multiply if the bit is set */
1771 for (x = 0; x < bitcpy; x++) {
1772 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1773 goto __RES;
1775 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1776 goto __RES;
1779 /* get next bit of the window */
1780 bitbuf <<= 1;
1781 if ((bitbuf & (1 << winsize)) != 0) {
1782 /* then multiply */
1783 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
1784 goto __RES;
1786 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1787 goto __RES;
1793 if (redmode == 0) {
1794 /* fixup result if Montgomery reduction is used
1795 * recall that any value in a Montgomery system is
1796 * actually multiplied by R mod n. So we have
1797 * to reduce one more time to cancel out the factor
1798 * of R.
1800 if ((err = redux(&res, P, mp)) != MP_OKAY) {
1801 goto __RES;
1805 /* swap res with Y */
1806 mp_exch (&res, Y);
1807 err = MP_OKAY;
1808 __RES:mp_clear (&res);
1809 __M:
1810 mp_clear(&M[1]);
1811 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1812 mp_clear (&M[x]);
1814 return err;
1817 /* Greatest Common Divisor using the binary method */
1818 int mp_gcd (const mp_int * a, const mp_int * b, mp_int * c)
1820 mp_int u, v;
1821 int k, u_lsb, v_lsb, res;
1823 /* either zero than gcd is the largest */
1824 if (mp_iszero (a) == 1 && mp_iszero (b) == 0) {
1825 return mp_abs (b, c);
1827 if (mp_iszero (a) == 0 && mp_iszero (b) == 1) {
1828 return mp_abs (a, c);
1831 /* optimized. At this point if a == 0 then
1832 * b must equal zero too
1834 if (mp_iszero (a) == 1) {
1835 mp_zero(c);
1836 return MP_OKAY;
1839 /* get copies of a and b we can modify */
1840 if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
1841 return res;
1844 if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
1845 goto __U;
1848 /* must be positive for the remainder of the algorithm */
1849 u.sign = v.sign = MP_ZPOS;
1851 /* B1. Find the common power of two for u and v */
1852 u_lsb = mp_cnt_lsb(&u);
1853 v_lsb = mp_cnt_lsb(&v);
1854 k = MIN(u_lsb, v_lsb);
1856 if (k > 0) {
1857 /* divide the power of two out */
1858 if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
1859 goto __V;
1862 if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
1863 goto __V;
1867 /* divide any remaining factors of two out */
1868 if (u_lsb != k) {
1869 if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
1870 goto __V;
1874 if (v_lsb != k) {
1875 if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
1876 goto __V;
1880 while (mp_iszero(&v) == 0) {
1881 /* make sure v is the largest */
1882 if (mp_cmp_mag(&u, &v) == MP_GT) {
1883 /* swap u and v to make sure v is >= u */
1884 mp_exch(&u, &v);
1887 /* subtract smallest from largest */
1888 if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
1889 goto __V;
1892 /* Divide out all factors of two */
1893 if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
1894 goto __V;
1898 /* multiply by 2**k which we divided out at the beginning */
1899 if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
1900 goto __V;
1902 c->sign = MP_ZPOS;
1903 res = MP_OKAY;
1904 __V:mp_clear (&u);
1905 __U:mp_clear (&v);
1906 return res;
1909 /* get the lower 32-bits of an mp_int */
1910 unsigned long mp_get_int(const mp_int * a)
1912 int i;
1913 unsigned long res;
1915 if (a->used == 0) {
1916 return 0;
1919 /* get number of digits of the lsb we have to read */
1920 i = MIN(a->used,(int)((sizeof(unsigned long)*CHAR_BIT+DIGIT_BIT-1)/DIGIT_BIT))-1;
1922 /* get most significant digit of result */
1923 res = DIGIT(a,i);
1925 while (--i >= 0) {
1926 res = (res << DIGIT_BIT) | DIGIT(a,i);
1929 /* force result to 32-bits always so it is consistent on non 32-bit platforms */
1930 return res & 0xFFFFFFFFUL;
1933 /* grow as required */
1934 int mp_grow (mp_int * a, int size)
1936 int i;
1937 mp_digit *tmp;
1939 /* if the alloc size is smaller alloc more ram */
1940 if (a->alloc < size) {
1941 /* ensure there are always at least MP_PREC digits extra on top */
1942 size += (MP_PREC * 2) - (size % MP_PREC);
1944 /* reallocate the array a->dp
1946 * We store the return in a temporary variable
1947 * in case the operation failed we don't want
1948 * to overwrite the dp member of a.
1950 tmp = realloc (a->dp, sizeof (mp_digit) * size);
1951 if (tmp == NULL) {
1952 /* reallocation failed but "a" is still valid [can be freed] */
1953 return MP_MEM;
1956 /* reallocation succeeded so set a->dp */
1957 a->dp = tmp;
1959 /* zero excess digits */
1960 i = a->alloc;
1961 a->alloc = size;
1962 for (; i < a->alloc; i++) {
1963 a->dp[i] = 0;
1966 return MP_OKAY;
1969 /* init a new mp_int */
1970 int mp_init (mp_int * a)
1972 int i;
1974 /* allocate memory required and clear it */
1975 a->dp = malloc (sizeof (mp_digit) * MP_PREC);
1976 if (a->dp == NULL) {
1977 return MP_MEM;
1980 /* set the digits to zero */
1981 for (i = 0; i < MP_PREC; i++) {
1982 a->dp[i] = 0;
1985 /* set the used to zero, allocated digits to the default precision
1986 * and sign to positive */
1987 a->used = 0;
1988 a->alloc = MP_PREC;
1989 a->sign = MP_ZPOS;
1991 return MP_OKAY;
1994 /* creates "a" then copies b into it */
1995 int mp_init_copy (mp_int * a, const mp_int * b)
1997 int res;
1999 if ((res = mp_init (a)) != MP_OKAY) {
2000 return res;
2002 return mp_copy (b, a);
2005 int mp_init_multi(mp_int *mp, ...)
2007 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */
2008 int n = 0; /* Number of ok inits */
2009 mp_int* cur_arg = mp;
2010 va_list args;
2012 va_start(args, mp); /* init args to next argument from caller */
2013 while (cur_arg != NULL) {
2014 if (mp_init(cur_arg) != MP_OKAY) {
2015 /* Oops - error! Back-track and mp_clear what we already
2016 succeeded in init-ing, then return error.
2018 va_list clean_args;
2020 /* end the current list */
2021 va_end(args);
2023 /* now start cleaning up */
2024 cur_arg = mp;
2025 va_start(clean_args, mp);
2026 while (n--) {
2027 mp_clear(cur_arg);
2028 cur_arg = va_arg(clean_args, mp_int*);
2030 va_end(clean_args);
2031 res = MP_MEM;
2032 break;
2034 n++;
2035 cur_arg = va_arg(args, mp_int*);
2037 va_end(args);
2038 return res; /* Assumed ok, if error flagged above. */
2041 /* init an mp_init for a given size */
2042 int mp_init_size (mp_int * a, int size)
2044 int x;
2046 /* pad size so there are always extra digits */
2047 size += (MP_PREC * 2) - (size % MP_PREC);
2049 /* alloc mem */
2050 a->dp = malloc (sizeof (mp_digit) * size);
2051 if (a->dp == NULL) {
2052 return MP_MEM;
2055 /* set the members */
2056 a->used = 0;
2057 a->alloc = size;
2058 a->sign = MP_ZPOS;
2060 /* zero the digits */
2061 for (x = 0; x < size; x++) {
2062 a->dp[x] = 0;
2065 return MP_OKAY;
2068 /* hac 14.61, pp608 */
2069 int mp_invmod (const mp_int * a, mp_int * b, mp_int * c)
2071 /* b cannot be negative */
2072 if (b->sign == MP_NEG || mp_iszero(b) == 1) {
2073 return MP_VAL;
2076 /* if the modulus is odd we can use a faster routine instead */
2077 if (mp_isodd (b) == 1) {
2078 return fast_mp_invmod (a, b, c);
2081 return mp_invmod_slow(a, b, c);
2084 /* hac 14.61, pp608 */
2085 int mp_invmod_slow (const mp_int * a, mp_int * b, mp_int * c)
2087 mp_int x, y, u, v, A, B, C, D;
2088 int res;
2090 /* b cannot be negative */
2091 if (b->sign == MP_NEG || mp_iszero(b) == 1) {
2092 return MP_VAL;
2095 /* init temps */
2096 if ((res = mp_init_multi(&x, &y, &u, &v,
2097 &A, &B, &C, &D, NULL)) != MP_OKAY) {
2098 return res;
2101 /* x = a, y = b */
2102 if ((res = mp_copy (a, &x)) != MP_OKAY) {
2103 goto __ERR;
2105 if ((res = mp_copy (b, &y)) != MP_OKAY) {
2106 goto __ERR;
2109 /* 2. [modified] if x,y are both even then return an error! */
2110 if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
2111 res = MP_VAL;
2112 goto __ERR;
2115 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
2116 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
2117 goto __ERR;
2119 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
2120 goto __ERR;
2122 mp_set (&A, 1);
2123 mp_set (&D, 1);
2125 top:
2126 /* 4. while u is even do */
2127 while (mp_iseven (&u) == 1) {
2128 /* 4.1 u = u/2 */
2129 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
2130 goto __ERR;
2132 /* 4.2 if A or B is odd then */
2133 if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
2134 /* A = (A+y)/2, B = (B-x)/2 */
2135 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
2136 goto __ERR;
2138 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
2139 goto __ERR;
2142 /* A = A/2, B = B/2 */
2143 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
2144 goto __ERR;
2146 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
2147 goto __ERR;
2151 /* 5. while v is even do */
2152 while (mp_iseven (&v) == 1) {
2153 /* 5.1 v = v/2 */
2154 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
2155 goto __ERR;
2157 /* 5.2 if C or D is odd then */
2158 if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
2159 /* C = (C+y)/2, D = (D-x)/2 */
2160 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
2161 goto __ERR;
2163 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
2164 goto __ERR;
2167 /* C = C/2, D = D/2 */
2168 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
2169 goto __ERR;
2171 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
2172 goto __ERR;
2176 /* 6. if u >= v then */
2177 if (mp_cmp (&u, &v) != MP_LT) {
2178 /* u = u - v, A = A - C, B = B - D */
2179 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
2180 goto __ERR;
2183 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
2184 goto __ERR;
2187 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
2188 goto __ERR;
2190 } else {
2191 /* v - v - u, C = C - A, D = D - B */
2192 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
2193 goto __ERR;
2196 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
2197 goto __ERR;
2200 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
2201 goto __ERR;
2205 /* if not zero goto step 4 */
2206 if (mp_iszero (&u) == 0)
2207 goto top;
2209 /* now a = C, b = D, gcd == g*v */
2211 /* if v != 1 then there is no inverse */
2212 if (mp_cmp_d (&v, 1) != MP_EQ) {
2213 res = MP_VAL;
2214 goto __ERR;
2217 /* if its too low */
2218 while (mp_cmp_d(&C, 0) == MP_LT) {
2219 if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
2220 goto __ERR;
2224 /* too big */
2225 while (mp_cmp_mag(&C, b) != MP_LT) {
2226 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
2227 goto __ERR;
2231 /* C is now the inverse */
2232 mp_exch (&C, c);
2233 res = MP_OKAY;
2234 __ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
2235 return res;
2238 /* c = |a| * |b| using Karatsuba Multiplication using
2239 * three half size multiplications
2241 * Let B represent the radix [e.g. 2**DIGIT_BIT] and
2242 * let n represent half of the number of digits in
2243 * the min(a,b)
2245 * a = a1 * B**n + a0
2246 * b = b1 * B**n + b0
2248 * Then, a * b =>
2249 a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0
2251 * Note that a1b1 and a0b0 are used twice and only need to be
2252 * computed once. So in total three half size (half # of
2253 * digit) multiplications are performed, a0b0, a1b1 and
2254 * (a1-b1)(a0-b0)
2256 * Note that a multiplication of half the digits requires
2257 * 1/4th the number of single precision multiplications so in
2258 * total after one call 25% of the single precision multiplications
2259 * are saved. Note also that the call to mp_mul can end up back
2260 * in this function if the a0, a1, b0, or b1 are above the threshold.
2261 * This is known as divide-and-conquer and leads to the famous
2262 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
2263 * the standard O(N**2) that the baseline/comba methods use.
2264 * Generally though the overhead of this method doesn't pay off
2265 * until a certain size (N ~ 80) is reached.
2267 int mp_karatsuba_mul (const mp_int * a, const mp_int * b, mp_int * c)
2269 mp_int x0, x1, y0, y1, t1, x0y0, x1y1;
2270 int B, err;
2272 /* default the return code to an error */
2273 err = MP_MEM;
2275 /* min # of digits */
2276 B = MIN (a->used, b->used);
2278 /* now divide in two */
2279 B = B >> 1;
2281 /* init copy all the temps */
2282 if (mp_init_size (&x0, B) != MP_OKAY)
2283 goto ERR;
2284 if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2285 goto X0;
2286 if (mp_init_size (&y0, B) != MP_OKAY)
2287 goto X1;
2288 if (mp_init_size (&y1, b->used - B) != MP_OKAY)
2289 goto Y0;
2291 /* init temps */
2292 if (mp_init_size (&t1, B * 2) != MP_OKAY)
2293 goto Y1;
2294 if (mp_init_size (&x0y0, B * 2) != MP_OKAY)
2295 goto T1;
2296 if (mp_init_size (&x1y1, B * 2) != MP_OKAY)
2297 goto X0Y0;
2299 /* now shift the digits */
2300 x0.used = y0.used = B;
2301 x1.used = a->used - B;
2302 y1.used = b->used - B;
2305 register int x;
2306 register mp_digit *tmpa, *tmpb, *tmpx, *tmpy;
2308 /* we copy the digits directly instead of using higher level functions
2309 * since we also need to shift the digits
2311 tmpa = a->dp;
2312 tmpb = b->dp;
2314 tmpx = x0.dp;
2315 tmpy = y0.dp;
2316 for (x = 0; x < B; x++) {
2317 *tmpx++ = *tmpa++;
2318 *tmpy++ = *tmpb++;
2321 tmpx = x1.dp;
2322 for (x = B; x < a->used; x++) {
2323 *tmpx++ = *tmpa++;
2326 tmpy = y1.dp;
2327 for (x = B; x < b->used; x++) {
2328 *tmpy++ = *tmpb++;
2332 /* only need to clamp the lower words since by definition the
2333 * upper words x1/y1 must have a known number of digits
2335 mp_clamp (&x0);
2336 mp_clamp (&y0);
2338 /* now calc the products x0y0 and x1y1 */
2339 /* after this x0 is no longer required, free temp [x0==t2]! */
2340 if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
2341 goto X1Y1; /* x0y0 = x0*y0 */
2342 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
2343 goto X1Y1; /* x1y1 = x1*y1 */
2345 /* now calc x1-x0 and y1-y0 */
2346 if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
2347 goto X1Y1; /* t1 = x1 - x0 */
2348 if (mp_sub (&y1, &y0, &x0) != MP_OKAY)
2349 goto X1Y1; /* t2 = y1 - y0 */
2350 if (mp_mul (&t1, &x0, &t1) != MP_OKAY)
2351 goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */
2353 /* add x0y0 */
2354 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY)
2355 goto X1Y1; /* t2 = x0y0 + x1y1 */
2356 if (mp_sub (&x0, &t1, &t1) != MP_OKAY)
2357 goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
2359 /* shift by B */
2360 if (mp_lshd (&t1, B) != MP_OKAY)
2361 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
2362 if (mp_lshd (&x1y1, B * 2) != MP_OKAY)
2363 goto X1Y1; /* x1y1 = x1y1 << 2*B */
2365 if (mp_add (&x0y0, &t1, &t1) != MP_OKAY)
2366 goto X1Y1; /* t1 = x0y0 + t1 */
2367 if (mp_add (&t1, &x1y1, c) != MP_OKAY)
2368 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */
2370 /* Algorithm succeeded set the return code to MP_OKAY */
2371 err = MP_OKAY;
2373 X1Y1:mp_clear (&x1y1);
2374 X0Y0:mp_clear (&x0y0);
2375 T1:mp_clear (&t1);
2376 Y1:mp_clear (&y1);
2377 Y0:mp_clear (&y0);
2378 X1:mp_clear (&x1);
2379 X0:mp_clear (&x0);
2380 ERR:
2381 return err;
2384 /* Karatsuba squaring, computes b = a*a using three
2385 * half size squarings
2387 * See comments of karatsuba_mul for details. It
2388 * is essentially the same algorithm but merely
2389 * tuned to perform recursive squarings.
2391 int mp_karatsuba_sqr (const mp_int * a, mp_int * b)
2393 mp_int x0, x1, t1, t2, x0x0, x1x1;
2394 int B, err;
2396 err = MP_MEM;
2398 /* min # of digits */
2399 B = a->used;
2401 /* now divide in two */
2402 B = B >> 1;
2404 /* init copy all the temps */
2405 if (mp_init_size (&x0, B) != MP_OKAY)
2406 goto ERR;
2407 if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2408 goto X0;
2410 /* init temps */
2411 if (mp_init_size (&t1, a->used * 2) != MP_OKAY)
2412 goto X1;
2413 if (mp_init_size (&t2, a->used * 2) != MP_OKAY)
2414 goto T1;
2415 if (mp_init_size (&x0x0, B * 2) != MP_OKAY)
2416 goto T2;
2417 if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY)
2418 goto X0X0;
2421 register int x;
2422 register mp_digit *dst, *src;
2424 src = a->dp;
2426 /* now shift the digits */
2427 dst = x0.dp;
2428 for (x = 0; x < B; x++) {
2429 *dst++ = *src++;
2432 dst = x1.dp;
2433 for (x = B; x < a->used; x++) {
2434 *dst++ = *src++;
2438 x0.used = B;
2439 x1.used = a->used - B;
2441 mp_clamp (&x0);
2443 /* now calc the products x0*x0 and x1*x1 */
2444 if (mp_sqr (&x0, &x0x0) != MP_OKAY)
2445 goto X1X1; /* x0x0 = x0*x0 */
2446 if (mp_sqr (&x1, &x1x1) != MP_OKAY)
2447 goto X1X1; /* x1x1 = x1*x1 */
2449 /* now calc (x1-x0)**2 */
2450 if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
2451 goto X1X1; /* t1 = x1 - x0 */
2452 if (mp_sqr (&t1, &t1) != MP_OKAY)
2453 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */
2455 /* add x0y0 */
2456 if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
2457 goto X1X1; /* t2 = x0x0 + x1x1 */
2458 if (mp_sub (&t2, &t1, &t1) != MP_OKAY)
2459 goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */
2461 /* shift by B */
2462 if (mp_lshd (&t1, B) != MP_OKAY)
2463 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
2464 if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
2465 goto X1X1; /* x1x1 = x1x1 << 2*B */
2467 if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
2468 goto X1X1; /* t1 = x0x0 + t1 */
2469 if (mp_add (&t1, &x1x1, b) != MP_OKAY)
2470 goto X1X1; /* t1 = x0x0 + t1 + x1x1 */
2472 err = MP_OKAY;
2474 X1X1:mp_clear (&x1x1);
2475 X0X0:mp_clear (&x0x0);
2476 T2:mp_clear (&t2);
2477 T1:mp_clear (&t1);
2478 X1:mp_clear (&x1);
2479 X0:mp_clear (&x0);
2480 ERR:
2481 return err;
2484 /* computes least common multiple as |a*b|/(a, b) */
2485 int mp_lcm (const mp_int * a, const mp_int * b, mp_int * c)
2487 int res;
2488 mp_int t1, t2;
2491 if ((res = mp_init_multi (&t1, &t2, NULL)) != MP_OKAY) {
2492 return res;
2495 /* t1 = get the GCD of the two inputs */
2496 if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
2497 goto __T;
2500 /* divide the smallest by the GCD */
2501 if (mp_cmp_mag(a, b) == MP_LT) {
2502 /* store quotient in t2 such that t2 * b is the LCM */
2503 if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
2504 goto __T;
2506 res = mp_mul(b, &t2, c);
2507 } else {
2508 /* store quotient in t2 such that t2 * a is the LCM */
2509 if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
2510 goto __T;
2512 res = mp_mul(a, &t2, c);
2515 /* fix the sign to positive */
2516 c->sign = MP_ZPOS;
2518 __T:
2519 mp_clear_multi (&t1, &t2, NULL);
2520 return res;
2523 /* shift left a certain amount of digits */
2524 int mp_lshd (mp_int * a, int b)
2526 int x, res;
2528 /* if its less than zero return */
2529 if (b <= 0) {
2530 return MP_OKAY;
2533 /* grow to fit the new digits */
2534 if (a->alloc < a->used + b) {
2535 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
2536 return res;
2541 register mp_digit *top, *bottom;
2543 /* increment the used by the shift amount then copy upwards */
2544 a->used += b;
2546 /* top */
2547 top = a->dp + a->used - 1;
2549 /* base */
2550 bottom = a->dp + a->used - 1 - b;
2552 /* much like mp_rshd this is implemented using a sliding window
2553 * except the window goes the otherway around. Copying from
2554 * the bottom to the top. see bn_mp_rshd.c for more info.
2556 for (x = a->used - 1; x >= b; x--) {
2557 *top-- = *bottom--;
2560 /* zero the lower digits */
2561 top = a->dp;
2562 for (x = 0; x < b; x++) {
2563 *top++ = 0;
2566 return MP_OKAY;
2569 /* c = a mod b, 0 <= c < b */
2571 mp_mod (const mp_int * a, mp_int * b, mp_int * c)
2573 mp_int t;
2574 int res;
2576 if ((res = mp_init (&t)) != MP_OKAY) {
2577 return res;
2580 if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
2581 mp_clear (&t);
2582 return res;
2585 if (t.sign != b->sign) {
2586 res = mp_add (b, &t, c);
2587 } else {
2588 res = MP_OKAY;
2589 mp_exch (&t, c);
2592 mp_clear (&t);
2593 return res;
2596 /* calc a value mod 2**b */
2598 mp_mod_2d (const mp_int * a, int b, mp_int * c)
2600 int x, res;
2602 /* if b is <= 0 then zero the int */
2603 if (b <= 0) {
2604 mp_zero (c);
2605 return MP_OKAY;
2608 /* if the modulus is larger than the value than return */
2609 if (b > (int) (a->used * DIGIT_BIT)) {
2610 res = mp_copy (a, c);
2611 return res;
2614 /* copy */
2615 if ((res = mp_copy (a, c)) != MP_OKAY) {
2616 return res;
2619 /* zero digits above the last digit of the modulus */
2620 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
2621 c->dp[x] = 0;
2623 /* clear the digit that is not completely outside/inside the modulus */
2624 c->dp[b / DIGIT_BIT] &=
2625 (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
2626 mp_clamp (c);
2627 return MP_OKAY;
2631 mp_mod_d (const mp_int * a, mp_digit b, mp_digit * c)
2633 return mp_div_d(a, b, NULL, c);
2637 * shifts with subtractions when the result is greater than b.
2639 * The method is slightly modified to shift B unconditionally up to just under
2640 * the leading bit of b. This saves a lot of multiple precision shifting.
2642 int mp_montgomery_calc_normalization (mp_int * a, const mp_int * b)
2644 int x, bits, res;
2646 /* how many bits of last digit does b use */
2647 bits = mp_count_bits (b) % DIGIT_BIT;
2650 if (b->used > 1) {
2651 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
2652 return res;
2654 } else {
2655 mp_set(a, 1);
2656 bits = 1;
2660 /* now compute C = A * B mod b */
2661 for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
2662 if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
2663 return res;
2665 if (mp_cmp_mag (a, b) != MP_LT) {
2666 if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
2667 return res;
2672 return MP_OKAY;
2675 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
2677 mp_montgomery_reduce (mp_int * x, const mp_int * n, mp_digit rho)
2679 int ix, res, digs;
2680 mp_digit mu;
2682 /* can the fast reduction [comba] method be used?
2684 * Note that unlike in mul you're safely allowed *less*
2685 * than the available columns [255 per default] since carries
2686 * are fixed up in the inner loop.
2688 digs = n->used * 2 + 1;
2689 if ((digs < MP_WARRAY) &&
2690 n->used <
2691 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2692 return fast_mp_montgomery_reduce (x, n, rho);
2695 /* grow the input as required */
2696 if (x->alloc < digs) {
2697 if ((res = mp_grow (x, digs)) != MP_OKAY) {
2698 return res;
2701 x->used = digs;
2703 for (ix = 0; ix < n->used; ix++) {
2704 /* mu = ai * rho mod b
2706 * The value of rho must be precalculated via
2707 * montgomery_setup() such that
2708 * it equals -1/n0 mod b this allows the
2709 * following inner loop to reduce the
2710 * input one digit at a time
2712 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
2714 /* a = a + mu * m * b**i */
2716 register int iy;
2717 register mp_digit *tmpn, *tmpx, u;
2718 register mp_word r;
2720 /* alias for digits of the modulus */
2721 tmpn = n->dp;
2723 /* alias for the digits of x [the input] */
2724 tmpx = x->dp + ix;
2726 /* set the carry to zero */
2727 u = 0;
2729 /* Multiply and add in place */
2730 for (iy = 0; iy < n->used; iy++) {
2731 /* compute product and sum */
2732 r = ((mp_word)mu) * ((mp_word)*tmpn++) +
2733 ((mp_word) u) + ((mp_word) * tmpx);
2735 /* get carry */
2736 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2738 /* fix digit */
2739 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
2741 /* At this point the ix'th digit of x should be zero */
2744 /* propagate carries upwards as required*/
2745 while (u) {
2746 *tmpx += u;
2747 u = *tmpx >> DIGIT_BIT;
2748 *tmpx++ &= MP_MASK;
2753 /* at this point the n.used'th least
2754 * significant digits of x are all zero
2755 * which means we can shift x to the
2756 * right by n.used digits and the
2757 * residue is unchanged.
2760 /* x = x/b**n.used */
2761 mp_clamp(x);
2762 mp_rshd (x, n->used);
2764 /* if x >= n then x = x - n */
2765 if (mp_cmp_mag (x, n) != MP_LT) {
2766 return s_mp_sub (x, n, x);
2769 return MP_OKAY;
2772 /* setups the montgomery reduction stuff */
2774 mp_montgomery_setup (const mp_int * n, mp_digit * rho)
2776 mp_digit x, b;
2778 /* fast inversion mod 2**k
2780 * Based on the fact that
2782 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
2783 * => 2*X*A - X*X*A*A = 1
2784 * => 2*(1) - (1) = 1
2786 b = n->dp[0];
2788 if ((b & 1) == 0) {
2789 return MP_VAL;
2792 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
2793 x *= 2 - b * x; /* here x*a==1 mod 2**8 */
2794 x *= 2 - b * x; /* here x*a==1 mod 2**16 */
2795 x *= 2 - b * x; /* here x*a==1 mod 2**32 */
2797 /* rho = -1/m mod b */
2798 *rho = (((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
2800 return MP_OKAY;
2803 /* high level multiplication (handles sign) */
2804 int mp_mul (const mp_int * a, const mp_int * b, mp_int * c)
2806 int res, neg;
2807 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
2809 /* use Karatsuba? */
2810 if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
2811 res = mp_karatsuba_mul (a, b, c);
2812 } else
2814 /* can we use the fast multiplier?
2816 * The fast multiplier can be used if the output will
2817 * have less than MP_WARRAY digits and the number of
2818 * digits won't affect carry propagation
2820 int digs = a->used + b->used + 1;
2822 if ((digs < MP_WARRAY) &&
2823 MIN(a->used, b->used) <=
2824 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2825 res = fast_s_mp_mul_digs (a, b, c, digs);
2826 } else
2827 res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
2829 c->sign = (c->used > 0) ? neg : MP_ZPOS;
2830 return res;
2833 /* b = a*2 */
2834 int mp_mul_2(const mp_int * a, mp_int * b)
2836 int x, res, oldused;
2838 /* grow to accommodate result */
2839 if (b->alloc < a->used + 1) {
2840 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
2841 return res;
2845 oldused = b->used;
2846 b->used = a->used;
2849 register mp_digit r, rr, *tmpa, *tmpb;
2851 /* alias for source */
2852 tmpa = a->dp;
2854 /* alias for dest */
2855 tmpb = b->dp;
2857 /* carry */
2858 r = 0;
2859 for (x = 0; x < a->used; x++) {
2861 /* get what will be the *next* carry bit from the
2862 * MSB of the current digit
2864 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
2866 /* now shift up this digit, add in the carry [from the previous] */
2867 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
2869 /* copy the carry that would be from the source
2870 * digit into the next iteration
2872 r = rr;
2875 /* new leading digit? */
2876 if (r != 0) {
2877 /* add a MSB which is always 1 at this point */
2878 *tmpb = 1;
2879 ++(b->used);
2882 /* now zero any excess digits on the destination
2883 * that we didn't write to
2885 tmpb = b->dp + b->used;
2886 for (x = b->used; x < oldused; x++) {
2887 *tmpb++ = 0;
2890 b->sign = a->sign;
2891 return MP_OKAY;
2894 /* shift left by a certain bit count */
2895 int mp_mul_2d (const mp_int * a, int b, mp_int * c)
2897 mp_digit d;
2898 int res;
2900 /* copy */
2901 if (a != c) {
2902 if ((res = mp_copy (a, c)) != MP_OKAY) {
2903 return res;
2907 if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
2908 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
2909 return res;
2913 /* shift by as many digits in the bit count */
2914 if (b >= (int)DIGIT_BIT) {
2915 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
2916 return res;
2920 /* shift any bit count < DIGIT_BIT */
2921 d = (mp_digit) (b % DIGIT_BIT);
2922 if (d != 0) {
2923 register mp_digit *tmpc, shift, mask, r, rr;
2924 register int x;
2926 /* bitmask for carries */
2927 mask = (((mp_digit)1) << d) - 1;
2929 /* shift for msbs */
2930 shift = DIGIT_BIT - d;
2932 /* alias */
2933 tmpc = c->dp;
2935 /* carry */
2936 r = 0;
2937 for (x = 0; x < c->used; x++) {
2938 /* get the higher bits of the current word */
2939 rr = (*tmpc >> shift) & mask;
2941 /* shift the current word and OR in the carry */
2942 *tmpc = ((*tmpc << d) | r) & MP_MASK;
2943 ++tmpc;
2945 /* set the carry to the carry bits of the current word */
2946 r = rr;
2949 /* set final carry */
2950 if (r != 0) {
2951 c->dp[(c->used)++] = r;
2954 mp_clamp (c);
2955 return MP_OKAY;
2958 /* multiply by a digit */
2960 mp_mul_d (const mp_int * a, mp_digit b, mp_int * c)
2962 mp_digit u, *tmpa, *tmpc;
2963 mp_word r;
2964 int ix, res, olduse;
2966 /* make sure c is big enough to hold a*b */
2967 if (c->alloc < a->used + 1) {
2968 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
2969 return res;
2973 /* get the original destinations used count */
2974 olduse = c->used;
2976 /* set the sign */
2977 c->sign = a->sign;
2979 /* alias for a->dp [source] */
2980 tmpa = a->dp;
2982 /* alias for c->dp [dest] */
2983 tmpc = c->dp;
2985 /* zero carry */
2986 u = 0;
2988 /* compute columns */
2989 for (ix = 0; ix < a->used; ix++) {
2990 /* compute product and carry sum for this term */
2991 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
2993 /* mask off higher bits to get a single digit */
2994 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
2996 /* send carry into next iteration */
2997 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3000 /* store final carry [if any] */
3001 *tmpc++ = u;
3003 /* now zero digits above the top */
3004 while (ix++ < olduse) {
3005 *tmpc++ = 0;
3008 /* set used count */
3009 c->used = a->used + 1;
3010 mp_clamp(c);
3012 return MP_OKAY;
3015 /* d = a * b (mod c) */
3017 mp_mulmod (const mp_int * a, const mp_int * b, mp_int * c, mp_int * d)
3019 int res;
3020 mp_int t;
3022 if ((res = mp_init (&t)) != MP_OKAY) {
3023 return res;
3026 if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
3027 mp_clear (&t);
3028 return res;
3030 res = mp_mod (&t, c, d);
3031 mp_clear (&t);
3032 return res;
3035 /* determines if an integers is divisible by one
3036 * of the first PRIME_SIZE primes or not
3038 * sets result to 0 if not, 1 if yes
3040 int mp_prime_is_divisible (const mp_int * a, int *result)
3042 int err, ix;
3043 mp_digit res;
3045 /* default to not */
3046 *result = MP_NO;
3048 for (ix = 0; ix < PRIME_SIZE; ix++) {
3049 /* what is a mod __prime_tab[ix] */
3050 if ((err = mp_mod_d (a, __prime_tab[ix], &res)) != MP_OKAY) {
3051 return err;
3054 /* is the residue zero? */
3055 if (res == 0) {
3056 *result = MP_YES;
3057 return MP_OKAY;
3061 return MP_OKAY;
3064 /* performs a variable number of rounds of Miller-Rabin
3066 * Probability of error after t rounds is no more than
3069 * Sets result to 1 if probably prime, 0 otherwise
3071 int mp_prime_is_prime (mp_int * a, int t, int *result)
3073 mp_int b;
3074 int ix, err, res;
3076 /* default to no */
3077 *result = MP_NO;
3079 /* valid value of t? */
3080 if (t <= 0 || t > PRIME_SIZE) {
3081 return MP_VAL;
3084 /* is the input equal to one of the primes in the table? */
3085 for (ix = 0; ix < PRIME_SIZE; ix++) {
3086 if (mp_cmp_d(a, __prime_tab[ix]) == MP_EQ) {
3087 *result = 1;
3088 return MP_OKAY;
3092 /* first perform trial division */
3093 if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
3094 return err;
3097 /* return if it was trivially divisible */
3098 if (res == MP_YES) {
3099 return MP_OKAY;
3102 /* now perform the miller-rabin rounds */
3103 if ((err = mp_init (&b)) != MP_OKAY) {
3104 return err;
3107 for (ix = 0; ix < t; ix++) {
3108 /* set the prime */
3109 mp_set (&b, __prime_tab[ix]);
3111 if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
3112 goto __B;
3115 if (res == MP_NO) {
3116 goto __B;
3120 /* passed the test */
3121 *result = MP_YES;
3122 __B:mp_clear (&b);
3123 return err;
3126 /* Miller-Rabin test of "a" to the base of "b" as described in
3127 * HAC pp. 139 Algorithm 4.24
3129 * Sets result to 0 if definitely composite or 1 if probably prime.
3130 * Randomly the chance of error is no more than 1/4 and often
3131 * very much lower.
3133 int mp_prime_miller_rabin (mp_int * a, const mp_int * b, int *result)
3135 mp_int n1, y, r;
3136 int s, j, err;
3138 /* default */
3139 *result = MP_NO;
3141 /* ensure b > 1 */
3142 if (mp_cmp_d(b, 1) != MP_GT) {
3143 return MP_VAL;
3146 /* get n1 = a - 1 */
3147 if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
3148 return err;
3150 if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
3151 goto __N1;
3154 /* set 2**s * r = n1 */
3155 if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
3156 goto __N1;
3159 /* count the number of least significant bits
3160 * which are zero
3162 s = mp_cnt_lsb(&r);
3164 /* now divide n - 1 by 2**s */
3165 if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
3166 goto __R;
3169 /* compute y = b**r mod a */
3170 if ((err = mp_init (&y)) != MP_OKAY) {
3171 goto __R;
3173 if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
3174 goto __Y;
3177 /* if y != 1 and y != n1 do */
3178 if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
3179 j = 1;
3180 /* while j <= s-1 and y != n1 */
3181 while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
3182 if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
3183 goto __Y;
3186 /* if y == 1 then composite */
3187 if (mp_cmp_d (&y, 1) == MP_EQ) {
3188 goto __Y;
3191 ++j;
3194 /* if y != n1 then composite */
3195 if (mp_cmp (&y, &n1) != MP_EQ) {
3196 goto __Y;
3200 /* probably prime now */
3201 *result = MP_YES;
3202 __Y:mp_clear (&y);
3203 __R:mp_clear (&r);
3204 __N1:mp_clear (&n1);
3205 return err;
3208 static const struct {
3209 int k, t;
3210 } sizes[] = {
3211 { 128, 28 },
3212 { 256, 16 },
3213 { 384, 10 },
3214 { 512, 7 },
3215 { 640, 6 },
3216 { 768, 5 },
3217 { 896, 4 },
3218 { 1024, 4 }
3221 /* returns # of RM trials required for a given bit size */
3222 int mp_prime_rabin_miller_trials(int size)
3224 int x;
3226 for (x = 0; x < (int)(sizeof(sizes)/(sizeof(sizes[0]))); x++) {
3227 if (sizes[x].k == size) {
3228 return sizes[x].t;
3229 } else if (sizes[x].k > size) {
3230 return (x == 0) ? sizes[0].t : sizes[x - 1].t;
3233 return sizes[x-1].t + 1;
3236 /* makes a truly random prime of a given size (bits),
3238 * Flags are as follows:
3240 * LTM_PRIME_BBS - make prime congruent to 3 mod 4
3241 * LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS)
3242 * LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero
3243 * LTM_PRIME_2MSB_ON - make the 2nd highest bit one
3245 * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can
3246 * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself
3247 * so it can be NULL
3251 /* This is possibly the mother of all prime generation functions, muahahahahaha! */
3252 int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat)
3254 unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb;
3255 int res, err, bsize, maskOR_msb_offset;
3257 /* sanity check the input */
3258 if (size <= 1 || t <= 0) {
3259 return MP_VAL;
3262 /* LTM_PRIME_SAFE implies LTM_PRIME_BBS */
3263 if (flags & LTM_PRIME_SAFE) {
3264 flags |= LTM_PRIME_BBS;
3267 /* calc the byte size */
3268 bsize = (size>>3)+((size&7)?1:0);
3270 /* we need a buffer of bsize bytes */
3271 tmp = malloc(bsize);
3272 if (tmp == NULL) {
3273 return MP_MEM;
3276 /* calc the maskAND value for the MSbyte*/
3277 maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7)));
3279 /* calc the maskOR_msb */
3280 maskOR_msb = 0;
3281 maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;
3282 if (flags & LTM_PRIME_2MSB_ON) {
3283 maskOR_msb |= 1 << ((size - 2) & 7);
3284 } else if (flags & LTM_PRIME_2MSB_OFF) {
3285 maskAND &= ~(1 << ((size - 2) & 7));
3288 /* get the maskOR_lsb */
3289 maskOR_lsb = 0;
3290 if (flags & LTM_PRIME_BBS) {
3291 maskOR_lsb |= 3;
3294 do {
3295 /* read the bytes */
3296 if (cb(tmp, bsize, dat) != bsize) {
3297 err = MP_VAL;
3298 goto error;
3301 /* work over the MSbyte */
3302 tmp[0] &= maskAND;
3303 tmp[0] |= 1 << ((size - 1) & 7);
3305 /* mix in the maskORs */
3306 tmp[maskOR_msb_offset] |= maskOR_msb;
3307 tmp[bsize-1] |= maskOR_lsb;
3309 /* read it in */
3310 if ((err = mp_read_unsigned_bin(a, tmp, bsize)) != MP_OKAY) { goto error; }
3312 /* is it prime? */
3313 if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; }
3314 if (res == MP_NO) {
3315 continue;
3318 if (flags & LTM_PRIME_SAFE) {
3319 /* see if (a-1)/2 is prime */
3320 if ((err = mp_sub_d(a, 1, a)) != MP_OKAY) { goto error; }
3321 if ((err = mp_div_2(a, a)) != MP_OKAY) { goto error; }
3323 /* is it prime? */
3324 if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; }
3326 } while (res == MP_NO);
3328 if (flags & LTM_PRIME_SAFE) {
3329 /* restore a to the original value */
3330 if ((err = mp_mul_2(a, a)) != MP_OKAY) { goto error; }
3331 if ((err = mp_add_d(a, 1, a)) != MP_OKAY) { goto error; }
3334 err = MP_OKAY;
3335 error:
3336 free(tmp);
3337 return err;
3340 /* reads an unsigned char array, assumes the msb is stored first [big endian] */
3342 mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
3344 int res;
3346 /* make sure there are at least two digits */
3347 if (a->alloc < 2) {
3348 if ((res = mp_grow(a, 2)) != MP_OKAY) {
3349 return res;
3353 /* zero the int */
3354 mp_zero (a);
3356 /* read the bytes in */
3357 while (c-- > 0) {
3358 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
3359 return res;
3362 a->dp[0] |= *b++;
3363 a->used += 1;
3365 mp_clamp (a);
3366 return MP_OKAY;
3369 /* reduces x mod m, assumes 0 < x < m**2, mu is
3370 * precomputed via mp_reduce_setup.
3371 * From HAC pp.604 Algorithm 14.42
3374 mp_reduce (mp_int * x, const mp_int * m, const mp_int * mu)
3376 mp_int q;
3377 int res, um = m->used;
3379 /* q = x */
3380 if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
3381 return res;
3384 /* q1 = x / b**(k-1) */
3385 mp_rshd (&q, um - 1);
3387 /* according to HAC this optimization is ok */
3388 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
3389 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
3390 goto CLEANUP;
3392 } else {
3393 if ((res = s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) {
3394 goto CLEANUP;
3398 /* q3 = q2 / b**(k+1) */
3399 mp_rshd (&q, um + 1);
3401 /* x = x mod b**(k+1), quick (no division) */
3402 if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
3403 goto CLEANUP;
3406 /* q = q * m mod b**(k+1), quick (no division) */
3407 if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
3408 goto CLEANUP;
3411 /* x = x - q */
3412 if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
3413 goto CLEANUP;
3416 /* If x < 0, add b**(k+1) to it */
3417 if (mp_cmp_d (x, 0) == MP_LT) {
3418 mp_set (&q, 1);
3419 if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
3420 goto CLEANUP;
3421 if ((res = mp_add (x, &q, x)) != MP_OKAY)
3422 goto CLEANUP;
3425 /* Back off if it's too big */
3426 while (mp_cmp (x, m) != MP_LT) {
3427 if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
3428 goto CLEANUP;
3432 CLEANUP:
3433 mp_clear (&q);
3435 return res;
3438 /* reduces a modulo n where n is of the form 2**p - d */
3440 mp_reduce_2k(mp_int *a, const mp_int *n, mp_digit d)
3442 mp_int q;
3443 int p, res;
3445 if ((res = mp_init(&q)) != MP_OKAY) {
3446 return res;
3449 p = mp_count_bits(n);
3450 top:
3451 /* q = a/2**p, a = a mod 2**p */
3452 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
3453 goto ERR;
3456 if (d != 1) {
3457 /* q = q * d */
3458 if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
3459 goto ERR;
3463 /* a = a + q */
3464 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
3465 goto ERR;
3468 if (mp_cmp_mag(a, n) != MP_LT) {
3469 s_mp_sub(a, n, a);
3470 goto top;
3473 ERR:
3474 mp_clear(&q);
3475 return res;
3478 /* determines the setup value */
3479 int
3480 mp_reduce_2k_setup(const mp_int *a, mp_digit *d)
3482 int res, p;
3483 mp_int tmp;
3485 if ((res = mp_init(&tmp)) != MP_OKAY) {
3486 return res;
3489 p = mp_count_bits(a);
3490 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
3491 mp_clear(&tmp);
3492 return res;
3495 if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
3496 mp_clear(&tmp);
3497 return res;
3500 *d = tmp.dp[0];
3501 mp_clear(&tmp);
3502 return MP_OKAY;
3505 /* pre-calculate the value required for Barrett reduction
3506 * For a given modulus "b" it calulates the value required in "a"
3508 int mp_reduce_setup (mp_int * a, const mp_int * b)
3510 int res;
3512 if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
3513 return res;
3515 return mp_div (a, b, a, NULL);
3518 /* shift right a certain amount of digits */
3519 void mp_rshd (mp_int * a, int b)
3521 int x;
3523 /* if b <= 0 then ignore it */
3524 if (b <= 0) {
3525 return;
3528 /* if b > used then simply zero it and return */
3529 if (a->used <= b) {
3530 mp_zero (a);
3531 return;
3535 register mp_digit *bottom, *top;
3537 /* shift the digits down */
3539 /* bottom */
3540 bottom = a->dp;
3542 /* top [offset into digits] */
3543 top = a->dp + b;
3545 /* this is implemented as a sliding window where
3546 * the window is b-digits long and digits from
3547 * the top of the window are copied to the bottom
3549 * e.g.
3551 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
3552 /\ | ---->
3553 \-------------------/ ---->
3555 for (x = 0; x < (a->used - b); x++) {
3556 *bottom++ = *top++;
3559 /* zero the top digits */
3560 for (; x < a->used; x++) {
3561 *bottom++ = 0;
3565 /* remove excess digits */
3566 a->used -= b;
3569 /* set to a digit */
3570 void mp_set (mp_int * a, mp_digit b)
3572 mp_zero (a);
3573 a->dp[0] = b & MP_MASK;
3574 a->used = (a->dp[0] != 0) ? 1 : 0;
3577 /* set a 32-bit const */
3578 int mp_set_int (mp_int * a, unsigned long b)
3580 int x, res;
3582 mp_zero (a);
3584 /* set four bits at a time */
3585 for (x = 0; x < 8; x++) {
3586 /* shift the number up four bits */
3587 if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
3588 return res;
3591 /* OR in the top four bits of the source */
3592 a->dp[0] |= (b >> 28) & 15;
3594 /* shift the source up to the next four bits */
3595 b <<= 4;
3597 /* ensure that digits are not clamped off */
3598 a->used += 1;
3600 mp_clamp (a);
3601 return MP_OKAY;
3604 /* shrink a bignum */
3605 int mp_shrink (mp_int * a)
3607 mp_digit *tmp;
3608 if (a->alloc != a->used && a->used > 0) {
3609 if ((tmp = realloc (a->dp, sizeof (mp_digit) * a->used)) == NULL) {
3610 return MP_MEM;
3612 a->dp = tmp;
3613 a->alloc = a->used;
3615 return MP_OKAY;
3618 /* get the size for an signed equivalent */
3619 int mp_signed_bin_size (const mp_int * a)
3621 return 1 + mp_unsigned_bin_size (a);
3624 /* computes b = a*a */
3626 mp_sqr (const mp_int * a, mp_int * b)
3628 int res;
3630 if (a->used >= KARATSUBA_SQR_CUTOFF) {
3631 res = mp_karatsuba_sqr (a, b);
3632 } else
3634 /* can we use the fast comba multiplier? */
3635 if ((a->used * 2 + 1) < MP_WARRAY &&
3636 a->used <
3637 (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
3638 res = fast_s_mp_sqr (a, b);
3639 } else
3640 res = s_mp_sqr (a, b);
3642 b->sign = MP_ZPOS;
3643 return res;
3646 /* c = a * a (mod b) */
3648 mp_sqrmod (const mp_int * a, mp_int * b, mp_int * c)
3650 int res;
3651 mp_int t;
3653 if ((res = mp_init (&t)) != MP_OKAY) {
3654 return res;
3657 if ((res = mp_sqr (a, &t)) != MP_OKAY) {
3658 mp_clear (&t);
3659 return res;
3661 res = mp_mod (&t, b, c);
3662 mp_clear (&t);
3663 return res;
3666 /* high level subtraction (handles signs) */
3668 mp_sub (mp_int * a, mp_int * b, mp_int * c)
3670 int sa, sb, res;
3672 sa = a->sign;
3673 sb = b->sign;
3675 if (sa != sb) {
3676 /* subtract a negative from a positive, OR */
3677 /* subtract a positive from a negative. */
3678 /* In either case, ADD their magnitudes, */
3679 /* and use the sign of the first number. */
3680 c->sign = sa;
3681 res = s_mp_add (a, b, c);
3682 } else {
3683 /* subtract a positive from a positive, OR */
3684 /* subtract a negative from a negative. */
3685 /* First, take the difference between their */
3686 /* magnitudes, then... */
3687 if (mp_cmp_mag (a, b) != MP_LT) {
3688 /* Copy the sign from the first */
3689 c->sign = sa;
3690 /* The first has a larger or equal magnitude */
3691 res = s_mp_sub (a, b, c);
3692 } else {
3693 /* The result has the *opposite* sign from */
3694 /* the first number. */
3695 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
3696 /* The second has a larger magnitude */
3697 res = s_mp_sub (b, a, c);
3700 return res;
3703 /* single digit subtraction */
3705 mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
3707 mp_digit *tmpa, *tmpc, mu;
3708 int res, ix, oldused;
3710 /* grow c as required */
3711 if (c->alloc < a->used + 1) {
3712 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
3713 return res;
3717 /* if a is negative just do an unsigned
3718 * addition [with fudged signs]
3720 if (a->sign == MP_NEG) {
3721 a->sign = MP_ZPOS;
3722 res = mp_add_d(a, b, c);
3723 a->sign = c->sign = MP_NEG;
3724 return res;
3727 /* setup regs */
3728 oldused = c->used;
3729 tmpa = a->dp;
3730 tmpc = c->dp;
3732 /* if a <= b simply fix the single digit */
3733 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
3734 if (a->used == 1) {
3735 *tmpc++ = b - *tmpa;
3736 } else {
3737 *tmpc++ = b;
3739 ix = 1;
3741 /* negative/1digit */
3742 c->sign = MP_NEG;
3743 c->used = 1;
3744 } else {
3745 /* positive/size */
3746 c->sign = MP_ZPOS;
3747 c->used = a->used;
3749 /* subtract first digit */
3750 *tmpc = *tmpa++ - b;
3751 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3752 *tmpc++ &= MP_MASK;
3754 /* handle rest of the digits */
3755 for (ix = 1; ix < a->used; ix++) {
3756 *tmpc = *tmpa++ - mu;
3757 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3758 *tmpc++ &= MP_MASK;
3762 /* zero excess digits */
3763 while (ix++ < oldused) {
3764 *tmpc++ = 0;
3766 mp_clamp(c);
3767 return MP_OKAY;
3770 /* store in unsigned [big endian] format */
3772 mp_to_unsigned_bin (const mp_int * a, unsigned char *b)
3774 int x, res;
3775 mp_int t;
3777 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
3778 return res;
3781 x = 0;
3782 while (mp_iszero (&t) == 0) {
3783 b[x++] = (unsigned char) (t.dp[0] & 255);
3784 if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
3785 mp_clear (&t);
3786 return res;
3789 bn_reverse (b, x);
3790 mp_clear (&t);
3791 return MP_OKAY;
3794 /* get the size for an unsigned equivalent */
3796 mp_unsigned_bin_size (const mp_int * a)
3798 int size = mp_count_bits (a);
3799 return (size / 8 + ((size & 7) != 0 ? 1 : 0));
3802 /* set to zero */
3803 void
3804 mp_zero (mp_int * a)
3806 a->sign = MP_ZPOS;
3807 a->used = 0;
3808 memset (a->dp, 0, sizeof (mp_digit) * a->alloc);
3811 const mp_digit __prime_tab[] = {
3812 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
3813 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
3814 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
3815 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
3816 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
3817 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
3818 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
3819 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
3821 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
3822 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
3823 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
3824 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
3825 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
3826 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
3827 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
3828 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
3830 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
3831 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
3832 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
3833 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
3834 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
3835 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
3836 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
3837 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
3839 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
3840 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
3841 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
3842 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
3843 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
3844 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
3845 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
3846 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
3849 /* reverse an array, used for radix code */
3850 void
3851 bn_reverse (unsigned char *s, int len)
3853 int ix, iy;
3854 unsigned char t;
3856 ix = 0;
3857 iy = len - 1;
3858 while (ix < iy) {
3859 t = s[ix];
3860 s[ix] = s[iy];
3861 s[iy] = t;
3862 ++ix;
3863 --iy;
3867 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
3869 s_mp_add (mp_int * a, mp_int * b, mp_int * c)
3871 mp_int *x;
3872 int olduse, res, min, max;
3874 /* find sizes, we let |a| <= |b| which means we have to sort
3875 * them. "x" will point to the input with the most digits
3877 if (a->used > b->used) {
3878 min = b->used;
3879 max = a->used;
3880 x = a;
3881 } else {
3882 min = a->used;
3883 max = b->used;
3884 x = b;
3887 /* init result */
3888 if (c->alloc < max + 1) {
3889 if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
3890 return res;
3894 /* get old used digit count and set new one */
3895 olduse = c->used;
3896 c->used = max + 1;
3899 register mp_digit u, *tmpa, *tmpb, *tmpc;
3900 register int i;
3902 /* alias for digit pointers */
3904 /* first input */
3905 tmpa = a->dp;
3907 /* second input */
3908 tmpb = b->dp;
3910 /* destination */
3911 tmpc = c->dp;
3913 /* zero the carry */
3914 u = 0;
3915 for (i = 0; i < min; i++) {
3916 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
3917 *tmpc = *tmpa++ + *tmpb++ + u;
3919 /* U = carry bit of T[i] */
3920 u = *tmpc >> ((mp_digit)DIGIT_BIT);
3922 /* take away carry bit from T[i] */
3923 *tmpc++ &= MP_MASK;
3926 /* now copy higher words if any, that is in A+B
3927 * if A or B has more digits add those in
3929 if (min != max) {
3930 for (; i < max; i++) {
3931 /* T[i] = X[i] + U */
3932 *tmpc = x->dp[i] + u;
3934 /* U = carry bit of T[i] */
3935 u = *tmpc >> ((mp_digit)DIGIT_BIT);
3937 /* take away carry bit from T[i] */
3938 *tmpc++ &= MP_MASK;
3942 /* add carry */
3943 *tmpc++ = u;
3945 /* clear digits above oldused */
3946 for (i = c->used; i < olduse; i++) {
3947 *tmpc++ = 0;
3951 mp_clamp (c);
3952 return MP_OKAY;
3955 int s_mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y)
3957 mp_int M[256], res, mu;
3958 mp_digit buf;
3959 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
3961 /* find window size */
3962 x = mp_count_bits (X);
3963 if (x <= 7) {
3964 winsize = 2;
3965 } else if (x <= 36) {
3966 winsize = 3;
3967 } else if (x <= 140) {
3968 winsize = 4;
3969 } else if (x <= 450) {
3970 winsize = 5;
3971 } else if (x <= 1303) {
3972 winsize = 6;
3973 } else if (x <= 3529) {
3974 winsize = 7;
3975 } else {
3976 winsize = 8;
3979 /* init M array */
3980 /* init first cell */
3981 if ((err = mp_init(&M[1])) != MP_OKAY) {
3982 return err;
3985 /* now init the second half of the array */
3986 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3987 if ((err = mp_init(&M[x])) != MP_OKAY) {
3988 for (y = 1<<(winsize-1); y < x; y++) {
3989 mp_clear (&M[y]);
3991 mp_clear(&M[1]);
3992 return err;
3996 /* create mu, used for Barrett reduction */
3997 if ((err = mp_init (&mu)) != MP_OKAY) {
3998 goto __M;
4000 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
4001 goto __MU;
4004 /* create M table
4006 * The M table contains powers of the base,
4007 * e.g. M[x] = G**x mod P
4009 * The first half of the table is not
4010 * computed though accept for M[0] and M[1]
4012 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
4013 goto __MU;
4016 /* compute the value at M[1<<(winsize-1)] by squaring
4017 * M[1] (winsize-1) times
4019 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
4020 goto __MU;
4023 for (x = 0; x < (winsize - 1); x++) {
4024 if ((err = mp_sqr (&M[1 << (winsize - 1)],
4025 &M[1 << (winsize - 1)])) != MP_OKAY) {
4026 goto __MU;
4028 if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
4029 goto __MU;
4033 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
4034 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
4036 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
4037 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
4038 goto __MU;
4040 if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
4041 goto __MU;
4045 /* setup result */
4046 if ((err = mp_init (&res)) != MP_OKAY) {
4047 goto __MU;
4049 mp_set (&res, 1);
4051 /* set initial mode and bit cnt */
4052 mode = 0;
4053 bitcnt = 1;
4054 buf = 0;
4055 digidx = X->used - 1;
4056 bitcpy = 0;
4057 bitbuf = 0;
4059 for (;;) {
4060 /* grab next digit as required */
4061 if (--bitcnt == 0) {
4062 /* if digidx == -1 we are out of digits */
4063 if (digidx == -1) {
4064 break;
4066 /* read next digit and reset the bitcnt */
4067 buf = X->dp[digidx--];
4068 bitcnt = (int) DIGIT_BIT;
4071 /* grab the next msb from the exponent */
4072 y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
4073 buf <<= (mp_digit)1;
4075 /* if the bit is zero and mode == 0 then we ignore it
4076 * These represent the leading zero bits before the first 1 bit
4077 * in the exponent. Technically this opt is not required but it
4078 * does lower the # of trivial squaring/reductions used
4080 if (mode == 0 && y == 0) {
4081 continue;
4084 /* if the bit is zero and mode == 1 then we square */
4085 if (mode == 1 && y == 0) {
4086 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4087 goto __RES;
4089 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4090 goto __RES;
4092 continue;
4095 /* else we add it to the window */
4096 bitbuf |= (y << (winsize - ++bitcpy));
4097 mode = 2;
4099 if (bitcpy == winsize) {
4100 /* ok window is filled so square as required and multiply */
4101 /* square first */
4102 for (x = 0; x < winsize; x++) {
4103 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4104 goto __RES;
4106 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4107 goto __RES;
4111 /* then multiply */
4112 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
4113 goto __RES;
4115 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4116 goto __RES;
4119 /* empty window and reset */
4120 bitcpy = 0;
4121 bitbuf = 0;
4122 mode = 1;
4126 /* if bits remain then square/multiply */
4127 if (mode == 2 && bitcpy > 0) {
4128 /* square then multiply if the bit is set */
4129 for (x = 0; x < bitcpy; x++) {
4130 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4131 goto __RES;
4133 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4134 goto __RES;
4137 bitbuf <<= 1;
4138 if ((bitbuf & (1 << winsize)) != 0) {
4139 /* then multiply */
4140 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
4141 goto __RES;
4143 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4144 goto __RES;
4150 mp_exch (&res, Y);
4151 err = MP_OKAY;
4152 __RES:mp_clear (&res);
4153 __MU:mp_clear (&mu);
4154 __M:
4155 mp_clear(&M[1]);
4156 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4157 mp_clear (&M[x]);
4159 return err;
4162 /* multiplies |a| * |b| and only computes up to digs digits of result
4163 * HAC pp. 595, Algorithm 14.12 Modified so you can control how
4164 * many digits of output are created.
4167 s_mp_mul_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
4169 mp_int t;
4170 int res, pa, pb, ix, iy;
4171 mp_digit u;
4172 mp_word r;
4173 mp_digit tmpx, *tmpt, *tmpy;
4175 /* can we use the fast multiplier? */
4176 if (((digs) < MP_WARRAY) &&
4177 MIN (a->used, b->used) <
4178 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4179 return fast_s_mp_mul_digs (a, b, c, digs);
4182 if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
4183 return res;
4185 t.used = digs;
4187 /* compute the digits of the product directly */
4188 pa = a->used;
4189 for (ix = 0; ix < pa; ix++) {
4190 /* set the carry to zero */
4191 u = 0;
4193 /* limit ourselves to making digs digits of output */
4194 pb = MIN (b->used, digs - ix);
4196 /* setup some aliases */
4197 /* copy of the digit from a used within the nested loop */
4198 tmpx = a->dp[ix];
4200 /* an alias for the destination shifted ix places */
4201 tmpt = t.dp + ix;
4203 /* an alias for the digits of b */
4204 tmpy = b->dp;
4206 /* compute the columns of the output and propagate the carry */
4207 for (iy = 0; iy < pb; iy++) {
4208 /* compute the column as a mp_word */
4209 r = ((mp_word)*tmpt) +
4210 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
4211 ((mp_word) u);
4213 /* the new column is the lower part of the result */
4214 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4216 /* get the carry word from the result */
4217 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
4219 /* set carry if it is placed below digs */
4220 if (ix + iy < digs) {
4221 *tmpt = u;
4225 mp_clamp (&t);
4226 mp_exch (&t, c);
4228 mp_clear (&t);
4229 return MP_OKAY;
4232 /* multiplies |a| * |b| and does not compute the lower digs digits
4233 * [meant to get the higher part of the product]
4236 s_mp_mul_high_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
4238 mp_int t;
4239 int res, pa, pb, ix, iy;
4240 mp_digit u;
4241 mp_word r;
4242 mp_digit tmpx, *tmpt, *tmpy;
4244 /* can we use the fast multiplier? */
4245 if (((a->used + b->used + 1) < MP_WARRAY)
4246 && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4247 return fast_s_mp_mul_high_digs (a, b, c, digs);
4250 if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
4251 return res;
4253 t.used = a->used + b->used + 1;
4255 pa = a->used;
4256 pb = b->used;
4257 for (ix = 0; ix < pa; ix++) {
4258 /* clear the carry */
4259 u = 0;
4261 /* left hand side of A[ix] * B[iy] */
4262 tmpx = a->dp[ix];
4264 /* alias to the address of where the digits will be stored */
4265 tmpt = &(t.dp[digs]);
4267 /* alias for where to read the right hand side from */
4268 tmpy = b->dp + (digs - ix);
4270 for (iy = digs - ix; iy < pb; iy++) {
4271 /* calculate the double precision result */
4272 r = ((mp_word)*tmpt) +
4273 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
4274 ((mp_word) u);
4276 /* get the lower part */
4277 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4279 /* carry the carry */
4280 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
4282 *tmpt = u;
4284 mp_clamp (&t);
4285 mp_exch (&t, c);
4286 mp_clear (&t);
4287 return MP_OKAY;
4290 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
4292 s_mp_sqr (const mp_int * a, mp_int * b)
4294 mp_int t;
4295 int res, ix, iy, pa;
4296 mp_word r;
4297 mp_digit u, tmpx, *tmpt;
4299 pa = a->used;
4300 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
4301 return res;
4304 /* default used is maximum possible size */
4305 t.used = 2*pa + 1;
4307 for (ix = 0; ix < pa; ix++) {
4308 /* first calculate the digit at 2*ix */
4309 /* calculate double precision result */
4310 r = ((mp_word) t.dp[2*ix]) +
4311 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
4313 /* store lower part in result */
4314 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
4316 /* get the carry */
4317 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4319 /* left hand side of A[ix] * A[iy] */
4320 tmpx = a->dp[ix];
4322 /* alias for where to store the results */
4323 tmpt = t.dp + (2*ix + 1);
4325 for (iy = ix + 1; iy < pa; iy++) {
4326 /* first calculate the product */
4327 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
4329 /* now calculate the double precision result, note we use
4330 * addition instead of *2 since it's easier to optimize
4332 r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
4334 /* store lower part */
4335 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4337 /* get carry */
4338 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4340 /* propagate upwards */
4341 while (u != ((mp_digit) 0)) {
4342 r = ((mp_word) *tmpt) + ((mp_word) u);
4343 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4344 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4348 mp_clamp (&t);
4349 mp_exch (&t, b);
4350 mp_clear (&t);
4351 return MP_OKAY;
4354 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
4356 s_mp_sub (const mp_int * a, const mp_int * b, mp_int * c)
4358 int olduse, res, min, max;
4360 /* find sizes */
4361 min = b->used;
4362 max = a->used;
4364 /* init result */
4365 if (c->alloc < max) {
4366 if ((res = mp_grow (c, max)) != MP_OKAY) {
4367 return res;
4370 olduse = c->used;
4371 c->used = max;
4374 register mp_digit u, *tmpa, *tmpb, *tmpc;
4375 register int i;
4377 /* alias for digit pointers */
4378 tmpa = a->dp;
4379 tmpb = b->dp;
4380 tmpc = c->dp;
4382 /* set carry to zero */
4383 u = 0;
4384 for (i = 0; i < min; i++) {
4385 /* T[i] = A[i] - B[i] - U */
4386 *tmpc = *tmpa++ - *tmpb++ - u;
4388 /* U = carry bit of T[i]
4389 * Note this saves performing an AND operation since
4390 * if a carry does occur it will propagate all the way to the
4391 * MSB. As a result a single shift is enough to get the carry
4393 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
4395 /* Clear carry from T[i] */
4396 *tmpc++ &= MP_MASK;
4399 /* now copy higher words if any, e.g. if A has more digits than B */
4400 for (; i < max; i++) {
4401 /* T[i] = A[i] - U */
4402 *tmpc = *tmpa++ - u;
4404 /* U = carry bit of T[i] */
4405 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
4407 /* Clear carry from T[i] */
4408 *tmpc++ &= MP_MASK;
4411 /* clear digits above used (since we may not have grown result above) */
4412 for (i = c->used; i < olduse; i++) {
4413 *tmpc++ = 0;
4417 mp_clamp (c);
4418 return MP_OKAY;