riched20: Sign-compare warnings fix.
[wine/multimedia.git] / dlls / rsaenh / mpi.c
blob5e10ec8dae273c6f651afd6777aef3d784a67366
1 /*
2 * dlls/rsaenh/mpi.c
3 * Multi Precision Integer functions
5 * Copyright 2004 Michael Jung
6 * Based on public domain code by Tom St Denis (tomstdenis@iahu.ca)
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
24 * This file contains code from the LibTomCrypt cryptographic
25 * library written by Tom St Denis (tomstdenis@iahu.ca). LibTomCrypt
26 * is in the public domain. The code in this file is tailored to
27 * special requirements. Take a look at http://libtomcrypt.org for the
28 * original version.
31 #include <stdarg.h>
32 #include "tomcrypt.h"
34 /* Known optimal configurations
35 CPU /Compiler /MUL CUTOFF/SQR CUTOFF
36 -------------------------------------------------------------
37 Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-)
39 static const int KARATSUBA_MUL_CUTOFF = 88, /* Min. number of digits before Karatsuba multiplication is used. */
40 KARATSUBA_SQR_CUTOFF = 128; /* Min. number of digits before Karatsuba squaring is used. */
42 static void bn_reverse(unsigned char *s, int len);
43 static int s_mp_add(mp_int *a, mp_int *b, mp_int *c);
44 static int s_mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y);
45 #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
46 static int s_mp_mul_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs);
47 static int s_mp_mul_high_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs);
48 static int s_mp_sqr(const mp_int *a, mp_int *b);
49 static int s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
50 static int mp_exptmod_fast(const mp_int *G, const mp_int *X, mp_int *P, mp_int *Y, int mode);
51 static int mp_invmod_slow (const mp_int * a, mp_int * b, mp_int * c);
52 static int mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c);
53 static int mp_karatsuba_sqr(const mp_int *a, mp_int *b);
55 /* computes the modular inverse via binary extended euclidean algorithm,
56 * that is c = 1/a mod b
58 * Based on slow invmod except this is optimized for the case where b is
59 * odd as per HAC Note 14.64 on pp. 610
61 static int
62 fast_mp_invmod (const mp_int * a, mp_int * b, mp_int * c)
64 mp_int x, y, u, v, B, D;
65 int res, neg;
67 /* 2. [modified] b must be odd */
68 if (mp_iseven (b) == 1) {
69 return MP_VAL;
72 /* init all our temps */
73 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
74 return res;
77 /* x == modulus, y == value to invert */
78 if ((res = mp_copy (b, &x)) != MP_OKAY) {
79 goto __ERR;
82 /* we need y = |a| */
83 if ((res = mp_abs (a, &y)) != MP_OKAY) {
84 goto __ERR;
87 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
88 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
89 goto __ERR;
91 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
92 goto __ERR;
94 mp_set (&D, 1);
96 top:
97 /* 4. while u is even do */
98 while (mp_iseven (&u) == 1) {
99 /* 4.1 u = u/2 */
100 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
101 goto __ERR;
103 /* 4.2 if B is odd then */
104 if (mp_isodd (&B) == 1) {
105 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
106 goto __ERR;
109 /* B = B/2 */
110 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
111 goto __ERR;
115 /* 5. while v is even do */
116 while (mp_iseven (&v) == 1) {
117 /* 5.1 v = v/2 */
118 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
119 goto __ERR;
121 /* 5.2 if D is odd then */
122 if (mp_isodd (&D) == 1) {
123 /* D = (D-x)/2 */
124 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
125 goto __ERR;
128 /* D = D/2 */
129 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
130 goto __ERR;
134 /* 6. if u >= v then */
135 if (mp_cmp (&u, &v) != MP_LT) {
136 /* u = u - v, B = B - D */
137 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
138 goto __ERR;
141 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
142 goto __ERR;
144 } else {
145 /* v - v - u, D = D - B */
146 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
147 goto __ERR;
150 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
151 goto __ERR;
155 /* if not zero goto step 4 */
156 if (mp_iszero (&u) == 0) {
157 goto top;
160 /* now a = C, b = D, gcd == g*v */
162 /* if v != 1 then there is no inverse */
163 if (mp_cmp_d (&v, 1) != MP_EQ) {
164 res = MP_VAL;
165 goto __ERR;
168 /* b is now the inverse */
169 neg = a->sign;
170 while (D.sign == MP_NEG) {
171 if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
172 goto __ERR;
175 mp_exch (&D, c);
176 c->sign = neg;
177 res = MP_OKAY;
179 __ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
180 return res;
183 /* computes xR**-1 == x (mod N) via Montgomery Reduction
185 * This is an optimized implementation of montgomery_reduce
186 * which uses the comba method to quickly calculate the columns of the
187 * reduction.
189 * Based on Algorithm 14.32 on pp.601 of HAC.
191 static int
192 fast_mp_montgomery_reduce (mp_int * x, const mp_int * n, mp_digit rho)
194 int ix, res, olduse;
195 mp_word W[MP_WARRAY];
197 /* get old used count */
198 olduse = x->used;
200 /* grow a as required */
201 if (x->alloc < n->used + 1) {
202 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
203 return res;
207 /* first we have to get the digits of the input into
208 * an array of double precision words W[...]
211 register mp_word *_W;
212 register mp_digit *tmpx;
214 /* alias for the W[] array */
215 _W = W;
217 /* alias for the digits of x*/
218 tmpx = x->dp;
220 /* copy the digits of a into W[0..a->used-1] */
221 for (ix = 0; ix < x->used; ix++) {
222 *_W++ = *tmpx++;
225 /* zero the high words of W[a->used..m->used*2] */
226 for (; ix < n->used * 2 + 1; ix++) {
227 *_W++ = 0;
231 /* now we proceed to zero successive digits
232 * from the least significant upwards
234 for (ix = 0; ix < n->used; ix++) {
235 /* mu = ai * m' mod b
237 * We avoid a double precision multiplication (which isn't required)
238 * by casting the value down to a mp_digit. Note this requires
239 * that W[ix-1] have the carry cleared (see after the inner loop)
241 register mp_digit mu;
242 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
244 /* a = a + mu * m * b**i
246 * This is computed in place and on the fly. The multiplication
247 * by b**i is handled by offsetting which columns the results
248 * are added to.
250 * Note the comba method normally doesn't handle carries in the
251 * inner loop In this case we fix the carry from the previous
252 * column since the Montgomery reduction requires digits of the
253 * result (so far) [see above] to work. This is
254 * handled by fixing up one carry after the inner loop. The
255 * carry fixups are done in order so after these loops the
256 * first m->used words of W[] have the carries fixed
259 register int iy;
260 register mp_digit *tmpn;
261 register mp_word *_W;
263 /* alias for the digits of the modulus */
264 tmpn = n->dp;
266 /* Alias for the columns set by an offset of ix */
267 _W = W + ix;
269 /* inner loop */
270 for (iy = 0; iy < n->used; iy++) {
271 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
275 /* now fix carry for next digit, W[ix+1] */
276 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
279 /* now we have to propagate the carries and
280 * shift the words downward [all those least
281 * significant digits we zeroed].
284 register mp_digit *tmpx;
285 register mp_word *_W, *_W1;
287 /* nox fix rest of carries */
289 /* alias for current word */
290 _W1 = W + ix;
292 /* alias for next word, where the carry goes */
293 _W = W + ++ix;
295 for (; ix <= n->used * 2 + 1; ix++) {
296 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
299 /* copy out, A = A/b**n
301 * The result is A/b**n but instead of converting from an
302 * array of mp_word to mp_digit than calling mp_rshd
303 * we just copy them in the right order
306 /* alias for destination word */
307 tmpx = x->dp;
309 /* alias for shifted double precision result */
310 _W = W + n->used;
312 for (ix = 0; ix < n->used + 1; ix++) {
313 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
316 /* zero oldused digits, if the input a was larger than
317 * m->used+1 we'll have to clear the digits
319 for (; ix < olduse; ix++) {
320 *tmpx++ = 0;
324 /* set the max used and clamp */
325 x->used = n->used + 1;
326 mp_clamp (x);
328 /* if A >= m then A = A - m */
329 if (mp_cmp_mag (x, n) != MP_LT) {
330 return s_mp_sub (x, n, x);
332 return MP_OKAY;
335 /* Fast (comba) multiplier
337 * This is the fast column-array [comba] multiplier. It is
338 * designed to compute the columns of the product first
339 * then handle the carries afterwards. This has the effect
340 * of making the nested loops that compute the columns very
341 * simple and schedulable on super-scalar processors.
343 * This has been modified to produce a variable number of
344 * digits of output so if say only a half-product is required
345 * you don't have to compute the upper half (a feature
346 * required for fast Barrett reduction).
348 * Based on Algorithm 14.12 on pp.595 of HAC.
351 static int
352 fast_s_mp_mul_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
354 int olduse, res, pa, ix, iz;
355 mp_digit W[MP_WARRAY];
356 register mp_word _W;
358 /* grow the destination as required */
359 if (c->alloc < digs) {
360 if ((res = mp_grow (c, digs)) != MP_OKAY) {
361 return res;
365 /* number of output digits to produce */
366 pa = MIN(digs, a->used + b->used);
368 /* clear the carry */
369 _W = 0;
370 for (ix = 0; ix <= pa; ix++) {
371 int tx, ty;
372 int iy;
373 mp_digit *tmpx, *tmpy;
375 /* get offsets into the two bignums */
376 ty = MIN(b->used-1, ix);
377 tx = ix - ty;
379 /* setup temp aliases */
380 tmpx = a->dp + tx;
381 tmpy = b->dp + ty;
383 /* This is the number of times the loop will iterate, essentially it's
384 while (tx++ < a->used && ty-- >= 0) { ... }
386 iy = MIN(a->used-tx, ty+1);
388 /* execute loop */
389 for (iz = 0; iz < iy; ++iz) {
390 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
393 /* store term */
394 W[ix] = ((mp_digit)_W) & MP_MASK;
396 /* make next carry */
397 _W = _W >> ((mp_word)DIGIT_BIT);
400 /* setup dest */
401 olduse = c->used;
402 c->used = digs;
405 register mp_digit *tmpc;
406 tmpc = c->dp;
407 for (ix = 0; ix < digs; ix++) {
408 /* now extract the previous digit [below the carry] */
409 *tmpc++ = W[ix];
412 /* clear unused digits [that existed in the old copy of c] */
413 for (; ix < olduse; ix++) {
414 *tmpc++ = 0;
417 mp_clamp (c);
418 return MP_OKAY;
421 /* this is a modified version of fast_s_mul_digs that only produces
422 * output digits *above* digs. See the comments for fast_s_mul_digs
423 * to see how it works.
425 * This is used in the Barrett reduction since for one of the multiplications
426 * only the higher digits were needed. This essentially halves the work.
428 * Based on Algorithm 14.12 on pp.595 of HAC.
430 static int
431 fast_s_mp_mul_high_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
433 int olduse, res, pa, ix, iz;
434 mp_digit W[MP_WARRAY];
435 mp_word _W;
437 /* grow the destination as required */
438 pa = a->used + b->used;
439 if (c->alloc < pa) {
440 if ((res = mp_grow (c, pa)) != MP_OKAY) {
441 return res;
445 /* number of output digits to produce */
446 pa = a->used + b->used;
447 _W = 0;
448 for (ix = digs; ix <= pa; ix++) {
449 int tx, ty, iy;
450 mp_digit *tmpx, *tmpy;
452 /* get offsets into the two bignums */
453 ty = MIN(b->used-1, ix);
454 tx = ix - ty;
456 /* setup temp aliases */
457 tmpx = a->dp + tx;
458 tmpy = b->dp + ty;
460 /* This is the number of times the loop will iterate, essentially it's
461 while (tx++ < a->used && ty-- >= 0) { ... }
463 iy = MIN(a->used-tx, ty+1);
465 /* execute loop */
466 for (iz = 0; iz < iy; iz++) {
467 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
470 /* store term */
471 W[ix] = ((mp_digit)_W) & MP_MASK;
473 /* make next carry */
474 _W = _W >> ((mp_word)DIGIT_BIT);
477 /* setup dest */
478 olduse = c->used;
479 c->used = pa;
482 register mp_digit *tmpc;
484 tmpc = c->dp + digs;
485 for (ix = digs; ix <= pa; ix++) {
486 /* now extract the previous digit [below the carry] */
487 *tmpc++ = W[ix];
490 /* clear unused digits [that existed in the old copy of c] */
491 for (; ix < olduse; ix++) {
492 *tmpc++ = 0;
495 mp_clamp (c);
496 return MP_OKAY;
499 /* fast squaring
501 * This is the comba method where the columns of the product
502 * are computed first then the carries are computed. This
503 * has the effect of making a very simple inner loop that
504 * is executed the most
506 * W2 represents the outer products and W the inner.
508 * A further optimizations is made because the inner
509 * products are of the form "A * B * 2". The *2 part does
510 * not need to be computed until the end which is good
511 * because 64-bit shifts are slow!
513 * Based on Algorithm 14.16 on pp.597 of HAC.
516 /* the jist of squaring...
518 you do like mult except the offset of the tmpx [one that starts closer to zero]
519 can't equal the offset of tmpy. So basically you set up iy like before then you min it with
520 (ty-tx) so that it never happens. You double all those you add in the inner loop
522 After that loop you do the squares and add them in.
524 Remove W2 and don't memset W
528 static int fast_s_mp_sqr (const mp_int * a, mp_int * b)
530 int olduse, res, pa, ix, iz;
531 mp_digit W[MP_WARRAY], *tmpx;
532 mp_word W1;
534 /* grow the destination as required */
535 pa = a->used + a->used;
536 if (b->alloc < pa) {
537 if ((res = mp_grow (b, pa)) != MP_OKAY) {
538 return res;
542 /* number of output digits to produce */
543 W1 = 0;
544 for (ix = 0; ix <= pa; ix++) {
545 int tx, ty, iy;
546 mp_word _W;
547 mp_digit *tmpy;
549 /* clear counter */
550 _W = 0;
552 /* get offsets into the two bignums */
553 ty = MIN(a->used-1, ix);
554 tx = ix - ty;
556 /* setup temp aliases */
557 tmpx = a->dp + tx;
558 tmpy = a->dp + ty;
560 /* This is the number of times the loop will iterate, essentially it's
561 while (tx++ < a->used && ty-- >= 0) { ... }
563 iy = MIN(a->used-tx, ty+1);
565 /* now for squaring tx can never equal ty
566 * we halve the distance since they approach at a rate of 2x
567 * and we have to round because odd cases need to be executed
569 iy = MIN(iy, (ty-tx+1)>>1);
571 /* execute loop */
572 for (iz = 0; iz < iy; iz++) {
573 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
576 /* double the inner product and add carry */
577 _W = _W + _W + W1;
579 /* even columns have the square term in them */
580 if ((ix&1) == 0) {
581 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
584 /* store it */
585 W[ix] = _W;
587 /* make next carry */
588 W1 = _W >> ((mp_word)DIGIT_BIT);
591 /* setup dest */
592 olduse = b->used;
593 b->used = a->used+a->used;
596 mp_digit *tmpb;
597 tmpb = b->dp;
598 for (ix = 0; ix < pa; ix++) {
599 *tmpb++ = W[ix] & MP_MASK;
602 /* clear unused digits [that existed in the old copy of c] */
603 for (; ix < olduse; ix++) {
604 *tmpb++ = 0;
607 mp_clamp (b);
608 return MP_OKAY;
611 /* computes a = 2**b
613 * Simple algorithm which zeroes the int, grows it then just sets one bit
614 * as required.
617 mp_2expt (mp_int * a, int b)
619 int res;
621 /* zero a as per default */
622 mp_zero (a);
624 /* grow a to accommodate the single bit */
625 if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
626 return res;
629 /* set the used count of where the bit will go */
630 a->used = b / DIGIT_BIT + 1;
632 /* put the single bit in its place */
633 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
635 return MP_OKAY;
638 /* b = |a|
640 * Simple function copies the input and fixes the sign to positive
643 mp_abs (const mp_int * a, mp_int * b)
645 int res;
647 /* copy a to b */
648 if (a != b) {
649 if ((res = mp_copy (a, b)) != MP_OKAY) {
650 return res;
654 /* force the sign of b to positive */
655 b->sign = MP_ZPOS;
657 return MP_OKAY;
660 /* high level addition (handles signs) */
661 int mp_add (mp_int * a, mp_int * b, mp_int * c)
663 int sa, sb, res;
665 /* get sign of both inputs */
666 sa = a->sign;
667 sb = b->sign;
669 /* handle two cases, not four */
670 if (sa == sb) {
671 /* both positive or both negative */
672 /* add their magnitudes, copy the sign */
673 c->sign = sa;
674 res = s_mp_add (a, b, c);
675 } else {
676 /* one positive, the other negative */
677 /* subtract the one with the greater magnitude from */
678 /* the one of the lesser magnitude. The result gets */
679 /* the sign of the one with the greater magnitude. */
680 if (mp_cmp_mag (a, b) == MP_LT) {
681 c->sign = sb;
682 res = s_mp_sub (b, a, c);
683 } else {
684 c->sign = sa;
685 res = s_mp_sub (a, b, c);
688 return res;
692 /* single digit addition */
694 mp_add_d (mp_int * a, mp_digit b, mp_int * c)
696 int res, ix, oldused;
697 mp_digit *tmpa, *tmpc, mu;
699 /* grow c as required */
700 if (c->alloc < a->used + 1) {
701 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
702 return res;
706 /* if a is negative and |a| >= b, call c = |a| - b */
707 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
708 /* temporarily fix sign of a */
709 a->sign = MP_ZPOS;
711 /* c = |a| - b */
712 res = mp_sub_d(a, b, c);
714 /* fix sign */
715 a->sign = c->sign = MP_NEG;
717 return res;
720 /* old number of used digits in c */
721 oldused = c->used;
723 /* sign always positive */
724 c->sign = MP_ZPOS;
726 /* source alias */
727 tmpa = a->dp;
729 /* destination alias */
730 tmpc = c->dp;
732 /* if a is positive */
733 if (a->sign == MP_ZPOS) {
734 /* add digit, after this we're propagating
735 * the carry.
737 *tmpc = *tmpa++ + b;
738 mu = *tmpc >> DIGIT_BIT;
739 *tmpc++ &= MP_MASK;
741 /* now handle rest of the digits */
742 for (ix = 1; ix < a->used; ix++) {
743 *tmpc = *tmpa++ + mu;
744 mu = *tmpc >> DIGIT_BIT;
745 *tmpc++ &= MP_MASK;
747 /* set final carry */
748 ix++;
749 *tmpc++ = mu;
751 /* setup size */
752 c->used = a->used + 1;
753 } else {
754 /* a was negative and |a| < b */
755 c->used = 1;
757 /* the result is a single digit */
758 if (a->used == 1) {
759 *tmpc++ = b - a->dp[0];
760 } else {
761 *tmpc++ = b;
764 /* setup count so the clearing of oldused
765 * can fall through correctly
767 ix = 1;
770 /* now zero to oldused */
771 while (ix++ < oldused) {
772 *tmpc++ = 0;
774 mp_clamp(c);
776 return MP_OKAY;
779 /* trim unused digits
781 * This is used to ensure that leading zero digits are
782 * trimed and the leading "used" digit will be non-zero
783 * Typically very fast. Also fixes the sign if there
784 * are no more leading digits
786 void
787 mp_clamp (mp_int * a)
789 /* decrease used while the most significant digit is
790 * zero.
792 while (a->used > 0 && a->dp[a->used - 1] == 0) {
793 --(a->used);
796 /* reset the sign flag if used == 0 */
797 if (a->used == 0) {
798 a->sign = MP_ZPOS;
802 /* clear one (frees) */
803 void
804 mp_clear (mp_int * a)
806 int i;
808 /* only do anything if a hasn't been freed previously */
809 if (a->dp != NULL) {
810 /* first zero the digits */
811 for (i = 0; i < a->used; i++) {
812 a->dp[i] = 0;
815 /* free ram */
816 free(a->dp);
818 /* reset members to make debugging easier */
819 a->dp = NULL;
820 a->alloc = a->used = 0;
821 a->sign = MP_ZPOS;
826 void mp_clear_multi(mp_int *mp, ...)
828 mp_int* next_mp = mp;
829 va_list args;
830 va_start(args, mp);
831 while (next_mp != NULL) {
832 mp_clear(next_mp);
833 next_mp = va_arg(args, mp_int*);
835 va_end(args);
838 /* compare two ints (signed)*/
840 mp_cmp (const mp_int * a, const mp_int * b)
842 /* compare based on sign */
843 if (a->sign != b->sign) {
844 if (a->sign == MP_NEG) {
845 return MP_LT;
846 } else {
847 return MP_GT;
851 /* compare digits */
852 if (a->sign == MP_NEG) {
853 /* if negative compare opposite direction */
854 return mp_cmp_mag(b, a);
855 } else {
856 return mp_cmp_mag(a, b);
860 /* compare a digit */
861 int mp_cmp_d(const mp_int * a, mp_digit b)
863 /* compare based on sign */
864 if (a->sign == MP_NEG) {
865 return MP_LT;
868 /* compare based on magnitude */
869 if (a->used > 1) {
870 return MP_GT;
873 /* compare the only digit of a to b */
874 if (a->dp[0] > b) {
875 return MP_GT;
876 } else if (a->dp[0] < b) {
877 return MP_LT;
878 } else {
879 return MP_EQ;
883 /* compare maginitude of two ints (unsigned) */
884 int mp_cmp_mag (const mp_int * a, const mp_int * b)
886 int n;
887 mp_digit *tmpa, *tmpb;
889 /* compare based on # of non-zero digits */
890 if (a->used > b->used) {
891 return MP_GT;
894 if (a->used < b->used) {
895 return MP_LT;
898 /* alias for a */
899 tmpa = a->dp + (a->used - 1);
901 /* alias for b */
902 tmpb = b->dp + (a->used - 1);
904 /* compare based on digits */
905 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
906 if (*tmpa > *tmpb) {
907 return MP_GT;
910 if (*tmpa < *tmpb) {
911 return MP_LT;
914 return MP_EQ;
917 static const int lnz[16] = {
918 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
921 /* Counts the number of lsbs which are zero before the first zero bit */
922 int mp_cnt_lsb(const mp_int *a)
924 int x;
925 mp_digit q, qq;
927 /* easy out */
928 if (mp_iszero(a) == 1) {
929 return 0;
932 /* scan lower digits until non-zero */
933 for (x = 0; x < a->used && a->dp[x] == 0; x++);
934 q = a->dp[x];
935 x *= DIGIT_BIT;
937 /* now scan this digit until a 1 is found */
938 if ((q & 1) == 0) {
939 do {
940 qq = q & 15;
941 x += lnz[qq];
942 q >>= 4;
943 } while (qq == 0);
945 return x;
948 /* copy, b = a */
950 mp_copy (const mp_int * a, mp_int * b)
952 int res, n;
954 /* if dst == src do nothing */
955 if (a == b) {
956 return MP_OKAY;
959 /* grow dest */
960 if (b->alloc < a->used) {
961 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
962 return res;
966 /* zero b and copy the parameters over */
968 register mp_digit *tmpa, *tmpb;
970 /* pointer aliases */
972 /* source */
973 tmpa = a->dp;
975 /* destination */
976 tmpb = b->dp;
978 /* copy all the digits */
979 for (n = 0; n < a->used; n++) {
980 *tmpb++ = *tmpa++;
983 /* clear high digits */
984 for (; n < b->used; n++) {
985 *tmpb++ = 0;
989 /* copy used count and sign */
990 b->used = a->used;
991 b->sign = a->sign;
992 return MP_OKAY;
995 /* returns the number of bits in an int */
997 mp_count_bits (const mp_int * a)
999 int r;
1000 mp_digit q;
1002 /* shortcut */
1003 if (a->used == 0) {
1004 return 0;
1007 /* get number of digits and add that */
1008 r = (a->used - 1) * DIGIT_BIT;
1010 /* take the last digit and count the bits in it */
1011 q = a->dp[a->used - 1];
1012 while (q > ((mp_digit) 0)) {
1013 ++r;
1014 q >>= ((mp_digit) 1);
1016 return r;
1019 /* integer signed division.
1020 * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
1021 * HAC pp.598 Algorithm 14.20
1023 * Note that the description in HAC is horribly
1024 * incomplete. For example, it doesn't consider
1025 * the case where digits are removed from 'x' in
1026 * the inner loop. It also doesn't consider the
1027 * case that y has fewer than three digits, etc..
1029 * The overall algorithm is as described as
1030 * 14.20 from HAC but fixed to treat these cases.
1032 int mp_div (const mp_int * a, const mp_int * b, mp_int * c, mp_int * d)
1034 mp_int q, x, y, t1, t2;
1035 int res, n, t, i, norm, neg;
1037 /* is divisor zero ? */
1038 if (mp_iszero (b) == 1) {
1039 return MP_VAL;
1042 /* if a < b then q=0, r = a */
1043 if (mp_cmp_mag (a, b) == MP_LT) {
1044 if (d != NULL) {
1045 res = mp_copy (a, d);
1046 } else {
1047 res = MP_OKAY;
1049 if (c != NULL) {
1050 mp_zero (c);
1052 return res;
1055 if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
1056 return res;
1058 q.used = a->used + 2;
1060 if ((res = mp_init (&t1)) != MP_OKAY) {
1061 goto __Q;
1064 if ((res = mp_init (&t2)) != MP_OKAY) {
1065 goto __T1;
1068 if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
1069 goto __T2;
1072 if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
1073 goto __X;
1076 /* fix the sign */
1077 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
1078 x.sign = y.sign = MP_ZPOS;
1080 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1081 norm = mp_count_bits(&y) % DIGIT_BIT;
1082 if (norm < DIGIT_BIT-1) {
1083 norm = (DIGIT_BIT-1) - norm;
1084 if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
1085 goto __Y;
1087 if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
1088 goto __Y;
1090 } else {
1091 norm = 0;
1094 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
1095 n = x.used - 1;
1096 t = y.used - 1;
1098 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1099 if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
1100 goto __Y;
1103 while (mp_cmp (&x, &y) != MP_LT) {
1104 ++(q.dp[n - t]);
1105 if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
1106 goto __Y;
1110 /* reset y by shifting it back down */
1111 mp_rshd (&y, n - t);
1113 /* step 3. for i from n down to (t + 1) */
1114 for (i = n; i >= (t + 1); i--) {
1115 if (i > x.used) {
1116 continue;
1119 /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1120 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1121 if (x.dp[i] == y.dp[t]) {
1122 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
1123 } else {
1124 mp_word tmp;
1125 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
1126 tmp |= ((mp_word) x.dp[i - 1]);
1127 tmp /= ((mp_word) y.dp[t]);
1128 if (tmp > (mp_word) MP_MASK)
1129 tmp = MP_MASK;
1130 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
1133 /* while (q{i-t-1} * (yt * b + y{t-1})) >
1134 xi * b**2 + xi-1 * b + xi-2
1136 do q{i-t-1} -= 1;
1138 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
1139 do {
1140 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
1142 /* find left hand */
1143 mp_zero (&t1);
1144 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
1145 t1.dp[1] = y.dp[t];
1146 t1.used = 2;
1147 if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1148 goto __Y;
1151 /* find right hand */
1152 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
1153 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
1154 t2.dp[2] = x.dp[i];
1155 t2.used = 3;
1156 } while (mp_cmp_mag(&t1, &t2) == MP_GT);
1158 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1159 if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1160 goto __Y;
1163 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1164 goto __Y;
1167 if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
1168 goto __Y;
1171 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1172 if (x.sign == MP_NEG) {
1173 if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
1174 goto __Y;
1176 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1177 goto __Y;
1179 if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
1180 goto __Y;
1183 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
1187 /* now q is the quotient and x is the remainder
1188 * [which we have to normalize]
1191 /* get sign before writing to c */
1192 x.sign = x.used == 0 ? MP_ZPOS : a->sign;
1194 if (c != NULL) {
1195 mp_clamp (&q);
1196 mp_exch (&q, c);
1197 c->sign = neg;
1200 if (d != NULL) {
1201 mp_div_2d (&x, norm, &x, NULL);
1202 mp_exch (&x, d);
1205 res = MP_OKAY;
1207 __Y:mp_clear (&y);
1208 __X:mp_clear (&x);
1209 __T2:mp_clear (&t2);
1210 __T1:mp_clear (&t1);
1211 __Q:mp_clear (&q);
1212 return res;
1215 /* b = a/2 */
1216 int mp_div_2(const mp_int * a, mp_int * b)
1218 int x, res, oldused;
1220 /* copy */
1221 if (b->alloc < a->used) {
1222 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1223 return res;
1227 oldused = b->used;
1228 b->used = a->used;
1230 register mp_digit r, rr, *tmpa, *tmpb;
1232 /* source alias */
1233 tmpa = a->dp + b->used - 1;
1235 /* dest alias */
1236 tmpb = b->dp + b->used - 1;
1238 /* carry */
1239 r = 0;
1240 for (x = b->used - 1; x >= 0; x--) {
1241 /* get the carry for the next iteration */
1242 rr = *tmpa & 1;
1244 /* shift the current digit, add in carry and store */
1245 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1247 /* forward carry to next iteration */
1248 r = rr;
1251 /* zero excess digits */
1252 tmpb = b->dp + b->used;
1253 for (x = b->used; x < oldused; x++) {
1254 *tmpb++ = 0;
1257 b->sign = a->sign;
1258 mp_clamp (b);
1259 return MP_OKAY;
1262 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
1263 int mp_div_2d (const mp_int * a, int b, mp_int * c, mp_int * d)
1265 mp_digit D, r, rr;
1266 int x, res;
1267 mp_int t;
1270 /* if the shift count is <= 0 then we do no work */
1271 if (b <= 0) {
1272 res = mp_copy (a, c);
1273 if (d != NULL) {
1274 mp_zero (d);
1276 return res;
1279 if ((res = mp_init (&t)) != MP_OKAY) {
1280 return res;
1283 /* get the remainder */
1284 if (d != NULL) {
1285 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
1286 mp_clear (&t);
1287 return res;
1291 /* copy */
1292 if ((res = mp_copy (a, c)) != MP_OKAY) {
1293 mp_clear (&t);
1294 return res;
1297 /* shift by as many digits in the bit count */
1298 if (b >= DIGIT_BIT) {
1299 mp_rshd (c, b / DIGIT_BIT);
1302 /* shift any bit count < DIGIT_BIT */
1303 D = (mp_digit) (b % DIGIT_BIT);
1304 if (D != 0) {
1305 register mp_digit *tmpc, mask, shift;
1307 /* mask */
1308 mask = (((mp_digit)1) << D) - 1;
1310 /* shift for lsb */
1311 shift = DIGIT_BIT - D;
1313 /* alias */
1314 tmpc = c->dp + (c->used - 1);
1316 /* carry */
1317 r = 0;
1318 for (x = c->used - 1; x >= 0; x--) {
1319 /* get the lower bits of this word in a temp */
1320 rr = *tmpc & mask;
1322 /* shift the current word and mix in the carry bits from the previous word */
1323 *tmpc = (*tmpc >> D) | (r << shift);
1324 --tmpc;
1326 /* set the carry to the carry bits of the current word found above */
1327 r = rr;
1330 mp_clamp (c);
1331 if (d != NULL) {
1332 mp_exch (&t, d);
1334 mp_clear (&t);
1335 return MP_OKAY;
1338 static int s_is_power_of_two(mp_digit b, int *p)
1340 int x;
1342 for (x = 1; x < DIGIT_BIT; x++) {
1343 if (b == (((mp_digit)1)<<x)) {
1344 *p = x;
1345 return 1;
1348 return 0;
1351 /* single digit division (based on routine from MPI) */
1352 int mp_div_d (const mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
1354 mp_int q;
1355 mp_word w;
1356 mp_digit t;
1357 int res, ix;
1359 /* cannot divide by zero */
1360 if (b == 0) {
1361 return MP_VAL;
1364 /* quick outs */
1365 if (b == 1 || mp_iszero(a) == 1) {
1366 if (d != NULL) {
1367 *d = 0;
1369 if (c != NULL) {
1370 return mp_copy(a, c);
1372 return MP_OKAY;
1375 /* power of two ? */
1376 if (s_is_power_of_two(b, &ix) == 1) {
1377 if (d != NULL) {
1378 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
1380 if (c != NULL) {
1381 return mp_div_2d(a, ix, c, NULL);
1383 return MP_OKAY;
1386 /* no easy answer [c'est la vie]. Just division */
1387 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
1388 return res;
1391 q.used = a->used;
1392 q.sign = a->sign;
1393 w = 0;
1394 for (ix = a->used - 1; ix >= 0; ix--) {
1395 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
1397 if (w >= b) {
1398 t = (mp_digit)(w / b);
1399 w -= ((mp_word)t) * ((mp_word)b);
1400 } else {
1401 t = 0;
1403 q.dp[ix] = t;
1406 if (d != NULL) {
1407 *d = (mp_digit)w;
1410 if (c != NULL) {
1411 mp_clamp(&q);
1412 mp_exch(&q, c);
1414 mp_clear(&q);
1416 return res;
1419 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
1421 * Based on algorithm from the paper
1423 * "Generating Efficient Primes for Discrete Log Cryptosystems"
1424 * Chae Hoon Lim, Pil Loong Lee,
1425 * POSTECH Information Research Laboratories
1427 * The modulus must be of a special format [see manual]
1429 * Has been modified to use algorithm 7.10 from the LTM book instead
1431 * Input x must be in the range 0 <= x <= (n-1)**2
1434 mp_dr_reduce (mp_int * x, const mp_int * n, mp_digit k)
1436 int err, i, m;
1437 mp_word r;
1438 mp_digit mu, *tmpx1, *tmpx2;
1440 /* m = digits in modulus */
1441 m = n->used;
1443 /* ensure that "x" has at least 2m digits */
1444 if (x->alloc < m + m) {
1445 if ((err = mp_grow (x, m + m)) != MP_OKAY) {
1446 return err;
1450 /* top of loop, this is where the code resumes if
1451 * another reduction pass is required.
1453 top:
1454 /* aliases for digits */
1455 /* alias for lower half of x */
1456 tmpx1 = x->dp;
1458 /* alias for upper half of x, or x/B**m */
1459 tmpx2 = x->dp + m;
1461 /* set carry to zero */
1462 mu = 0;
1464 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
1465 for (i = 0; i < m; i++) {
1466 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
1467 *tmpx1++ = (mp_digit)(r & MP_MASK);
1468 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
1471 /* set final carry */
1472 *tmpx1++ = mu;
1474 /* zero words above m */
1475 for (i = m + 1; i < x->used; i++) {
1476 *tmpx1++ = 0;
1479 /* clamp, sub and return */
1480 mp_clamp (x);
1482 /* if x >= n then subtract and reduce again
1483 * Each successive "recursion" makes the input smaller and smaller.
1485 if (mp_cmp_mag (x, n) != MP_LT) {
1486 s_mp_sub(x, n, x);
1487 goto top;
1489 return MP_OKAY;
1492 /* determines the setup value */
1493 void mp_dr_setup(const mp_int *a, mp_digit *d)
1495 /* the casts are required if DIGIT_BIT is one less than
1496 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
1498 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
1499 ((mp_word)a->dp[0]));
1502 /* swap the elements of two integers, for cases where you can't simply swap the
1503 * mp_int pointers around
1505 void
1506 mp_exch (mp_int * a, mp_int * b)
1508 mp_int t;
1510 t = *a;
1511 *a = *b;
1512 *b = t;
1515 /* this is a shell function that calls either the normal or Montgomery
1516 * exptmod functions. Originally the call to the montgomery code was
1517 * embedded in the normal function but that wasted a lot of stack space
1518 * for nothing (since 99% of the time the Montgomery code would be called)
1520 int mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y)
1522 int dr;
1524 /* modulus P must be positive */
1525 if (P->sign == MP_NEG) {
1526 return MP_VAL;
1529 /* if exponent X is negative we have to recurse */
1530 if (X->sign == MP_NEG) {
1531 mp_int tmpG, tmpX;
1532 int err;
1534 /* first compute 1/G mod P */
1535 if ((err = mp_init(&tmpG)) != MP_OKAY) {
1536 return err;
1538 if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
1539 mp_clear(&tmpG);
1540 return err;
1543 /* now get |X| */
1544 if ((err = mp_init(&tmpX)) != MP_OKAY) {
1545 mp_clear(&tmpG);
1546 return err;
1548 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
1549 mp_clear_multi(&tmpG, &tmpX, NULL);
1550 return err;
1553 /* and now compute (1/G)**|X| instead of G**X [X < 0] */
1554 err = mp_exptmod(&tmpG, &tmpX, P, Y);
1555 mp_clear_multi(&tmpG, &tmpX, NULL);
1556 return err;
1559 dr = 0;
1561 /* if the modulus is odd or dr != 0 use the fast method */
1562 if (mp_isodd (P) == 1 || dr != 0) {
1563 return mp_exptmod_fast (G, X, P, Y, dr);
1564 } else {
1565 /* otherwise use the generic Barrett reduction technique */
1566 return s_mp_exptmod (G, X, P, Y);
1570 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1572 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
1573 * The value of k changes based on the size of the exponent.
1575 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1579 mp_exptmod_fast (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y, int redmode)
1581 mp_int M[256], res;
1582 mp_digit buf, mp;
1583 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1585 /* use a pointer to the reduction algorithm. This allows us to use
1586 * one of many reduction algorithms without modding the guts of
1587 * the code with if statements everywhere.
1589 int (*redux)(mp_int*,const mp_int*,mp_digit);
1591 /* find window size */
1592 x = mp_count_bits (X);
1593 if (x <= 7) {
1594 winsize = 2;
1595 } else if (x <= 36) {
1596 winsize = 3;
1597 } else if (x <= 140) {
1598 winsize = 4;
1599 } else if (x <= 450) {
1600 winsize = 5;
1601 } else if (x <= 1303) {
1602 winsize = 6;
1603 } else if (x <= 3529) {
1604 winsize = 7;
1605 } else {
1606 winsize = 8;
1609 /* init M array */
1610 /* init first cell */
1611 if ((err = mp_init(&M[1])) != MP_OKAY) {
1612 return err;
1615 /* now init the second half of the array */
1616 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1617 if ((err = mp_init(&M[x])) != MP_OKAY) {
1618 for (y = 1<<(winsize-1); y < x; y++) {
1619 mp_clear (&M[y]);
1621 mp_clear(&M[1]);
1622 return err;
1626 /* determine and setup reduction code */
1627 if (redmode == 0) {
1628 /* now setup montgomery */
1629 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
1630 goto __M;
1633 /* automatically pick the comba one if available (saves quite a few calls/ifs) */
1634 if (((P->used * 2 + 1) < MP_WARRAY) &&
1635 P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
1636 redux = fast_mp_montgomery_reduce;
1637 } else {
1638 /* use slower baseline Montgomery method */
1639 redux = mp_montgomery_reduce;
1641 } else if (redmode == 1) {
1642 /* setup DR reduction for moduli of the form B**k - b */
1643 mp_dr_setup(P, &mp);
1644 redux = mp_dr_reduce;
1645 } else {
1646 /* setup DR reduction for moduli of the form 2**k - b */
1647 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
1648 goto __M;
1650 redux = mp_reduce_2k;
1653 /* setup result */
1654 if ((err = mp_init (&res)) != MP_OKAY) {
1655 goto __M;
1658 /* create M table
1662 * The first half of the table is not computed though accept for M[0] and M[1]
1665 if (redmode == 0) {
1666 /* now we need R mod m */
1667 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
1668 goto __RES;
1671 /* now set M[1] to G * R mod m */
1672 if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
1673 goto __RES;
1675 } else {
1676 mp_set(&res, 1);
1677 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
1678 goto __RES;
1682 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
1683 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
1684 goto __RES;
1687 for (x = 0; x < (winsize - 1); x++) {
1688 if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
1689 goto __RES;
1691 if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
1692 goto __RES;
1696 /* create upper table */
1697 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
1698 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
1699 goto __RES;
1701 if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
1702 goto __RES;
1706 /* set initial mode and bit cnt */
1707 mode = 0;
1708 bitcnt = 1;
1709 buf = 0;
1710 digidx = X->used - 1;
1711 bitcpy = 0;
1712 bitbuf = 0;
1714 for (;;) {
1715 /* grab next digit as required */
1716 if (--bitcnt == 0) {
1717 /* if digidx == -1 we are out of digits so break */
1718 if (digidx == -1) {
1719 break;
1721 /* read next digit and reset bitcnt */
1722 buf = X->dp[digidx--];
1723 bitcnt = DIGIT_BIT;
1726 /* grab the next msb from the exponent */
1727 y = (buf >> (DIGIT_BIT - 1)) & 1;
1728 buf <<= (mp_digit)1;
1730 /* if the bit is zero and mode == 0 then we ignore it
1731 * These represent the leading zero bits before the first 1 bit
1732 * in the exponent. Technically this opt is not required but it
1733 * does lower the # of trivial squaring/reductions used
1735 if (mode == 0 && y == 0) {
1736 continue;
1739 /* if the bit is zero and mode == 1 then we square */
1740 if (mode == 1 && y == 0) {
1741 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1742 goto __RES;
1744 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1745 goto __RES;
1747 continue;
1750 /* else we add it to the window */
1751 bitbuf |= (y << (winsize - ++bitcpy));
1752 mode = 2;
1754 if (bitcpy == winsize) {
1755 /* ok window is filled so square as required and multiply */
1756 /* square first */
1757 for (x = 0; x < winsize; x++) {
1758 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1759 goto __RES;
1761 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1762 goto __RES;
1766 /* then multiply */
1767 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
1768 goto __RES;
1770 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1771 goto __RES;
1774 /* empty window and reset */
1775 bitcpy = 0;
1776 bitbuf = 0;
1777 mode = 1;
1781 /* if bits remain then square/multiply */
1782 if (mode == 2 && bitcpy > 0) {
1783 /* square then multiply if the bit is set */
1784 for (x = 0; x < bitcpy; x++) {
1785 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
1786 goto __RES;
1788 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1789 goto __RES;
1792 /* get next bit of the window */
1793 bitbuf <<= 1;
1794 if ((bitbuf & (1 << winsize)) != 0) {
1795 /* then multiply */
1796 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
1797 goto __RES;
1799 if ((err = redux (&res, P, mp)) != MP_OKAY) {
1800 goto __RES;
1806 if (redmode == 0) {
1807 /* fixup result if Montgomery reduction is used
1808 * recall that any value in a Montgomery system is
1809 * actually multiplied by R mod n. So we have
1810 * to reduce one more time to cancel out the factor
1811 * of R.
1813 if ((err = redux(&res, P, mp)) != MP_OKAY) {
1814 goto __RES;
1818 /* swap res with Y */
1819 mp_exch (&res, Y);
1820 err = MP_OKAY;
1821 __RES:mp_clear (&res);
1822 __M:
1823 mp_clear(&M[1]);
1824 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1825 mp_clear (&M[x]);
1827 return err;
1830 /* Greatest Common Divisor using the binary method */
1831 int mp_gcd (const mp_int * a, const mp_int * b, mp_int * c)
1833 mp_int u, v;
1834 int k, u_lsb, v_lsb, res;
1836 /* either zero than gcd is the largest */
1837 if (mp_iszero (a) == 1 && mp_iszero (b) == 0) {
1838 return mp_abs (b, c);
1840 if (mp_iszero (a) == 0 && mp_iszero (b) == 1) {
1841 return mp_abs (a, c);
1844 /* optimized. At this point if a == 0 then
1845 * b must equal zero too
1847 if (mp_iszero (a) == 1) {
1848 mp_zero(c);
1849 return MP_OKAY;
1852 /* get copies of a and b we can modify */
1853 if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
1854 return res;
1857 if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
1858 goto __U;
1861 /* must be positive for the remainder of the algorithm */
1862 u.sign = v.sign = MP_ZPOS;
1864 /* B1. Find the common power of two for u and v */
1865 u_lsb = mp_cnt_lsb(&u);
1866 v_lsb = mp_cnt_lsb(&v);
1867 k = MIN(u_lsb, v_lsb);
1869 if (k > 0) {
1870 /* divide the power of two out */
1871 if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
1872 goto __V;
1875 if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
1876 goto __V;
1880 /* divide any remaining factors of two out */
1881 if (u_lsb != k) {
1882 if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
1883 goto __V;
1887 if (v_lsb != k) {
1888 if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
1889 goto __V;
1893 while (mp_iszero(&v) == 0) {
1894 /* make sure v is the largest */
1895 if (mp_cmp_mag(&u, &v) == MP_GT) {
1896 /* swap u and v to make sure v is >= u */
1897 mp_exch(&u, &v);
1900 /* subtract smallest from largest */
1901 if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
1902 goto __V;
1905 /* Divide out all factors of two */
1906 if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
1907 goto __V;
1911 /* multiply by 2**k which we divided out at the beginning */
1912 if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
1913 goto __V;
1915 c->sign = MP_ZPOS;
1916 res = MP_OKAY;
1917 __V:mp_clear (&u);
1918 __U:mp_clear (&v);
1919 return res;
1922 /* get the lower 32-bits of an mp_int */
1923 unsigned long mp_get_int(const mp_int * a)
1925 int i;
1926 unsigned long res;
1928 if (a->used == 0) {
1929 return 0;
1932 /* get number of digits of the lsb we have to read */
1933 i = MIN(a->used,(int)((sizeof(unsigned long)*CHAR_BIT+DIGIT_BIT-1)/DIGIT_BIT))-1;
1935 /* get most significant digit of result */
1936 res = DIGIT(a,i);
1938 while (--i >= 0) {
1939 res = (res << DIGIT_BIT) | DIGIT(a,i);
1942 /* force result to 32-bits always so it is consistent on non 32-bit platforms */
1943 return res & 0xFFFFFFFFUL;
1946 /* grow as required */
1947 int mp_grow (mp_int * a, int size)
1949 int i;
1950 mp_digit *tmp;
1952 /* if the alloc size is smaller alloc more ram */
1953 if (a->alloc < size) {
1954 /* ensure there are always at least MP_PREC digits extra on top */
1955 size += (MP_PREC * 2) - (size % MP_PREC);
1957 /* reallocate the array a->dp
1959 * We store the return in a temporary variable
1960 * in case the operation failed we don't want
1961 * to overwrite the dp member of a.
1963 tmp = realloc (a->dp, sizeof (mp_digit) * size);
1964 if (tmp == NULL) {
1965 /* reallocation failed but "a" is still valid [can be freed] */
1966 return MP_MEM;
1969 /* reallocation succeeded so set a->dp */
1970 a->dp = tmp;
1972 /* zero excess digits */
1973 i = a->alloc;
1974 a->alloc = size;
1975 for (; i < a->alloc; i++) {
1976 a->dp[i] = 0;
1979 return MP_OKAY;
1982 /* init a new mp_int */
1983 int mp_init (mp_int * a)
1985 int i;
1987 /* allocate memory required and clear it */
1988 a->dp = malloc (sizeof (mp_digit) * MP_PREC);
1989 if (a->dp == NULL) {
1990 return MP_MEM;
1993 /* set the digits to zero */
1994 for (i = 0; i < MP_PREC; i++) {
1995 a->dp[i] = 0;
1998 /* set the used to zero, allocated digits to the default precision
1999 * and sign to positive */
2000 a->used = 0;
2001 a->alloc = MP_PREC;
2002 a->sign = MP_ZPOS;
2004 return MP_OKAY;
2007 /* creates "a" then copies b into it */
2008 int mp_init_copy (mp_int * a, const mp_int * b)
2010 int res;
2012 if ((res = mp_init (a)) != MP_OKAY) {
2013 return res;
2015 return mp_copy (b, a);
2018 int mp_init_multi(mp_int *mp, ...)
2020 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */
2021 int n = 0; /* Number of ok inits */
2022 mp_int* cur_arg = mp;
2023 va_list args;
2025 va_start(args, mp); /* init args to next argument from caller */
2026 while (cur_arg != NULL) {
2027 if (mp_init(cur_arg) != MP_OKAY) {
2028 /* Oops - error! Back-track and mp_clear what we already
2029 succeeded in init-ing, then return error.
2031 va_list clean_args;
2033 /* end the current list */
2034 va_end(args);
2036 /* now start cleaning up */
2037 cur_arg = mp;
2038 va_start(clean_args, mp);
2039 while (n--) {
2040 mp_clear(cur_arg);
2041 cur_arg = va_arg(clean_args, mp_int*);
2043 va_end(clean_args);
2044 res = MP_MEM;
2045 break;
2047 n++;
2048 cur_arg = va_arg(args, mp_int*);
2050 va_end(args);
2051 return res; /* Assumed ok, if error flagged above. */
2054 /* init an mp_init for a given size */
2055 int mp_init_size (mp_int * a, int size)
2057 int x;
2059 /* pad size so there are always extra digits */
2060 size += (MP_PREC * 2) - (size % MP_PREC);
2062 /* alloc mem */
2063 a->dp = malloc (sizeof (mp_digit) * size);
2064 if (a->dp == NULL) {
2065 return MP_MEM;
2068 /* set the members */
2069 a->used = 0;
2070 a->alloc = size;
2071 a->sign = MP_ZPOS;
2073 /* zero the digits */
2074 for (x = 0; x < size; x++) {
2075 a->dp[x] = 0;
2078 return MP_OKAY;
2081 /* hac 14.61, pp608 */
2082 int mp_invmod (const mp_int * a, mp_int * b, mp_int * c)
2084 /* b cannot be negative */
2085 if (b->sign == MP_NEG || mp_iszero(b) == 1) {
2086 return MP_VAL;
2089 /* if the modulus is odd we can use a faster routine instead */
2090 if (mp_isodd (b) == 1) {
2091 return fast_mp_invmod (a, b, c);
2094 return mp_invmod_slow(a, b, c);
2097 /* hac 14.61, pp608 */
2098 int mp_invmod_slow (const mp_int * a, mp_int * b, mp_int * c)
2100 mp_int x, y, u, v, A, B, C, D;
2101 int res;
2103 /* b cannot be negative */
2104 if (b->sign == MP_NEG || mp_iszero(b) == 1) {
2105 return MP_VAL;
2108 /* init temps */
2109 if ((res = mp_init_multi(&x, &y, &u, &v,
2110 &A, &B, &C, &D, NULL)) != MP_OKAY) {
2111 return res;
2114 /* x = a, y = b */
2115 if ((res = mp_copy (a, &x)) != MP_OKAY) {
2116 goto __ERR;
2118 if ((res = mp_copy (b, &y)) != MP_OKAY) {
2119 goto __ERR;
2122 /* 2. [modified] if x,y are both even then return an error! */
2123 if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
2124 res = MP_VAL;
2125 goto __ERR;
2128 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
2129 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
2130 goto __ERR;
2132 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
2133 goto __ERR;
2135 mp_set (&A, 1);
2136 mp_set (&D, 1);
2138 top:
2139 /* 4. while u is even do */
2140 while (mp_iseven (&u) == 1) {
2141 /* 4.1 u = u/2 */
2142 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
2143 goto __ERR;
2145 /* 4.2 if A or B is odd then */
2146 if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
2147 /* A = (A+y)/2, B = (B-x)/2 */
2148 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
2149 goto __ERR;
2151 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
2152 goto __ERR;
2155 /* A = A/2, B = B/2 */
2156 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
2157 goto __ERR;
2159 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
2160 goto __ERR;
2164 /* 5. while v is even do */
2165 while (mp_iseven (&v) == 1) {
2166 /* 5.1 v = v/2 */
2167 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
2168 goto __ERR;
2170 /* 5.2 if C or D is odd then */
2171 if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
2172 /* C = (C+y)/2, D = (D-x)/2 */
2173 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
2174 goto __ERR;
2176 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
2177 goto __ERR;
2180 /* C = C/2, D = D/2 */
2181 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
2182 goto __ERR;
2184 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
2185 goto __ERR;
2189 /* 6. if u >= v then */
2190 if (mp_cmp (&u, &v) != MP_LT) {
2191 /* u = u - v, A = A - C, B = B - D */
2192 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
2193 goto __ERR;
2196 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
2197 goto __ERR;
2200 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
2201 goto __ERR;
2203 } else {
2204 /* v - v - u, C = C - A, D = D - B */
2205 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
2206 goto __ERR;
2209 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
2210 goto __ERR;
2213 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
2214 goto __ERR;
2218 /* if not zero goto step 4 */
2219 if (mp_iszero (&u) == 0)
2220 goto top;
2222 /* now a = C, b = D, gcd == g*v */
2224 /* if v != 1 then there is no inverse */
2225 if (mp_cmp_d (&v, 1) != MP_EQ) {
2226 res = MP_VAL;
2227 goto __ERR;
2230 /* if its too low */
2231 while (mp_cmp_d(&C, 0) == MP_LT) {
2232 if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
2233 goto __ERR;
2237 /* too big */
2238 while (mp_cmp_mag(&C, b) != MP_LT) {
2239 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
2240 goto __ERR;
2244 /* C is now the inverse */
2245 mp_exch (&C, c);
2246 res = MP_OKAY;
2247 __ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
2248 return res;
2251 /* c = |a| * |b| using Karatsuba Multiplication using
2252 * three half size multiplications
2254 * Let B represent the radix [e.g. 2**DIGIT_BIT] and
2255 * let n represent half of the number of digits in
2256 * the min(a,b)
2258 * a = a1 * B**n + a0
2259 * b = b1 * B**n + b0
2261 * Then, a * b =>
2262 a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0
2264 * Note that a1b1 and a0b0 are used twice and only need to be
2265 * computed once. So in total three half size (half # of
2266 * digit) multiplications are performed, a0b0, a1b1 and
2267 * (a1-b1)(a0-b0)
2269 * Note that a multiplication of half the digits requires
2270 * 1/4th the number of single precision multiplications so in
2271 * total after one call 25% of the single precision multiplications
2272 * are saved. Note also that the call to mp_mul can end up back
2273 * in this function if the a0, a1, b0, or b1 are above the threshold.
2274 * This is known as divide-and-conquer and leads to the famous
2275 * O(N**lg(3)) or O(N**1.584) work which is asymptotically lower than
2276 * the standard O(N**2) that the baseline/comba methods use.
2277 * Generally though the overhead of this method doesn't pay off
2278 * until a certain size (N ~ 80) is reached.
2280 int mp_karatsuba_mul (const mp_int * a, const mp_int * b, mp_int * c)
2282 mp_int x0, x1, y0, y1, t1, x0y0, x1y1;
2283 int B, err;
2285 /* default the return code to an error */
2286 err = MP_MEM;
2288 /* min # of digits */
2289 B = MIN (a->used, b->used);
2291 /* now divide in two */
2292 B = B >> 1;
2294 /* init copy all the temps */
2295 if (mp_init_size (&x0, B) != MP_OKAY)
2296 goto ERR;
2297 if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2298 goto X0;
2299 if (mp_init_size (&y0, B) != MP_OKAY)
2300 goto X1;
2301 if (mp_init_size (&y1, b->used - B) != MP_OKAY)
2302 goto Y0;
2304 /* init temps */
2305 if (mp_init_size (&t1, B * 2) != MP_OKAY)
2306 goto Y1;
2307 if (mp_init_size (&x0y0, B * 2) != MP_OKAY)
2308 goto T1;
2309 if (mp_init_size (&x1y1, B * 2) != MP_OKAY)
2310 goto X0Y0;
2312 /* now shift the digits */
2313 x0.used = y0.used = B;
2314 x1.used = a->used - B;
2315 y1.used = b->used - B;
2318 register int x;
2319 register mp_digit *tmpa, *tmpb, *tmpx, *tmpy;
2321 /* we copy the digits directly instead of using higher level functions
2322 * since we also need to shift the digits
2324 tmpa = a->dp;
2325 tmpb = b->dp;
2327 tmpx = x0.dp;
2328 tmpy = y0.dp;
2329 for (x = 0; x < B; x++) {
2330 *tmpx++ = *tmpa++;
2331 *tmpy++ = *tmpb++;
2334 tmpx = x1.dp;
2335 for (x = B; x < a->used; x++) {
2336 *tmpx++ = *tmpa++;
2339 tmpy = y1.dp;
2340 for (x = B; x < b->used; x++) {
2341 *tmpy++ = *tmpb++;
2345 /* only need to clamp the lower words since by definition the
2346 * upper words x1/y1 must have a known number of digits
2348 mp_clamp (&x0);
2349 mp_clamp (&y0);
2351 /* now calc the products x0y0 and x1y1 */
2352 /* after this x0 is no longer required, free temp [x0==t2]! */
2353 if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
2354 goto X1Y1; /* x0y0 = x0*y0 */
2355 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
2356 goto X1Y1; /* x1y1 = x1*y1 */
2358 /* now calc x1-x0 and y1-y0 */
2359 if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
2360 goto X1Y1; /* t1 = x1 - x0 */
2361 if (mp_sub (&y1, &y0, &x0) != MP_OKAY)
2362 goto X1Y1; /* t2 = y1 - y0 */
2363 if (mp_mul (&t1, &x0, &t1) != MP_OKAY)
2364 goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */
2366 /* add x0y0 */
2367 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY)
2368 goto X1Y1; /* t2 = x0y0 + x1y1 */
2369 if (mp_sub (&x0, &t1, &t1) != MP_OKAY)
2370 goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
2372 /* shift by B */
2373 if (mp_lshd (&t1, B) != MP_OKAY)
2374 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
2375 if (mp_lshd (&x1y1, B * 2) != MP_OKAY)
2376 goto X1Y1; /* x1y1 = x1y1 << 2*B */
2378 if (mp_add (&x0y0, &t1, &t1) != MP_OKAY)
2379 goto X1Y1; /* t1 = x0y0 + t1 */
2380 if (mp_add (&t1, &x1y1, c) != MP_OKAY)
2381 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */
2383 /* Algorithm succeeded set the return code to MP_OKAY */
2384 err = MP_OKAY;
2386 X1Y1:mp_clear (&x1y1);
2387 X0Y0:mp_clear (&x0y0);
2388 T1:mp_clear (&t1);
2389 Y1:mp_clear (&y1);
2390 Y0:mp_clear (&y0);
2391 X1:mp_clear (&x1);
2392 X0:mp_clear (&x0);
2393 ERR:
2394 return err;
2397 /* Karatsuba squaring, computes b = a*a using three
2398 * half size squarings
2400 * See comments of karatsuba_mul for details. It
2401 * is essentially the same algorithm but merely
2402 * tuned to perform recursive squarings.
2404 int mp_karatsuba_sqr (const mp_int * a, mp_int * b)
2406 mp_int x0, x1, t1, t2, x0x0, x1x1;
2407 int B, err;
2409 err = MP_MEM;
2411 /* min # of digits */
2412 B = a->used;
2414 /* now divide in two */
2415 B = B >> 1;
2417 /* init copy all the temps */
2418 if (mp_init_size (&x0, B) != MP_OKAY)
2419 goto ERR;
2420 if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2421 goto X0;
2423 /* init temps */
2424 if (mp_init_size (&t1, a->used * 2) != MP_OKAY)
2425 goto X1;
2426 if (mp_init_size (&t2, a->used * 2) != MP_OKAY)
2427 goto T1;
2428 if (mp_init_size (&x0x0, B * 2) != MP_OKAY)
2429 goto T2;
2430 if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY)
2431 goto X0X0;
2434 register int x;
2435 register mp_digit *dst, *src;
2437 src = a->dp;
2439 /* now shift the digits */
2440 dst = x0.dp;
2441 for (x = 0; x < B; x++) {
2442 *dst++ = *src++;
2445 dst = x1.dp;
2446 for (x = B; x < a->used; x++) {
2447 *dst++ = *src++;
2451 x0.used = B;
2452 x1.used = a->used - B;
2454 mp_clamp (&x0);
2456 /* now calc the products x0*x0 and x1*x1 */
2457 if (mp_sqr (&x0, &x0x0) != MP_OKAY)
2458 goto X1X1; /* x0x0 = x0*x0 */
2459 if (mp_sqr (&x1, &x1x1) != MP_OKAY)
2460 goto X1X1; /* x1x1 = x1*x1 */
2462 /* now calc (x1-x0)**2 */
2463 if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
2464 goto X1X1; /* t1 = x1 - x0 */
2465 if (mp_sqr (&t1, &t1) != MP_OKAY)
2466 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */
2468 /* add x0y0 */
2469 if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
2470 goto X1X1; /* t2 = x0x0 + x1x1 */
2471 if (mp_sub (&t2, &t1, &t1) != MP_OKAY)
2472 goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */
2474 /* shift by B */
2475 if (mp_lshd (&t1, B) != MP_OKAY)
2476 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
2477 if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
2478 goto X1X1; /* x1x1 = x1x1 << 2*B */
2480 if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
2481 goto X1X1; /* t1 = x0x0 + t1 */
2482 if (mp_add (&t1, &x1x1, b) != MP_OKAY)
2483 goto X1X1; /* t1 = x0x0 + t1 + x1x1 */
2485 err = MP_OKAY;
2487 X1X1:mp_clear (&x1x1);
2488 X0X0:mp_clear (&x0x0);
2489 T2:mp_clear (&t2);
2490 T1:mp_clear (&t1);
2491 X1:mp_clear (&x1);
2492 X0:mp_clear (&x0);
2493 ERR:
2494 return err;
2497 /* computes least common multiple as |a*b|/(a, b) */
2498 int mp_lcm (const mp_int * a, const mp_int * b, mp_int * c)
2500 int res;
2501 mp_int t1, t2;
2504 if ((res = mp_init_multi (&t1, &t2, NULL)) != MP_OKAY) {
2505 return res;
2508 /* t1 = get the GCD of the two inputs */
2509 if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
2510 goto __T;
2513 /* divide the smallest by the GCD */
2514 if (mp_cmp_mag(a, b) == MP_LT) {
2515 /* store quotient in t2 such that t2 * b is the LCM */
2516 if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
2517 goto __T;
2519 res = mp_mul(b, &t2, c);
2520 } else {
2521 /* store quotient in t2 such that t2 * a is the LCM */
2522 if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
2523 goto __T;
2525 res = mp_mul(a, &t2, c);
2528 /* fix the sign to positive */
2529 c->sign = MP_ZPOS;
2531 __T:
2532 mp_clear_multi (&t1, &t2, NULL);
2533 return res;
2536 /* shift left a certain amount of digits */
2537 int mp_lshd (mp_int * a, int b)
2539 int x, res;
2541 /* if its less than zero return */
2542 if (b <= 0) {
2543 return MP_OKAY;
2546 /* grow to fit the new digits */
2547 if (a->alloc < a->used + b) {
2548 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
2549 return res;
2554 register mp_digit *top, *bottom;
2556 /* increment the used by the shift amount then copy upwards */
2557 a->used += b;
2559 /* top */
2560 top = a->dp + a->used - 1;
2562 /* base */
2563 bottom = a->dp + a->used - 1 - b;
2565 /* much like mp_rshd this is implemented using a sliding window
2566 * except the window goes the otherway around. Copying from
2567 * the bottom to the top. see bn_mp_rshd.c for more info.
2569 for (x = a->used - 1; x >= b; x--) {
2570 *top-- = *bottom--;
2573 /* zero the lower digits */
2574 top = a->dp;
2575 for (x = 0; x < b; x++) {
2576 *top++ = 0;
2579 return MP_OKAY;
2582 /* c = a mod b, 0 <= c < b */
2584 mp_mod (const mp_int * a, mp_int * b, mp_int * c)
2586 mp_int t;
2587 int res;
2589 if ((res = mp_init (&t)) != MP_OKAY) {
2590 return res;
2593 if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
2594 mp_clear (&t);
2595 return res;
2598 if (t.sign != b->sign) {
2599 res = mp_add (b, &t, c);
2600 } else {
2601 res = MP_OKAY;
2602 mp_exch (&t, c);
2605 mp_clear (&t);
2606 return res;
2609 /* calc a value mod 2**b */
2611 mp_mod_2d (const mp_int * a, int b, mp_int * c)
2613 int x, res;
2615 /* if b is <= 0 then zero the int */
2616 if (b <= 0) {
2617 mp_zero (c);
2618 return MP_OKAY;
2621 /* if the modulus is larger than the value than return */
2622 if (b > a->used * DIGIT_BIT) {
2623 res = mp_copy (a, c);
2624 return res;
2627 /* copy */
2628 if ((res = mp_copy (a, c)) != MP_OKAY) {
2629 return res;
2632 /* zero digits above the last digit of the modulus */
2633 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
2634 c->dp[x] = 0;
2636 /* clear the digit that is not completely outside/inside the modulus */
2637 c->dp[b / DIGIT_BIT] &= (1 << ((mp_digit)b % DIGIT_BIT)) - 1;
2638 mp_clamp (c);
2639 return MP_OKAY;
2643 mp_mod_d (const mp_int * a, mp_digit b, mp_digit * c)
2645 return mp_div_d(a, b, NULL, c);
2649 * shifts with subtractions when the result is greater than b.
2651 * The method is slightly modified to shift B unconditionally up to just under
2652 * the leading bit of b. This saves a lot of multiple precision shifting.
2654 int mp_montgomery_calc_normalization (mp_int * a, const mp_int * b)
2656 int x, bits, res;
2658 /* how many bits of last digit does b use */
2659 bits = mp_count_bits (b) % DIGIT_BIT;
2662 if (b->used > 1) {
2663 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
2664 return res;
2666 } else {
2667 mp_set(a, 1);
2668 bits = 1;
2672 /* now compute C = A * B mod b */
2673 for (x = bits - 1; x < DIGIT_BIT; x++) {
2674 if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
2675 return res;
2677 if (mp_cmp_mag (a, b) != MP_LT) {
2678 if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
2679 return res;
2684 return MP_OKAY;
2687 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
2689 mp_montgomery_reduce (mp_int * x, const mp_int * n, mp_digit rho)
2691 int ix, res, digs;
2692 mp_digit mu;
2694 /* can the fast reduction [comba] method be used?
2696 * Note that unlike in mul you're safely allowed *less*
2697 * than the available columns [255 per default] since carries
2698 * are fixed up in the inner loop.
2700 digs = n->used * 2 + 1;
2701 if ((digs < MP_WARRAY) &&
2702 n->used <
2703 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2704 return fast_mp_montgomery_reduce (x, n, rho);
2707 /* grow the input as required */
2708 if (x->alloc < digs) {
2709 if ((res = mp_grow (x, digs)) != MP_OKAY) {
2710 return res;
2713 x->used = digs;
2715 for (ix = 0; ix < n->used; ix++) {
2716 /* mu = ai * rho mod b
2718 * The value of rho must be precalculated via
2719 * montgomery_setup() such that
2720 * it equals -1/n0 mod b this allows the
2721 * following inner loop to reduce the
2722 * input one digit at a time
2724 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
2726 /* a = a + mu * m * b**i */
2728 register int iy;
2729 register mp_digit *tmpn, *tmpx, u;
2730 register mp_word r;
2732 /* alias for digits of the modulus */
2733 tmpn = n->dp;
2735 /* alias for the digits of x [the input] */
2736 tmpx = x->dp + ix;
2738 /* set the carry to zero */
2739 u = 0;
2741 /* Multiply and add in place */
2742 for (iy = 0; iy < n->used; iy++) {
2743 /* compute product and sum */
2744 r = ((mp_word)mu) * ((mp_word)*tmpn++) +
2745 ((mp_word) u) + ((mp_word) * tmpx);
2747 /* get carry */
2748 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2750 /* fix digit */
2751 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
2753 /* At this point the ix'th digit of x should be zero */
2756 /* propagate carries upwards as required*/
2757 while (u) {
2758 *tmpx += u;
2759 u = *tmpx >> DIGIT_BIT;
2760 *tmpx++ &= MP_MASK;
2765 /* at this point the n.used'th least
2766 * significant digits of x are all zero
2767 * which means we can shift x to the
2768 * right by n.used digits and the
2769 * residue is unchanged.
2772 /* x = x/b**n.used */
2773 mp_clamp(x);
2774 mp_rshd (x, n->used);
2776 /* if x >= n then x = x - n */
2777 if (mp_cmp_mag (x, n) != MP_LT) {
2778 return s_mp_sub (x, n, x);
2781 return MP_OKAY;
2784 /* setups the montgomery reduction stuff */
2786 mp_montgomery_setup (const mp_int * n, mp_digit * rho)
2788 mp_digit x, b;
2790 /* fast inversion mod 2**k
2792 * Based on the fact that
2794 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
2795 * => 2*X*A - X*X*A*A = 1
2796 * => 2*(1) - (1) = 1
2798 b = n->dp[0];
2800 if ((b & 1) == 0) {
2801 return MP_VAL;
2804 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
2805 x *= 2 - b * x; /* here x*a==1 mod 2**8 */
2806 x *= 2 - b * x; /* here x*a==1 mod 2**16 */
2807 x *= 2 - b * x; /* here x*a==1 mod 2**32 */
2809 /* rho = -1/m mod b */
2810 *rho = (((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
2812 return MP_OKAY;
2815 /* high level multiplication (handles sign) */
2816 int mp_mul (const mp_int * a, const mp_int * b, mp_int * c)
2818 int res, neg;
2819 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
2821 /* use Karatsuba? */
2822 if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
2823 res = mp_karatsuba_mul (a, b, c);
2824 } else
2826 /* can we use the fast multiplier?
2828 * The fast multiplier can be used if the output will
2829 * have less than MP_WARRAY digits and the number of
2830 * digits won't affect carry propagation
2832 int digs = a->used + b->used + 1;
2834 if ((digs < MP_WARRAY) &&
2835 MIN(a->used, b->used) <=
2836 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2837 res = fast_s_mp_mul_digs (a, b, c, digs);
2838 } else
2839 res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
2841 c->sign = (c->used > 0) ? neg : MP_ZPOS;
2842 return res;
2845 /* b = a*2 */
2846 int mp_mul_2(const mp_int * a, mp_int * b)
2848 int x, res, oldused;
2850 /* grow to accommodate result */
2851 if (b->alloc < a->used + 1) {
2852 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
2853 return res;
2857 oldused = b->used;
2858 b->used = a->used;
2861 register mp_digit r, rr, *tmpa, *tmpb;
2863 /* alias for source */
2864 tmpa = a->dp;
2866 /* alias for dest */
2867 tmpb = b->dp;
2869 /* carry */
2870 r = 0;
2871 for (x = 0; x < a->used; x++) {
2873 /* get what will be the *next* carry bit from the
2874 * MSB of the current digit
2876 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
2878 /* now shift up this digit, add in the carry [from the previous] */
2879 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
2881 /* copy the carry that would be from the source
2882 * digit into the next iteration
2884 r = rr;
2887 /* new leading digit? */
2888 if (r != 0) {
2889 /* add a MSB which is always 1 at this point */
2890 *tmpb = 1;
2891 ++(b->used);
2894 /* now zero any excess digits on the destination
2895 * that we didn't write to
2897 tmpb = b->dp + b->used;
2898 for (x = b->used; x < oldused; x++) {
2899 *tmpb++ = 0;
2902 b->sign = a->sign;
2903 return MP_OKAY;
2906 /* shift left by a certain bit count */
2907 int mp_mul_2d (const mp_int * a, int b, mp_int * c)
2909 mp_digit d;
2910 int res;
2912 /* copy */
2913 if (a != c) {
2914 if ((res = mp_copy (a, c)) != MP_OKAY) {
2915 return res;
2919 if (c->alloc < c->used + b/DIGIT_BIT + 1) {
2920 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
2921 return res;
2925 /* shift by as many digits in the bit count */
2926 if (b >= DIGIT_BIT) {
2927 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
2928 return res;
2932 /* shift any bit count < DIGIT_BIT */
2933 d = (mp_digit) (b % DIGIT_BIT);
2934 if (d != 0) {
2935 register mp_digit *tmpc, shift, mask, r, rr;
2936 register int x;
2938 /* bitmask for carries */
2939 mask = (((mp_digit)1) << d) - 1;
2941 /* shift for msbs */
2942 shift = DIGIT_BIT - d;
2944 /* alias */
2945 tmpc = c->dp;
2947 /* carry */
2948 r = 0;
2949 for (x = 0; x < c->used; x++) {
2950 /* get the higher bits of the current word */
2951 rr = (*tmpc >> shift) & mask;
2953 /* shift the current word and OR in the carry */
2954 *tmpc = ((*tmpc << d) | r) & MP_MASK;
2955 ++tmpc;
2957 /* set the carry to the carry bits of the current word */
2958 r = rr;
2961 /* set final carry */
2962 if (r != 0) {
2963 c->dp[(c->used)++] = r;
2966 mp_clamp (c);
2967 return MP_OKAY;
2970 /* multiply by a digit */
2972 mp_mul_d (const mp_int * a, mp_digit b, mp_int * c)
2974 mp_digit u, *tmpa, *tmpc;
2975 mp_word r;
2976 int ix, res, olduse;
2978 /* make sure c is big enough to hold a*b */
2979 if (c->alloc < a->used + 1) {
2980 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
2981 return res;
2985 /* get the original destinations used count */
2986 olduse = c->used;
2988 /* set the sign */
2989 c->sign = a->sign;
2991 /* alias for a->dp [source] */
2992 tmpa = a->dp;
2994 /* alias for c->dp [dest] */
2995 tmpc = c->dp;
2997 /* zero carry */
2998 u = 0;
3000 /* compute columns */
3001 for (ix = 0; ix < a->used; ix++) {
3002 /* compute product and carry sum for this term */
3003 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
3005 /* mask off higher bits to get a single digit */
3006 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
3008 /* send carry into next iteration */
3009 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
3012 /* store final carry [if any] */
3013 *tmpc++ = u;
3015 /* now zero digits above the top */
3016 while (ix++ < olduse) {
3017 *tmpc++ = 0;
3020 /* set used count */
3021 c->used = a->used + 1;
3022 mp_clamp(c);
3024 return MP_OKAY;
3027 /* d = a * b (mod c) */
3029 mp_mulmod (const mp_int * a, const mp_int * b, mp_int * c, mp_int * d)
3031 int res;
3032 mp_int t;
3034 if ((res = mp_init (&t)) != MP_OKAY) {
3035 return res;
3038 if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
3039 mp_clear (&t);
3040 return res;
3042 res = mp_mod (&t, c, d);
3043 mp_clear (&t);
3044 return res;
3047 /* table of first PRIME_SIZE primes */
3048 static const mp_digit __prime_tab[] = {
3049 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
3050 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
3051 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
3052 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
3053 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
3054 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
3055 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
3056 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
3058 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
3059 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
3060 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
3061 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
3062 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
3063 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
3064 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
3065 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
3067 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
3068 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
3069 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
3070 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
3071 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
3072 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
3073 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
3074 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
3076 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
3077 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
3078 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
3079 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
3080 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
3081 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
3082 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
3083 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
3086 /* determines if an integers is divisible by one
3087 * of the first PRIME_SIZE primes or not
3089 * sets result to 0 if not, 1 if yes
3091 int mp_prime_is_divisible (const mp_int * a, int *result)
3093 int err, ix;
3094 mp_digit res;
3096 /* default to not */
3097 *result = MP_NO;
3099 for (ix = 0; ix < PRIME_SIZE; ix++) {
3100 /* what is a mod __prime_tab[ix] */
3101 if ((err = mp_mod_d (a, __prime_tab[ix], &res)) != MP_OKAY) {
3102 return err;
3105 /* is the residue zero? */
3106 if (res == 0) {
3107 *result = MP_YES;
3108 return MP_OKAY;
3112 return MP_OKAY;
3115 /* performs a variable number of rounds of Miller-Rabin
3117 * Probability of error after t rounds is no more than
3120 * Sets result to 1 if probably prime, 0 otherwise
3122 int mp_prime_is_prime (mp_int * a, int t, int *result)
3124 mp_int b;
3125 int ix, err, res;
3127 /* default to no */
3128 *result = MP_NO;
3130 /* valid value of t? */
3131 if (t <= 0 || t > PRIME_SIZE) {
3132 return MP_VAL;
3135 /* is the input equal to one of the primes in the table? */
3136 for (ix = 0; ix < PRIME_SIZE; ix++) {
3137 if (mp_cmp_d(a, __prime_tab[ix]) == MP_EQ) {
3138 *result = 1;
3139 return MP_OKAY;
3143 /* first perform trial division */
3144 if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
3145 return err;
3148 /* return if it was trivially divisible */
3149 if (res == MP_YES) {
3150 return MP_OKAY;
3153 /* now perform the miller-rabin rounds */
3154 if ((err = mp_init (&b)) != MP_OKAY) {
3155 return err;
3158 for (ix = 0; ix < t; ix++) {
3159 /* set the prime */
3160 mp_set (&b, __prime_tab[ix]);
3162 if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
3163 goto __B;
3166 if (res == MP_NO) {
3167 goto __B;
3171 /* passed the test */
3172 *result = MP_YES;
3173 __B:mp_clear (&b);
3174 return err;
3177 /* Miller-Rabin test of "a" to the base of "b" as described in
3178 * HAC pp. 139 Algorithm 4.24
3180 * Sets result to 0 if definitely composite or 1 if probably prime.
3181 * Randomly the chance of error is no more than 1/4 and often
3182 * very much lower.
3184 int mp_prime_miller_rabin (mp_int * a, const mp_int * b, int *result)
3186 mp_int n1, y, r;
3187 int s, j, err;
3189 /* default */
3190 *result = MP_NO;
3192 /* ensure b > 1 */
3193 if (mp_cmp_d(b, 1) != MP_GT) {
3194 return MP_VAL;
3197 /* get n1 = a - 1 */
3198 if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
3199 return err;
3201 if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
3202 goto __N1;
3205 /* set 2**s * r = n1 */
3206 if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
3207 goto __N1;
3210 /* count the number of least significant bits
3211 * which are zero
3213 s = mp_cnt_lsb(&r);
3215 /* now divide n - 1 by 2**s */
3216 if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
3217 goto __R;
3220 /* compute y = b**r mod a */
3221 if ((err = mp_init (&y)) != MP_OKAY) {
3222 goto __R;
3224 if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
3225 goto __Y;
3228 /* if y != 1 and y != n1 do */
3229 if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
3230 j = 1;
3231 /* while j <= s-1 and y != n1 */
3232 while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
3233 if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
3234 goto __Y;
3237 /* if y == 1 then composite */
3238 if (mp_cmp_d (&y, 1) == MP_EQ) {
3239 goto __Y;
3242 ++j;
3245 /* if y != n1 then composite */
3246 if (mp_cmp (&y, &n1) != MP_EQ) {
3247 goto __Y;
3251 /* probably prime now */
3252 *result = MP_YES;
3253 __Y:mp_clear (&y);
3254 __R:mp_clear (&r);
3255 __N1:mp_clear (&n1);
3256 return err;
3259 static const struct {
3260 int k, t;
3261 } sizes[] = {
3262 { 128, 28 },
3263 { 256, 16 },
3264 { 384, 10 },
3265 { 512, 7 },
3266 { 640, 6 },
3267 { 768, 5 },
3268 { 896, 4 },
3269 { 1024, 4 }
3272 /* returns # of RM trials required for a given bit size */
3273 int mp_prime_rabin_miller_trials(int size)
3275 int x;
3277 for (x = 0; x < (int)(sizeof(sizes)/(sizeof(sizes[0]))); x++) {
3278 if (sizes[x].k == size) {
3279 return sizes[x].t;
3280 } else if (sizes[x].k > size) {
3281 return (x == 0) ? sizes[0].t : sizes[x - 1].t;
3284 return sizes[x-1].t + 1;
3287 /* makes a truly random prime of a given size (bits),
3289 * Flags are as follows:
3291 * LTM_PRIME_BBS - make prime congruent to 3 mod 4
3292 * LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS)
3293 * LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero
3294 * LTM_PRIME_2MSB_ON - make the 2nd highest bit one
3296 * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can
3297 * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself
3298 * so it can be NULL
3302 /* This is possibly the mother of all prime generation functions, muahahahahaha! */
3303 int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat)
3305 unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb;
3306 int res, err, bsize, maskOR_msb_offset;
3308 /* sanity check the input */
3309 if (size <= 1 || t <= 0) {
3310 return MP_VAL;
3313 /* LTM_PRIME_SAFE implies LTM_PRIME_BBS */
3314 if (flags & LTM_PRIME_SAFE) {
3315 flags |= LTM_PRIME_BBS;
3318 /* calc the byte size */
3319 bsize = (size>>3)+((size&7)?1:0);
3321 /* we need a buffer of bsize bytes */
3322 tmp = malloc(bsize);
3323 if (tmp == NULL) {
3324 return MP_MEM;
3327 /* calc the maskAND value for the MSbyte*/
3328 maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7)));
3330 /* calc the maskOR_msb */
3331 maskOR_msb = 0;
3332 maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;
3333 if (flags & LTM_PRIME_2MSB_ON) {
3334 maskOR_msb |= 1 << ((size - 2) & 7);
3335 } else if (flags & LTM_PRIME_2MSB_OFF) {
3336 maskAND &= ~(1 << ((size - 2) & 7));
3339 /* get the maskOR_lsb */
3340 maskOR_lsb = 0;
3341 if (flags & LTM_PRIME_BBS) {
3342 maskOR_lsb |= 3;
3345 do {
3346 /* read the bytes */
3347 if (cb(tmp, bsize, dat) != bsize) {
3348 err = MP_VAL;
3349 goto error;
3352 /* work over the MSbyte */
3353 tmp[0] &= maskAND;
3354 tmp[0] |= 1 << ((size - 1) & 7);
3356 /* mix in the maskORs */
3357 tmp[maskOR_msb_offset] |= maskOR_msb;
3358 tmp[bsize-1] |= maskOR_lsb;
3360 /* read it in */
3361 if ((err = mp_read_unsigned_bin(a, tmp, bsize)) != MP_OKAY) { goto error; }
3363 /* is it prime? */
3364 if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; }
3365 if (res == MP_NO) {
3366 continue;
3369 if (flags & LTM_PRIME_SAFE) {
3370 /* see if (a-1)/2 is prime */
3371 if ((err = mp_sub_d(a, 1, a)) != MP_OKAY) { goto error; }
3372 if ((err = mp_div_2(a, a)) != MP_OKAY) { goto error; }
3374 /* is it prime? */
3375 if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; }
3377 } while (res == MP_NO);
3379 if (flags & LTM_PRIME_SAFE) {
3380 /* restore a to the original value */
3381 if ((err = mp_mul_2(a, a)) != MP_OKAY) { goto error; }
3382 if ((err = mp_add_d(a, 1, a)) != MP_OKAY) { goto error; }
3385 err = MP_OKAY;
3386 error:
3387 free(tmp);
3388 return err;
3391 /* reads an unsigned char array, assumes the msb is stored first [big endian] */
3393 mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
3395 int res;
3397 /* make sure there are at least two digits */
3398 if (a->alloc < 2) {
3399 if ((res = mp_grow(a, 2)) != MP_OKAY) {
3400 return res;
3404 /* zero the int */
3405 mp_zero (a);
3407 /* read the bytes in */
3408 while (c-- > 0) {
3409 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
3410 return res;
3413 a->dp[0] |= *b++;
3414 a->used += 1;
3416 mp_clamp (a);
3417 return MP_OKAY;
3420 /* reduces x mod m, assumes 0 < x < m**2, mu is
3421 * precomputed via mp_reduce_setup.
3422 * From HAC pp.604 Algorithm 14.42
3425 mp_reduce (mp_int * x, const mp_int * m, const mp_int * mu)
3427 mp_int q;
3428 int res, um = m->used;
3430 /* q = x */
3431 if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
3432 return res;
3435 /* q1 = x / b**(k-1) */
3436 mp_rshd (&q, um - 1);
3438 /* according to HAC this optimization is ok */
3439 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
3440 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
3441 goto CLEANUP;
3443 } else {
3444 if ((res = s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) {
3445 goto CLEANUP;
3449 /* q3 = q2 / b**(k+1) */
3450 mp_rshd (&q, um + 1);
3452 /* x = x mod b**(k+1), quick (no division) */
3453 if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
3454 goto CLEANUP;
3457 /* q = q * m mod b**(k+1), quick (no division) */
3458 if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
3459 goto CLEANUP;
3462 /* x = x - q */
3463 if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
3464 goto CLEANUP;
3467 /* If x < 0, add b**(k+1) to it */
3468 if (mp_cmp_d (x, 0) == MP_LT) {
3469 mp_set (&q, 1);
3470 if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
3471 goto CLEANUP;
3472 if ((res = mp_add (x, &q, x)) != MP_OKAY)
3473 goto CLEANUP;
3476 /* Back off if it's too big */
3477 while (mp_cmp (x, m) != MP_LT) {
3478 if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
3479 goto CLEANUP;
3483 CLEANUP:
3484 mp_clear (&q);
3486 return res;
3489 /* reduces a modulo n where n is of the form 2**p - d */
3491 mp_reduce_2k(mp_int *a, const mp_int *n, mp_digit d)
3493 mp_int q;
3494 int p, res;
3496 if ((res = mp_init(&q)) != MP_OKAY) {
3497 return res;
3500 p = mp_count_bits(n);
3501 top:
3502 /* q = a/2**p, a = a mod 2**p */
3503 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
3504 goto ERR;
3507 if (d != 1) {
3508 /* q = q * d */
3509 if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
3510 goto ERR;
3514 /* a = a + q */
3515 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
3516 goto ERR;
3519 if (mp_cmp_mag(a, n) != MP_LT) {
3520 s_mp_sub(a, n, a);
3521 goto top;
3524 ERR:
3525 mp_clear(&q);
3526 return res;
3529 /* determines the setup value */
3530 int
3531 mp_reduce_2k_setup(const mp_int *a, mp_digit *d)
3533 int res, p;
3534 mp_int tmp;
3536 if ((res = mp_init(&tmp)) != MP_OKAY) {
3537 return res;
3540 p = mp_count_bits(a);
3541 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
3542 mp_clear(&tmp);
3543 return res;
3546 if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
3547 mp_clear(&tmp);
3548 return res;
3551 *d = tmp.dp[0];
3552 mp_clear(&tmp);
3553 return MP_OKAY;
3556 /* pre-calculate the value required for Barrett reduction
3557 * For a given modulus "b" it calulates the value required in "a"
3559 int mp_reduce_setup (mp_int * a, const mp_int * b)
3561 int res;
3563 if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
3564 return res;
3566 return mp_div (a, b, a, NULL);
3569 /* shift right a certain amount of digits */
3570 void mp_rshd (mp_int * a, int b)
3572 int x;
3574 /* if b <= 0 then ignore it */
3575 if (b <= 0) {
3576 return;
3579 /* if b > used then simply zero it and return */
3580 if (a->used <= b) {
3581 mp_zero (a);
3582 return;
3586 register mp_digit *bottom, *top;
3588 /* shift the digits down */
3590 /* bottom */
3591 bottom = a->dp;
3593 /* top [offset into digits] */
3594 top = a->dp + b;
3596 /* this is implemented as a sliding window where
3597 * the window is b-digits long and digits from
3598 * the top of the window are copied to the bottom
3600 * e.g.
3602 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
3603 /\ | ---->
3604 \-------------------/ ---->
3606 for (x = 0; x < (a->used - b); x++) {
3607 *bottom++ = *top++;
3610 /* zero the top digits */
3611 for (; x < a->used; x++) {
3612 *bottom++ = 0;
3616 /* remove excess digits */
3617 a->used -= b;
3620 /* set to a digit */
3621 void mp_set (mp_int * a, mp_digit b)
3623 mp_zero (a);
3624 a->dp[0] = b & MP_MASK;
3625 a->used = (a->dp[0] != 0) ? 1 : 0;
3628 /* set a 32-bit const */
3629 int mp_set_int (mp_int * a, unsigned long b)
3631 int x, res;
3633 mp_zero (a);
3635 /* set four bits at a time */
3636 for (x = 0; x < 8; x++) {
3637 /* shift the number up four bits */
3638 if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
3639 return res;
3642 /* OR in the top four bits of the source */
3643 a->dp[0] |= (b >> 28) & 15;
3645 /* shift the source up to the next four bits */
3646 b <<= 4;
3648 /* ensure that digits are not clamped off */
3649 a->used += 1;
3651 mp_clamp (a);
3652 return MP_OKAY;
3655 /* shrink a bignum */
3656 int mp_shrink (mp_int * a)
3658 mp_digit *tmp;
3659 if (a->alloc != a->used && a->used > 0) {
3660 if ((tmp = realloc (a->dp, sizeof (mp_digit) * a->used)) == NULL) {
3661 return MP_MEM;
3663 a->dp = tmp;
3664 a->alloc = a->used;
3666 return MP_OKAY;
3669 /* get the size for an signed equivalent */
3670 int mp_signed_bin_size (const mp_int * a)
3672 return 1 + mp_unsigned_bin_size (a);
3675 /* computes b = a*a */
3677 mp_sqr (const mp_int * a, mp_int * b)
3679 int res;
3681 if (a->used >= KARATSUBA_SQR_CUTOFF) {
3682 res = mp_karatsuba_sqr (a, b);
3683 } else
3685 /* can we use the fast comba multiplier? */
3686 if ((a->used * 2 + 1) < MP_WARRAY &&
3687 a->used <
3688 (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
3689 res = fast_s_mp_sqr (a, b);
3690 } else
3691 res = s_mp_sqr (a, b);
3693 b->sign = MP_ZPOS;
3694 return res;
3697 /* c = a * a (mod b) */
3699 mp_sqrmod (const mp_int * a, mp_int * b, mp_int * c)
3701 int res;
3702 mp_int t;
3704 if ((res = mp_init (&t)) != MP_OKAY) {
3705 return res;
3708 if ((res = mp_sqr (a, &t)) != MP_OKAY) {
3709 mp_clear (&t);
3710 return res;
3712 res = mp_mod (&t, b, c);
3713 mp_clear (&t);
3714 return res;
3717 /* high level subtraction (handles signs) */
3719 mp_sub (mp_int * a, mp_int * b, mp_int * c)
3721 int sa, sb, res;
3723 sa = a->sign;
3724 sb = b->sign;
3726 if (sa != sb) {
3727 /* subtract a negative from a positive, OR */
3728 /* subtract a positive from a negative. */
3729 /* In either case, ADD their magnitudes, */
3730 /* and use the sign of the first number. */
3731 c->sign = sa;
3732 res = s_mp_add (a, b, c);
3733 } else {
3734 /* subtract a positive from a positive, OR */
3735 /* subtract a negative from a negative. */
3736 /* First, take the difference between their */
3737 /* magnitudes, then... */
3738 if (mp_cmp_mag (a, b) != MP_LT) {
3739 /* Copy the sign from the first */
3740 c->sign = sa;
3741 /* The first has a larger or equal magnitude */
3742 res = s_mp_sub (a, b, c);
3743 } else {
3744 /* The result has the *opposite* sign from */
3745 /* the first number. */
3746 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
3747 /* The second has a larger magnitude */
3748 res = s_mp_sub (b, a, c);
3751 return res;
3754 /* single digit subtraction */
3756 mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
3758 mp_digit *tmpa, *tmpc, mu;
3759 int res, ix, oldused;
3761 /* grow c as required */
3762 if (c->alloc < a->used + 1) {
3763 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
3764 return res;
3768 /* if a is negative just do an unsigned
3769 * addition [with fudged signs]
3771 if (a->sign == MP_NEG) {
3772 a->sign = MP_ZPOS;
3773 res = mp_add_d(a, b, c);
3774 a->sign = c->sign = MP_NEG;
3775 return res;
3778 /* setup regs */
3779 oldused = c->used;
3780 tmpa = a->dp;
3781 tmpc = c->dp;
3783 /* if a <= b simply fix the single digit */
3784 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
3785 if (a->used == 1) {
3786 *tmpc++ = b - *tmpa;
3787 } else {
3788 *tmpc++ = b;
3790 ix = 1;
3792 /* negative/1digit */
3793 c->sign = MP_NEG;
3794 c->used = 1;
3795 } else {
3796 /* positive/size */
3797 c->sign = MP_ZPOS;
3798 c->used = a->used;
3800 /* subtract first digit */
3801 *tmpc = *tmpa++ - b;
3802 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3803 *tmpc++ &= MP_MASK;
3805 /* handle rest of the digits */
3806 for (ix = 1; ix < a->used; ix++) {
3807 *tmpc = *tmpa++ - mu;
3808 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3809 *tmpc++ &= MP_MASK;
3813 /* zero excess digits */
3814 while (ix++ < oldused) {
3815 *tmpc++ = 0;
3817 mp_clamp(c);
3818 return MP_OKAY;
3821 /* store in unsigned [big endian] format */
3823 mp_to_unsigned_bin (const mp_int * a, unsigned char *b)
3825 int x, res;
3826 mp_int t;
3828 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
3829 return res;
3832 x = 0;
3833 while (mp_iszero (&t) == 0) {
3834 b[x++] = (unsigned char) (t.dp[0] & 255);
3835 if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
3836 mp_clear (&t);
3837 return res;
3840 bn_reverse (b, x);
3841 mp_clear (&t);
3842 return MP_OKAY;
3845 /* get the size for an unsigned equivalent */
3847 mp_unsigned_bin_size (const mp_int * a)
3849 int size = mp_count_bits (a);
3850 return (size / 8 + ((size & 7) != 0 ? 1 : 0));
3853 /* set to zero */
3854 void
3855 mp_zero (mp_int * a)
3857 a->sign = MP_ZPOS;
3858 a->used = 0;
3859 memset (a->dp, 0, sizeof (mp_digit) * a->alloc);
3862 /* reverse an array, used for radix code */
3863 static void
3864 bn_reverse (unsigned char *s, int len)
3866 int ix, iy;
3867 unsigned char t;
3869 ix = 0;
3870 iy = len - 1;
3871 while (ix < iy) {
3872 t = s[ix];
3873 s[ix] = s[iy];
3874 s[iy] = t;
3875 ++ix;
3876 --iy;
3880 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
3881 static int
3882 s_mp_add (mp_int * a, mp_int * b, mp_int * c)
3884 mp_int *x;
3885 int olduse, res, min, max;
3887 /* find sizes, we let |a| <= |b| which means we have to sort
3888 * them. "x" will point to the input with the most digits
3890 if (a->used > b->used) {
3891 min = b->used;
3892 max = a->used;
3893 x = a;
3894 } else {
3895 min = a->used;
3896 max = b->used;
3897 x = b;
3900 /* init result */
3901 if (c->alloc < max + 1) {
3902 if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
3903 return res;
3907 /* get old used digit count and set new one */
3908 olduse = c->used;
3909 c->used = max + 1;
3912 register mp_digit u, *tmpa, *tmpb, *tmpc;
3913 register int i;
3915 /* alias for digit pointers */
3917 /* first input */
3918 tmpa = a->dp;
3920 /* second input */
3921 tmpb = b->dp;
3923 /* destination */
3924 tmpc = c->dp;
3926 /* zero the carry */
3927 u = 0;
3928 for (i = 0; i < min; i++) {
3929 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
3930 *tmpc = *tmpa++ + *tmpb++ + u;
3932 /* U = carry bit of T[i] */
3933 u = *tmpc >> ((mp_digit)DIGIT_BIT);
3935 /* take away carry bit from T[i] */
3936 *tmpc++ &= MP_MASK;
3939 /* now copy higher words if any, that is in A+B
3940 * if A or B has more digits add those in
3942 if (min != max) {
3943 for (; i < max; i++) {
3944 /* T[i] = X[i] + U */
3945 *tmpc = x->dp[i] + u;
3947 /* U = carry bit of T[i] */
3948 u = *tmpc >> ((mp_digit)DIGIT_BIT);
3950 /* take away carry bit from T[i] */
3951 *tmpc++ &= MP_MASK;
3955 /* add carry */
3956 *tmpc++ = u;
3958 /* clear digits above oldused */
3959 for (i = c->used; i < olduse; i++) {
3960 *tmpc++ = 0;
3964 mp_clamp (c);
3965 return MP_OKAY;
3968 static int s_mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y)
3970 mp_int M[256], res, mu;
3971 mp_digit buf;
3972 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
3974 /* find window size */
3975 x = mp_count_bits (X);
3976 if (x <= 7) {
3977 winsize = 2;
3978 } else if (x <= 36) {
3979 winsize = 3;
3980 } else if (x <= 140) {
3981 winsize = 4;
3982 } else if (x <= 450) {
3983 winsize = 5;
3984 } else if (x <= 1303) {
3985 winsize = 6;
3986 } else if (x <= 3529) {
3987 winsize = 7;
3988 } else {
3989 winsize = 8;
3992 /* init M array */
3993 /* init first cell */
3994 if ((err = mp_init(&M[1])) != MP_OKAY) {
3995 return err;
3998 /* now init the second half of the array */
3999 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4000 if ((err = mp_init(&M[x])) != MP_OKAY) {
4001 for (y = 1<<(winsize-1); y < x; y++) {
4002 mp_clear (&M[y]);
4004 mp_clear(&M[1]);
4005 return err;
4009 /* create mu, used for Barrett reduction */
4010 if ((err = mp_init (&mu)) != MP_OKAY) {
4011 goto __M;
4013 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
4014 goto __MU;
4017 /* create M table
4019 * The M table contains powers of the base,
4020 * e.g. M[x] = G**x mod P
4022 * The first half of the table is not
4023 * computed though accept for M[0] and M[1]
4025 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
4026 goto __MU;
4029 /* compute the value at M[1<<(winsize-1)] by squaring
4030 * M[1] (winsize-1) times
4032 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
4033 goto __MU;
4036 for (x = 0; x < (winsize - 1); x++) {
4037 if ((err = mp_sqr (&M[1 << (winsize - 1)],
4038 &M[1 << (winsize - 1)])) != MP_OKAY) {
4039 goto __MU;
4041 if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
4042 goto __MU;
4046 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
4047 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
4049 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
4050 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
4051 goto __MU;
4053 if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
4054 goto __MU;
4058 /* setup result */
4059 if ((err = mp_init (&res)) != MP_OKAY) {
4060 goto __MU;
4062 mp_set (&res, 1);
4064 /* set initial mode and bit cnt */
4065 mode = 0;
4066 bitcnt = 1;
4067 buf = 0;
4068 digidx = X->used - 1;
4069 bitcpy = 0;
4070 bitbuf = 0;
4072 for (;;) {
4073 /* grab next digit as required */
4074 if (--bitcnt == 0) {
4075 /* if digidx == -1 we are out of digits */
4076 if (digidx == -1) {
4077 break;
4079 /* read next digit and reset the bitcnt */
4080 buf = X->dp[digidx--];
4081 bitcnt = DIGIT_BIT;
4084 /* grab the next msb from the exponent */
4085 y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
4086 buf <<= (mp_digit)1;
4088 /* if the bit is zero and mode == 0 then we ignore it
4089 * These represent the leading zero bits before the first 1 bit
4090 * in the exponent. Technically this opt is not required but it
4091 * does lower the # of trivial squaring/reductions used
4093 if (mode == 0 && y == 0) {
4094 continue;
4097 /* if the bit is zero and mode == 1 then we square */
4098 if (mode == 1 && y == 0) {
4099 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4100 goto __RES;
4102 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4103 goto __RES;
4105 continue;
4108 /* else we add it to the window */
4109 bitbuf |= (y << (winsize - ++bitcpy));
4110 mode = 2;
4112 if (bitcpy == winsize) {
4113 /* ok window is filled so square as required and multiply */
4114 /* square first */
4115 for (x = 0; x < winsize; x++) {
4116 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4117 goto __RES;
4119 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4120 goto __RES;
4124 /* then multiply */
4125 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
4126 goto __RES;
4128 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4129 goto __RES;
4132 /* empty window and reset */
4133 bitcpy = 0;
4134 bitbuf = 0;
4135 mode = 1;
4139 /* if bits remain then square/multiply */
4140 if (mode == 2 && bitcpy > 0) {
4141 /* square then multiply if the bit is set */
4142 for (x = 0; x < bitcpy; x++) {
4143 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4144 goto __RES;
4146 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4147 goto __RES;
4150 bitbuf <<= 1;
4151 if ((bitbuf & (1 << winsize)) != 0) {
4152 /* then multiply */
4153 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
4154 goto __RES;
4156 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4157 goto __RES;
4163 mp_exch (&res, Y);
4164 err = MP_OKAY;
4165 __RES:mp_clear (&res);
4166 __MU:mp_clear (&mu);
4167 __M:
4168 mp_clear(&M[1]);
4169 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4170 mp_clear (&M[x]);
4172 return err;
4175 /* multiplies |a| * |b| and only computes up to digs digits of result
4176 * HAC pp. 595, Algorithm 14.12 Modified so you can control how
4177 * many digits of output are created.
4179 static int
4180 s_mp_mul_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
4182 mp_int t;
4183 int res, pa, pb, ix, iy;
4184 mp_digit u;
4185 mp_word r;
4186 mp_digit tmpx, *tmpt, *tmpy;
4188 /* can we use the fast multiplier? */
4189 if (((digs) < MP_WARRAY) &&
4190 MIN (a->used, b->used) <
4191 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4192 return fast_s_mp_mul_digs (a, b, c, digs);
4195 if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
4196 return res;
4198 t.used = digs;
4200 /* compute the digits of the product directly */
4201 pa = a->used;
4202 for (ix = 0; ix < pa; ix++) {
4203 /* set the carry to zero */
4204 u = 0;
4206 /* limit ourselves to making digs digits of output */
4207 pb = MIN (b->used, digs - ix);
4209 /* setup some aliases */
4210 /* copy of the digit from a used within the nested loop */
4211 tmpx = a->dp[ix];
4213 /* an alias for the destination shifted ix places */
4214 tmpt = t.dp + ix;
4216 /* an alias for the digits of b */
4217 tmpy = b->dp;
4219 /* compute the columns of the output and propagate the carry */
4220 for (iy = 0; iy < pb; iy++) {
4221 /* compute the column as a mp_word */
4222 r = ((mp_word)*tmpt) +
4223 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
4224 ((mp_word) u);
4226 /* the new column is the lower part of the result */
4227 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4229 /* get the carry word from the result */
4230 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
4232 /* set carry if it is placed below digs */
4233 if (ix + iy < digs) {
4234 *tmpt = u;
4238 mp_clamp (&t);
4239 mp_exch (&t, c);
4241 mp_clear (&t);
4242 return MP_OKAY;
4245 /* multiplies |a| * |b| and does not compute the lower digs digits
4246 * [meant to get the higher part of the product]
4248 static int
4249 s_mp_mul_high_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
4251 mp_int t;
4252 int res, pa, pb, ix, iy;
4253 mp_digit u;
4254 mp_word r;
4255 mp_digit tmpx, *tmpt, *tmpy;
4257 /* can we use the fast multiplier? */
4258 if (((a->used + b->used + 1) < MP_WARRAY)
4259 && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4260 return fast_s_mp_mul_high_digs (a, b, c, digs);
4263 if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
4264 return res;
4266 t.used = a->used + b->used + 1;
4268 pa = a->used;
4269 pb = b->used;
4270 for (ix = 0; ix < pa; ix++) {
4271 /* clear the carry */
4272 u = 0;
4274 /* left hand side of A[ix] * B[iy] */
4275 tmpx = a->dp[ix];
4277 /* alias to the address of where the digits will be stored */
4278 tmpt = &(t.dp[digs]);
4280 /* alias for where to read the right hand side from */
4281 tmpy = b->dp + (digs - ix);
4283 for (iy = digs - ix; iy < pb; iy++) {
4284 /* calculate the double precision result */
4285 r = ((mp_word)*tmpt) +
4286 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
4287 ((mp_word) u);
4289 /* get the lower part */
4290 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4292 /* carry the carry */
4293 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
4295 *tmpt = u;
4297 mp_clamp (&t);
4298 mp_exch (&t, c);
4299 mp_clear (&t);
4300 return MP_OKAY;
4303 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
4304 static int
4305 s_mp_sqr (const mp_int * a, mp_int * b)
4307 mp_int t;
4308 int res, ix, iy, pa;
4309 mp_word r;
4310 mp_digit u, tmpx, *tmpt;
4312 pa = a->used;
4313 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
4314 return res;
4317 /* default used is maximum possible size */
4318 t.used = 2*pa + 1;
4320 for (ix = 0; ix < pa; ix++) {
4321 /* first calculate the digit at 2*ix */
4322 /* calculate double precision result */
4323 r = ((mp_word) t.dp[2*ix]) +
4324 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
4326 /* store lower part in result */
4327 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
4329 /* get the carry */
4330 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4332 /* left hand side of A[ix] * A[iy] */
4333 tmpx = a->dp[ix];
4335 /* alias for where to store the results */
4336 tmpt = t.dp + (2*ix + 1);
4338 for (iy = ix + 1; iy < pa; iy++) {
4339 /* first calculate the product */
4340 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
4342 /* now calculate the double precision result, note we use
4343 * addition instead of *2 since it's easier to optimize
4345 r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
4347 /* store lower part */
4348 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4350 /* get carry */
4351 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4353 /* propagate upwards */
4354 while (u != ((mp_digit) 0)) {
4355 r = ((mp_word) *tmpt) + ((mp_word) u);
4356 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4357 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4361 mp_clamp (&t);
4362 mp_exch (&t, b);
4363 mp_clear (&t);
4364 return MP_OKAY;
4367 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
4369 s_mp_sub (const mp_int * a, const mp_int * b, mp_int * c)
4371 int olduse, res, min, max;
4373 /* find sizes */
4374 min = b->used;
4375 max = a->used;
4377 /* init result */
4378 if (c->alloc < max) {
4379 if ((res = mp_grow (c, max)) != MP_OKAY) {
4380 return res;
4383 olduse = c->used;
4384 c->used = max;
4387 register mp_digit u, *tmpa, *tmpb, *tmpc;
4388 register int i;
4390 /* alias for digit pointers */
4391 tmpa = a->dp;
4392 tmpb = b->dp;
4393 tmpc = c->dp;
4395 /* set carry to zero */
4396 u = 0;
4397 for (i = 0; i < min; i++) {
4398 /* T[i] = A[i] - B[i] - U */
4399 *tmpc = *tmpa++ - *tmpb++ - u;
4401 /* U = carry bit of T[i]
4402 * Note this saves performing an AND operation since
4403 * if a carry does occur it will propagate all the way to the
4404 * MSB. As a result a single shift is enough to get the carry
4406 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
4408 /* Clear carry from T[i] */
4409 *tmpc++ &= MP_MASK;
4412 /* now copy higher words if any, e.g. if A has more digits than B */
4413 for (; i < max; i++) {
4414 /* T[i] = A[i] - U */
4415 *tmpc = *tmpa++ - u;
4417 /* U = carry bit of T[i] */
4418 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
4420 /* Clear carry from T[i] */
4421 *tmpc++ &= MP_MASK;
4424 /* clear digits above used (since we may not have grown result above) */
4425 for (i = c->used; i < olduse; i++) {
4426 *tmpc++ = 0;
4430 mp_clamp (c);
4431 return MP_OKAY;