rsaenh: Avoid double 'va_end(args)' in case of a premature loop termination.
[wine.git] / dlls / rsaenh / mpi.c
blobe05fa59f8bde8cb016f014683bba76d73085ed0a
1 /*
2 * dlls/rsaenh/mpi.c
3 * Multi Precision Integer functions
5 * Copyright 2004 Michael Jung
6 * Based on public domain code by Tom St Denis (tomstdenis@iahu.ca)
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
24 * This file contains code from the LibTomCrypt cryptographic
25 * library written by Tom St Denis (tomstdenis@iahu.ca). LibTomCrypt
26 * is in the public domain. The code in this file is tailored to
27 * special requirements. Take a look at http://libtomcrypt.org for the
28 * original version.
31 #include <stdarg.h>
33 #include "windef.h"
34 #include "winbase.h"
35 #include "tomcrypt.h"
37 /* Known optimal configurations
38 CPU /Compiler /MUL CUTOFF/SQR CUTOFF
39 -------------------------------------------------------------
40 Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-)
42 static const int KARATSUBA_MUL_CUTOFF = 88, /* Min. number of digits before Karatsuba multiplication is used. */
43 KARATSUBA_SQR_CUTOFF = 128; /* Min. number of digits before Karatsuba squaring is used. */
46 /* trim unused digits */
47 static void mp_clamp(mp_int *a);
49 /* compare |a| to |b| */
50 static int mp_cmp_mag(const mp_int *a, const mp_int *b);
52 /* Counts the number of lsbs which are zero before the first zero bit */
53 static int mp_cnt_lsb(const mp_int *a);
55 /* computes a = B**n mod b without division or multiplication useful for
56 * normalizing numbers in a Montgomery system.
58 static int mp_montgomery_calc_normalization(mp_int *a, const mp_int *b);
60 /* computes x/R == x (mod N) via Montgomery Reduction */
61 static int mp_montgomery_reduce(mp_int *a, const mp_int *m, mp_digit mp);
63 /* setups the montgomery reduction */
64 static int mp_montgomery_setup(const mp_int *a, mp_digit *mp);
66 /* Barrett Reduction, computes a (mod b) with a precomputed value c
68 * Assumes that 0 < a <= b*b, note if 0 > a > -(b*b) then you can merely
69 * compute the reduction as -1 * mp_reduce(mp_abs(a)) [pseudo code].
71 static int mp_reduce(mp_int *a, const mp_int *b, const mp_int *c);
73 /* reduces a modulo b where b is of the form 2**p - k [0 <= a] */
74 static int mp_reduce_2k(mp_int *a, const mp_int *n, mp_digit d);
76 /* determines k value for 2k reduction */
77 static int mp_reduce_2k_setup(const mp_int *a, mp_digit *d);
79 /* used to setup the Barrett reduction for a given modulus b */
80 static int mp_reduce_setup(mp_int *a, const mp_int *b);
82 /* set to a digit */
83 static void mp_set(mp_int *a, mp_digit b);
85 /* b = a*a */
86 static int mp_sqr(const mp_int *a, mp_int *b);
88 /* c = a * a (mod b) */
89 static int mp_sqrmod(const mp_int *a, mp_int *b, mp_int *c);
92 static void bn_reverse(unsigned char *s, int len);
93 static int s_mp_add(mp_int *a, mp_int *b, mp_int *c);
94 static int s_mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y);
95 #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
96 static int s_mp_mul_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs);
97 static int s_mp_mul_high_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs);
98 static int s_mp_sqr(const mp_int *a, mp_int *b);
99 static int s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
100 static int mp_exptmod_fast(const mp_int *G, const mp_int *X, mp_int *P, mp_int *Y, int mode);
101 static int mp_invmod_slow (const mp_int * a, mp_int * b, mp_int * c);
102 static int mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c);
103 static int mp_karatsuba_sqr(const mp_int *a, mp_int *b);
105 /* grow as required */
106 static int mp_grow (mp_int * a, int size)
108 int i;
109 mp_digit *tmp;
111 /* if the alloc size is smaller alloc more ram */
112 if (a->alloc < size) {
113 /* ensure there are always at least MP_PREC digits extra on top */
114 size += (MP_PREC * 2) - (size % MP_PREC);
116 /* reallocate the array a->dp
118 * We store the return in a temporary variable
119 * in case the operation failed we don't want
120 * to overwrite the dp member of a.
122 tmp = HeapReAlloc(GetProcessHeap(), 0, a->dp, sizeof (mp_digit) * size);
123 if (tmp == NULL) {
124 /* reallocation failed but "a" is still valid [can be freed] */
125 return MP_MEM;
128 /* reallocation succeeded so set a->dp */
129 a->dp = tmp;
131 /* zero excess digits */
132 i = a->alloc;
133 a->alloc = size;
134 for (; i < a->alloc; i++) {
135 a->dp[i] = 0;
138 return MP_OKAY;
141 /* b = a/2 */
142 static int mp_div_2(const mp_int * a, mp_int * b)
144 int x, res, oldused;
146 /* copy */
147 if (b->alloc < a->used) {
148 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
149 return res;
153 oldused = b->used;
154 b->used = a->used;
156 register mp_digit r, rr, *tmpa, *tmpb;
158 /* source alias */
159 tmpa = a->dp + b->used - 1;
161 /* dest alias */
162 tmpb = b->dp + b->used - 1;
164 /* carry */
165 r = 0;
166 for (x = b->used - 1; x >= 0; x--) {
167 /* get the carry for the next iteration */
168 rr = *tmpa & 1;
170 /* shift the current digit, add in carry and store */
171 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
173 /* forward carry to next iteration */
174 r = rr;
177 /* zero excess digits */
178 tmpb = b->dp + b->used;
179 for (x = b->used; x < oldused; x++) {
180 *tmpb++ = 0;
183 b->sign = a->sign;
184 mp_clamp (b);
185 return MP_OKAY;
188 /* swap the elements of two integers, for cases where you can't simply swap the
189 * mp_int pointers around
191 static void
192 mp_exch (mp_int * a, mp_int * b)
194 mp_int t;
196 t = *a;
197 *a = *b;
198 *b = t;
201 /* init a new mp_int */
202 static int mp_init (mp_int * a)
204 int i;
206 /* allocate memory required and clear it */
207 a->dp = HeapAlloc(GetProcessHeap(), 0, sizeof (mp_digit) * MP_PREC);
208 if (a->dp == NULL) {
209 return MP_MEM;
212 /* set the digits to zero */
213 for (i = 0; i < MP_PREC; i++) {
214 a->dp[i] = 0;
217 /* set the used to zero, allocated digits to the default precision
218 * and sign to positive */
219 a->used = 0;
220 a->alloc = MP_PREC;
221 a->sign = MP_ZPOS;
223 return MP_OKAY;
226 /* init an mp_init for a given size */
227 static int mp_init_size (mp_int * a, int size)
229 int x;
231 /* pad size so there are always extra digits */
232 size += (MP_PREC * 2) - (size % MP_PREC);
234 /* alloc mem */
235 a->dp = HeapAlloc(GetProcessHeap(), 0, sizeof (mp_digit) * size);
236 if (a->dp == NULL) {
237 return MP_MEM;
240 /* set the members */
241 a->used = 0;
242 a->alloc = size;
243 a->sign = MP_ZPOS;
245 /* zero the digits */
246 for (x = 0; x < size; x++) {
247 a->dp[x] = 0;
250 return MP_OKAY;
253 /* clear one (frees) */
254 static void
255 mp_clear (mp_int * a)
257 int i;
259 /* only do anything if a hasn't been freed previously */
260 if (a->dp != NULL) {
261 /* first zero the digits */
262 for (i = 0; i < a->used; i++) {
263 a->dp[i] = 0;
266 /* free ram */
267 HeapFree(GetProcessHeap(), 0, a->dp);
269 /* reset members to make debugging easier */
270 a->dp = NULL;
271 a->alloc = a->used = 0;
272 a->sign = MP_ZPOS;
276 /* set to zero */
277 static void
278 mp_zero (mp_int * a)
280 a->sign = MP_ZPOS;
281 a->used = 0;
282 memset (a->dp, 0, sizeof (mp_digit) * a->alloc);
285 /* b = |a|
287 * Simple function copies the input and fixes the sign to positive
289 static int
290 mp_abs (const mp_int * a, mp_int * b)
292 int res;
294 /* copy a to b */
295 if (a != b) {
296 if ((res = mp_copy (a, b)) != MP_OKAY) {
297 return res;
301 /* force the sign of b to positive */
302 b->sign = MP_ZPOS;
304 return MP_OKAY;
307 /* computes the modular inverse via binary extended euclidean algorithm,
308 * that is c = 1/a mod b
310 * Based on slow invmod except this is optimized for the case where b is
311 * odd as per HAC Note 14.64 on pp. 610
313 static int
314 fast_mp_invmod (const mp_int * a, mp_int * b, mp_int * c)
316 mp_int x, y, u, v, B, D;
317 int res, neg;
319 /* 2. [modified] b must be odd */
320 if (mp_iseven (b) == 1) {
321 return MP_VAL;
324 /* init all our temps */
325 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
326 return res;
329 /* x == modulus, y == value to invert */
330 if ((res = mp_copy (b, &x)) != MP_OKAY) {
331 goto __ERR;
334 /* we need y = |a| */
335 if ((res = mp_abs (a, &y)) != MP_OKAY) {
336 goto __ERR;
339 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
340 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
341 goto __ERR;
343 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
344 goto __ERR;
346 mp_set (&D, 1);
348 top:
349 /* 4. while u is even do */
350 while (mp_iseven (&u) == 1) {
351 /* 4.1 u = u/2 */
352 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
353 goto __ERR;
355 /* 4.2 if B is odd then */
356 if (mp_isodd (&B) == 1) {
357 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
358 goto __ERR;
361 /* B = B/2 */
362 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
363 goto __ERR;
367 /* 5. while v is even do */
368 while (mp_iseven (&v) == 1) {
369 /* 5.1 v = v/2 */
370 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
371 goto __ERR;
373 /* 5.2 if D is odd then */
374 if (mp_isodd (&D) == 1) {
375 /* D = (D-x)/2 */
376 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
377 goto __ERR;
380 /* D = D/2 */
381 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
382 goto __ERR;
386 /* 6. if u >= v then */
387 if (mp_cmp (&u, &v) != MP_LT) {
388 /* u = u - v, B = B - D */
389 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
390 goto __ERR;
393 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
394 goto __ERR;
396 } else {
397 /* v - v - u, D = D - B */
398 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
399 goto __ERR;
402 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
403 goto __ERR;
407 /* if not zero goto step 4 */
408 if (mp_iszero (&u) == 0) {
409 goto top;
412 /* now a = C, b = D, gcd == g*v */
414 /* if v != 1 then there is no inverse */
415 if (mp_cmp_d (&v, 1) != MP_EQ) {
416 res = MP_VAL;
417 goto __ERR;
420 /* b is now the inverse */
421 neg = a->sign;
422 while (D.sign == MP_NEG) {
423 if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
424 goto __ERR;
427 mp_exch (&D, c);
428 c->sign = neg;
429 res = MP_OKAY;
431 __ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
432 return res;
435 /* computes xR**-1 == x (mod N) via Montgomery Reduction
437 * This is an optimized implementation of montgomery_reduce
438 * which uses the comba method to quickly calculate the columns of the
439 * reduction.
441 * Based on Algorithm 14.32 on pp.601 of HAC.
443 static int
444 fast_mp_montgomery_reduce (mp_int * x, const mp_int * n, mp_digit rho)
446 int ix, res, olduse;
447 mp_word W[MP_WARRAY];
449 /* get old used count */
450 olduse = x->used;
452 /* grow a as required */
453 if (x->alloc < n->used + 1) {
454 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
455 return res;
459 /* first we have to get the digits of the input into
460 * an array of double precision words W[...]
463 register mp_word *_W;
464 register mp_digit *tmpx;
466 /* alias for the W[] array */
467 _W = W;
469 /* alias for the digits of x*/
470 tmpx = x->dp;
472 /* copy the digits of a into W[0..a->used-1] */
473 for (ix = 0; ix < x->used; ix++) {
474 *_W++ = *tmpx++;
477 /* zero the high words of W[a->used..m->used*2] */
478 for (; ix < n->used * 2 + 1; ix++) {
479 *_W++ = 0;
483 /* now we proceed to zero successive digits
484 * from the least significant upwards
486 for (ix = 0; ix < n->used; ix++) {
487 /* mu = ai * m' mod b
489 * We avoid a double precision multiplication (which isn't required)
490 * by casting the value down to a mp_digit. Note this requires
491 * that W[ix-1] have the carry cleared (see after the inner loop)
493 register mp_digit mu;
494 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
496 /* a = a + mu * m * b**i
498 * This is computed in place and on the fly. The multiplication
499 * by b**i is handled by offsetting which columns the results
500 * are added to.
502 * Note the comba method normally doesn't handle carries in the
503 * inner loop In this case we fix the carry from the previous
504 * column since the Montgomery reduction requires digits of the
505 * result (so far) [see above] to work. This is
506 * handled by fixing up one carry after the inner loop. The
507 * carry fixups are done in order so after these loops the
508 * first m->used words of W[] have the carries fixed
511 register int iy;
512 register mp_digit *tmpn;
513 register mp_word *_W;
515 /* alias for the digits of the modulus */
516 tmpn = n->dp;
518 /* Alias for the columns set by an offset of ix */
519 _W = W + ix;
521 /* inner loop */
522 for (iy = 0; iy < n->used; iy++) {
523 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
527 /* now fix carry for next digit, W[ix+1] */
528 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
531 /* now we have to propagate the carries and
532 * shift the words downward [all those least
533 * significant digits we zeroed].
536 register mp_digit *tmpx;
537 register mp_word *_W, *_W1;
539 /* nox fix rest of carries */
541 /* alias for current word */
542 _W1 = W + ix;
544 /* alias for next word, where the carry goes */
545 _W = W + ++ix;
547 for (; ix <= n->used * 2 + 1; ix++) {
548 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
551 /* copy out, A = A/b**n
553 * The result is A/b**n but instead of converting from an
554 * array of mp_word to mp_digit than calling mp_rshd
555 * we just copy them in the right order
558 /* alias for destination word */
559 tmpx = x->dp;
561 /* alias for shifted double precision result */
562 _W = W + n->used;
564 for (ix = 0; ix < n->used + 1; ix++) {
565 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
568 /* zero oldused digits, if the input a was larger than
569 * m->used+1 we'll have to clear the digits
571 for (; ix < olduse; ix++) {
572 *tmpx++ = 0;
576 /* set the max used and clamp */
577 x->used = n->used + 1;
578 mp_clamp (x);
580 /* if A >= m then A = A - m */
581 if (mp_cmp_mag (x, n) != MP_LT) {
582 return s_mp_sub (x, n, x);
584 return MP_OKAY;
587 /* Fast (comba) multiplier
589 * This is the fast column-array [comba] multiplier. It is
590 * designed to compute the columns of the product first
591 * then handle the carries afterwards. This has the effect
592 * of making the nested loops that compute the columns very
593 * simple and schedulable on super-scalar processors.
595 * This has been modified to produce a variable number of
596 * digits of output so if say only a half-product is required
597 * you don't have to compute the upper half (a feature
598 * required for fast Barrett reduction).
600 * Based on Algorithm 14.12 on pp.595 of HAC.
603 static int
604 fast_s_mp_mul_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
606 int olduse, res, pa, ix, iz;
607 mp_digit W[MP_WARRAY];
608 register mp_word _W;
610 /* grow the destination as required */
611 if (c->alloc < digs) {
612 if ((res = mp_grow (c, digs)) != MP_OKAY) {
613 return res;
617 /* number of output digits to produce */
618 pa = MIN(digs, a->used + b->used);
620 /* clear the carry */
621 _W = 0;
622 for (ix = 0; ix <= pa; ix++) {
623 int tx, ty;
624 int iy;
625 mp_digit *tmpx, *tmpy;
627 /* get offsets into the two bignums */
628 ty = MIN(b->used-1, ix);
629 tx = ix - ty;
631 /* setup temp aliases */
632 tmpx = a->dp + tx;
633 tmpy = b->dp + ty;
635 /* This is the number of times the loop will iterate, essentially it's
636 while (tx++ < a->used && ty-- >= 0) { ... }
638 iy = MIN(a->used-tx, ty+1);
640 /* execute loop */
641 for (iz = 0; iz < iy; ++iz) {
642 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
645 /* store term */
646 W[ix] = ((mp_digit)_W) & MP_MASK;
648 /* make next carry */
649 _W = _W >> ((mp_word)DIGIT_BIT);
652 /* setup dest */
653 olduse = c->used;
654 c->used = digs;
657 register mp_digit *tmpc;
658 tmpc = c->dp;
659 for (ix = 0; ix < digs; ix++) {
660 /* now extract the previous digit [below the carry] */
661 *tmpc++ = W[ix];
664 /* clear unused digits [that existed in the old copy of c] */
665 for (; ix < olduse; ix++) {
666 *tmpc++ = 0;
669 mp_clamp (c);
670 return MP_OKAY;
673 /* this is a modified version of fast_s_mul_digs that only produces
674 * output digits *above* digs. See the comments for fast_s_mul_digs
675 * to see how it works.
677 * This is used in the Barrett reduction since for one of the multiplications
678 * only the higher digits were needed. This essentially halves the work.
680 * Based on Algorithm 14.12 on pp.595 of HAC.
682 static int
683 fast_s_mp_mul_high_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
685 int olduse, res, pa, ix, iz;
686 mp_digit W[MP_WARRAY];
687 mp_word _W;
689 /* grow the destination as required */
690 pa = a->used + b->used;
691 if (c->alloc < pa) {
692 if ((res = mp_grow (c, pa)) != MP_OKAY) {
693 return res;
697 /* number of output digits to produce */
698 pa = a->used + b->used;
699 _W = 0;
700 for (ix = digs; ix <= pa; ix++) {
701 int tx, ty, iy;
702 mp_digit *tmpx, *tmpy;
704 /* get offsets into the two bignums */
705 ty = MIN(b->used-1, ix);
706 tx = ix - ty;
708 /* setup temp aliases */
709 tmpx = a->dp + tx;
710 tmpy = b->dp + ty;
712 /* This is the number of times the loop will iterate, essentially it's
713 while (tx++ < a->used && ty-- >= 0) { ... }
715 iy = MIN(a->used-tx, ty+1);
717 /* execute loop */
718 for (iz = 0; iz < iy; iz++) {
719 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
722 /* store term */
723 W[ix] = ((mp_digit)_W) & MP_MASK;
725 /* make next carry */
726 _W = _W >> ((mp_word)DIGIT_BIT);
729 /* setup dest */
730 olduse = c->used;
731 c->used = pa;
734 register mp_digit *tmpc;
736 tmpc = c->dp + digs;
737 for (ix = digs; ix <= pa; ix++) {
738 /* now extract the previous digit [below the carry] */
739 *tmpc++ = W[ix];
742 /* clear unused digits [that existed in the old copy of c] */
743 for (; ix < olduse; ix++) {
744 *tmpc++ = 0;
747 mp_clamp (c);
748 return MP_OKAY;
751 /* fast squaring
753 * This is the comba method where the columns of the product
754 * are computed first then the carries are computed. This
755 * has the effect of making a very simple inner loop that
756 * is executed the most
758 * W2 represents the outer products and W the inner.
760 * A further optimizations is made because the inner
761 * products are of the form "A * B * 2". The *2 part does
762 * not need to be computed until the end which is good
763 * because 64-bit shifts are slow!
765 * Based on Algorithm 14.16 on pp.597 of HAC.
768 /* the jist of squaring...
770 you do like mult except the offset of the tmpx [one that starts closer to zero]
771 can't equal the offset of tmpy. So basically you set up iy like before then you min it with
772 (ty-tx) so that it never happens. You double all those you add in the inner loop
774 After that loop you do the squares and add them in.
776 Remove W2 and don't memset W
780 static int fast_s_mp_sqr (const mp_int * a, mp_int * b)
782 int olduse, res, pa, ix, iz;
783 mp_digit W[MP_WARRAY], *tmpx;
784 mp_word W1;
786 /* grow the destination as required */
787 pa = a->used + a->used;
788 if (b->alloc < pa) {
789 if ((res = mp_grow (b, pa)) != MP_OKAY) {
790 return res;
794 /* number of output digits to produce */
795 W1 = 0;
796 for (ix = 0; ix <= pa; ix++) {
797 int tx, ty, iy;
798 mp_word _W;
799 mp_digit *tmpy;
801 /* clear counter */
802 _W = 0;
804 /* get offsets into the two bignums */
805 ty = MIN(a->used-1, ix);
806 tx = ix - ty;
808 /* setup temp aliases */
809 tmpx = a->dp + tx;
810 tmpy = a->dp + ty;
812 /* This is the number of times the loop will iterate, essentially it's
813 while (tx++ < a->used && ty-- >= 0) { ... }
815 iy = MIN(a->used-tx, ty+1);
817 /* now for squaring tx can never equal ty
818 * we halve the distance since they approach at a rate of 2x
819 * and we have to round because odd cases need to be executed
821 iy = MIN(iy, (ty-tx+1)>>1);
823 /* execute loop */
824 for (iz = 0; iz < iy; iz++) {
825 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
828 /* double the inner product and add carry */
829 _W = _W + _W + W1;
831 /* even columns have the square term in them */
832 if ((ix&1) == 0) {
833 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
836 /* store it */
837 W[ix] = _W;
839 /* make next carry */
840 W1 = _W >> ((mp_word)DIGIT_BIT);
843 /* setup dest */
844 olduse = b->used;
845 b->used = a->used+a->used;
848 mp_digit *tmpb;
849 tmpb = b->dp;
850 for (ix = 0; ix < pa; ix++) {
851 *tmpb++ = W[ix] & MP_MASK;
854 /* clear unused digits [that existed in the old copy of c] */
855 for (; ix < olduse; ix++) {
856 *tmpb++ = 0;
859 mp_clamp (b);
860 return MP_OKAY;
863 /* computes a = 2**b
865 * Simple algorithm which zeroes the int, grows it then just sets one bit
866 * as required.
868 static int
869 mp_2expt (mp_int * a, int b)
871 int res;
873 /* zero a as per default */
874 mp_zero (a);
876 /* grow a to accommodate the single bit */
877 if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
878 return res;
881 /* set the used count of where the bit will go */
882 a->used = b / DIGIT_BIT + 1;
884 /* put the single bit in its place */
885 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
887 return MP_OKAY;
890 /* high level addition (handles signs) */
891 int mp_add (mp_int * a, mp_int * b, mp_int * c)
893 int sa, sb, res;
895 /* get sign of both inputs */
896 sa = a->sign;
897 sb = b->sign;
899 /* handle two cases, not four */
900 if (sa == sb) {
901 /* both positive or both negative */
902 /* add their magnitudes, copy the sign */
903 c->sign = sa;
904 res = s_mp_add (a, b, c);
905 } else {
906 /* one positive, the other negative */
907 /* subtract the one with the greater magnitude from */
908 /* the one of the lesser magnitude. The result gets */
909 /* the sign of the one with the greater magnitude. */
910 if (mp_cmp_mag (a, b) == MP_LT) {
911 c->sign = sb;
912 res = s_mp_sub (b, a, c);
913 } else {
914 c->sign = sa;
915 res = s_mp_sub (a, b, c);
918 return res;
922 /* single digit addition */
923 static int
924 mp_add_d (mp_int * a, mp_digit b, mp_int * c)
926 int res, ix, oldused;
927 mp_digit *tmpa, *tmpc, mu;
929 /* grow c as required */
930 if (c->alloc < a->used + 1) {
931 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
932 return res;
936 /* if a is negative and |a| >= b, call c = |a| - b */
937 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
938 /* temporarily fix sign of a */
939 a->sign = MP_ZPOS;
941 /* c = |a| - b */
942 res = mp_sub_d(a, b, c);
944 /* fix sign */
945 a->sign = c->sign = MP_NEG;
947 return res;
950 /* old number of used digits in c */
951 oldused = c->used;
953 /* sign always positive */
954 c->sign = MP_ZPOS;
956 /* source alias */
957 tmpa = a->dp;
959 /* destination alias */
960 tmpc = c->dp;
962 /* if a is positive */
963 if (a->sign == MP_ZPOS) {
964 /* add digit, after this we're propagating
965 * the carry.
967 *tmpc = *tmpa++ + b;
968 mu = *tmpc >> DIGIT_BIT;
969 *tmpc++ &= MP_MASK;
971 /* now handle rest of the digits */
972 for (ix = 1; ix < a->used; ix++) {
973 *tmpc = *tmpa++ + mu;
974 mu = *tmpc >> DIGIT_BIT;
975 *tmpc++ &= MP_MASK;
977 /* set final carry */
978 ix++;
979 *tmpc++ = mu;
981 /* setup size */
982 c->used = a->used + 1;
983 } else {
984 /* a was negative and |a| < b */
985 c->used = 1;
987 /* the result is a single digit */
988 if (a->used == 1) {
989 *tmpc++ = b - a->dp[0];
990 } else {
991 *tmpc++ = b;
994 /* setup count so the clearing of oldused
995 * can fall through correctly
997 ix = 1;
1000 /* now zero to oldused */
1001 while (ix++ < oldused) {
1002 *tmpc++ = 0;
1004 mp_clamp(c);
1006 return MP_OKAY;
1009 /* trim unused digits
1011 * This is used to ensure that leading zero digits are
1012 * trimed and the leading "used" digit will be non-zero
1013 * Typically very fast. Also fixes the sign if there
1014 * are no more leading digits
1016 void
1017 mp_clamp (mp_int * a)
1019 /* decrease used while the most significant digit is
1020 * zero.
1022 while (a->used > 0 && a->dp[a->used - 1] == 0) {
1023 --(a->used);
1026 /* reset the sign flag if used == 0 */
1027 if (a->used == 0) {
1028 a->sign = MP_ZPOS;
1032 void mp_clear_multi(mp_int *mp, ...)
1034 mp_int* next_mp = mp;
1035 va_list args;
1036 va_start(args, mp);
1037 while (next_mp != NULL) {
1038 mp_clear(next_mp);
1039 next_mp = va_arg(args, mp_int*);
1041 va_end(args);
1044 /* compare two ints (signed)*/
1046 mp_cmp (const mp_int * a, const mp_int * b)
1048 /* compare based on sign */
1049 if (a->sign != b->sign) {
1050 if (a->sign == MP_NEG) {
1051 return MP_LT;
1052 } else {
1053 return MP_GT;
1057 /* compare digits */
1058 if (a->sign == MP_NEG) {
1059 /* if negative compare opposite direction */
1060 return mp_cmp_mag(b, a);
1061 } else {
1062 return mp_cmp_mag(a, b);
1066 /* compare a digit */
1067 int mp_cmp_d(const mp_int * a, mp_digit b)
1069 /* compare based on sign */
1070 if (a->sign == MP_NEG) {
1071 return MP_LT;
1074 /* compare based on magnitude */
1075 if (a->used > 1) {
1076 return MP_GT;
1079 /* compare the only digit of a to b */
1080 if (a->dp[0] > b) {
1081 return MP_GT;
1082 } else if (a->dp[0] < b) {
1083 return MP_LT;
1084 } else {
1085 return MP_EQ;
1089 /* compare maginitude of two ints (unsigned) */
1090 int mp_cmp_mag (const mp_int * a, const mp_int * b)
1092 int n;
1093 mp_digit *tmpa, *tmpb;
1095 /* compare based on # of non-zero digits */
1096 if (a->used > b->used) {
1097 return MP_GT;
1100 if (a->used < b->used) {
1101 return MP_LT;
1104 /* alias for a */
1105 tmpa = a->dp + (a->used - 1);
1107 /* alias for b */
1108 tmpb = b->dp + (a->used - 1);
1110 /* compare based on digits */
1111 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
1112 if (*tmpa > *tmpb) {
1113 return MP_GT;
1116 if (*tmpa < *tmpb) {
1117 return MP_LT;
1120 return MP_EQ;
1123 static const int lnz[16] = {
1124 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
1127 /* Counts the number of lsbs which are zero before the first zero bit */
1128 int mp_cnt_lsb(const mp_int *a)
1130 int x;
1131 mp_digit q, qq;
1133 /* easy out */
1134 if (mp_iszero(a) == 1) {
1135 return 0;
1138 /* scan lower digits until non-zero */
1139 for (x = 0; x < a->used && a->dp[x] == 0; x++);
1140 q = a->dp[x];
1141 x *= DIGIT_BIT;
1143 /* now scan this digit until a 1 is found */
1144 if ((q & 1) == 0) {
1145 do {
1146 qq = q & 15;
1147 x += lnz[qq];
1148 q >>= 4;
1149 } while (qq == 0);
1151 return x;
1154 /* copy, b = a */
1156 mp_copy (const mp_int * a, mp_int * b)
1158 int res, n;
1160 /* if dst == src do nothing */
1161 if (a == b) {
1162 return MP_OKAY;
1165 /* grow dest */
1166 if (b->alloc < a->used) {
1167 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1168 return res;
1172 /* zero b and copy the parameters over */
1174 register mp_digit *tmpa, *tmpb;
1176 /* pointer aliases */
1178 /* source */
1179 tmpa = a->dp;
1181 /* destination */
1182 tmpb = b->dp;
1184 /* copy all the digits */
1185 for (n = 0; n < a->used; n++) {
1186 *tmpb++ = *tmpa++;
1189 /* clear high digits */
1190 for (; n < b->used; n++) {
1191 *tmpb++ = 0;
1195 /* copy used count and sign */
1196 b->used = a->used;
1197 b->sign = a->sign;
1198 return MP_OKAY;
1201 /* returns the number of bits in an int */
1203 mp_count_bits (const mp_int * a)
1205 int r;
1206 mp_digit q;
1208 /* shortcut */
1209 if (a->used == 0) {
1210 return 0;
1213 /* get number of digits and add that */
1214 r = (a->used - 1) * DIGIT_BIT;
1216 /* take the last digit and count the bits in it */
1217 q = a->dp[a->used - 1];
1218 while (q > 0) {
1219 ++r;
1220 q >>= ((mp_digit) 1);
1222 return r;
1225 /* calc a value mod 2**b */
1226 static int
1227 mp_mod_2d (const mp_int * a, int b, mp_int * c)
1229 int x, res;
1231 /* if b is <= 0 then zero the int */
1232 if (b <= 0) {
1233 mp_zero (c);
1234 return MP_OKAY;
1237 /* if the modulus is larger than the value than return */
1238 if (b > a->used * DIGIT_BIT) {
1239 res = mp_copy (a, c);
1240 return res;
1243 /* copy */
1244 if ((res = mp_copy (a, c)) != MP_OKAY) {
1245 return res;
1248 /* zero digits above the last digit of the modulus */
1249 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
1250 c->dp[x] = 0;
1252 /* clear the digit that is not completely outside/inside the modulus */
1253 c->dp[b / DIGIT_BIT] &= (1 << ((mp_digit)b % DIGIT_BIT)) - 1;
1254 mp_clamp (c);
1255 return MP_OKAY;
1258 /* shift right a certain amount of digits */
1259 static void mp_rshd (mp_int * a, int b)
1261 int x;
1263 /* if b <= 0 then ignore it */
1264 if (b <= 0) {
1265 return;
1268 /* if b > used then simply zero it and return */
1269 if (a->used <= b) {
1270 mp_zero (a);
1271 return;
1275 register mp_digit *bottom, *top;
1277 /* shift the digits down */
1279 /* bottom */
1280 bottom = a->dp;
1282 /* top [offset into digits] */
1283 top = a->dp + b;
1285 /* this is implemented as a sliding window where
1286 * the window is b-digits long and digits from
1287 * the top of the window are copied to the bottom
1289 * e.g.
1291 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
1292 /\ | ---->
1293 \-------------------/ ---->
1295 for (x = 0; x < (a->used - b); x++) {
1296 *bottom++ = *top++;
1299 /* zero the top digits */
1300 for (; x < a->used; x++) {
1301 *bottom++ = 0;
1305 /* remove excess digits */
1306 a->used -= b;
1309 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
1310 static int mp_div_2d (const mp_int * a, int b, mp_int * c, mp_int * d)
1312 mp_digit D, r, rr;
1313 int x, res;
1314 mp_int t;
1317 /* if the shift count is <= 0 then we do no work */
1318 if (b <= 0) {
1319 res = mp_copy (a, c);
1320 if (d != NULL) {
1321 mp_zero (d);
1323 return res;
1326 if ((res = mp_init (&t)) != MP_OKAY) {
1327 return res;
1330 /* get the remainder */
1331 if (d != NULL) {
1332 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
1333 mp_clear (&t);
1334 return res;
1338 /* copy */
1339 if ((res = mp_copy (a, c)) != MP_OKAY) {
1340 mp_clear (&t);
1341 return res;
1344 /* shift by as many digits in the bit count */
1345 if (b >= DIGIT_BIT) {
1346 mp_rshd (c, b / DIGIT_BIT);
1349 /* shift any bit count < DIGIT_BIT */
1350 D = (mp_digit) (b % DIGIT_BIT);
1351 if (D != 0) {
1352 register mp_digit *tmpc, mask, shift;
1354 /* mask */
1355 mask = (((mp_digit)1) << D) - 1;
1357 /* shift for lsb */
1358 shift = DIGIT_BIT - D;
1360 /* alias */
1361 tmpc = c->dp + (c->used - 1);
1363 /* carry */
1364 r = 0;
1365 for (x = c->used - 1; x >= 0; x--) {
1366 /* get the lower bits of this word in a temp */
1367 rr = *tmpc & mask;
1369 /* shift the current word and mix in the carry bits from the previous word */
1370 *tmpc = (*tmpc >> D) | (r << shift);
1371 --tmpc;
1373 /* set the carry to the carry bits of the current word found above */
1374 r = rr;
1377 mp_clamp (c);
1378 if (d != NULL) {
1379 mp_exch (&t, d);
1381 mp_clear (&t);
1382 return MP_OKAY;
1385 /* shift left a certain amount of digits */
1386 static int mp_lshd (mp_int * a, int b)
1388 int x, res;
1390 /* if it's less than zero return */
1391 if (b <= 0) {
1392 return MP_OKAY;
1395 /* grow to fit the new digits */
1396 if (a->alloc < a->used + b) {
1397 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
1398 return res;
1403 register mp_digit *top, *bottom;
1405 /* increment the used by the shift amount then copy upwards */
1406 a->used += b;
1408 /* top */
1409 top = a->dp + a->used - 1;
1411 /* base */
1412 bottom = a->dp + a->used - 1 - b;
1414 /* much like mp_rshd this is implemented using a sliding window
1415 * except the window goes the other way around. Copying from
1416 * the bottom to the top. see bn_mp_rshd.c for more info.
1418 for (x = a->used - 1; x >= b; x--) {
1419 *top-- = *bottom--;
1422 /* zero the lower digits */
1423 top = a->dp;
1424 for (x = 0; x < b; x++) {
1425 *top++ = 0;
1428 return MP_OKAY;
1431 /* shift left by a certain bit count */
1432 static int mp_mul_2d (const mp_int * a, int b, mp_int * c)
1434 mp_digit d;
1435 int res;
1437 /* copy */
1438 if (a != c) {
1439 if ((res = mp_copy (a, c)) != MP_OKAY) {
1440 return res;
1444 if (c->alloc < c->used + b/DIGIT_BIT + 1) {
1445 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
1446 return res;
1450 /* shift by as many digits in the bit count */
1451 if (b >= DIGIT_BIT) {
1452 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
1453 return res;
1457 /* shift any bit count < DIGIT_BIT */
1458 d = (mp_digit) (b % DIGIT_BIT);
1459 if (d != 0) {
1460 register mp_digit *tmpc, shift, mask, r, rr;
1461 register int x;
1463 /* bitmask for carries */
1464 mask = (((mp_digit)1) << d) - 1;
1466 /* shift for msbs */
1467 shift = DIGIT_BIT - d;
1469 /* alias */
1470 tmpc = c->dp;
1472 /* carry */
1473 r = 0;
1474 for (x = 0; x < c->used; x++) {
1475 /* get the higher bits of the current word */
1476 rr = (*tmpc >> shift) & mask;
1478 /* shift the current word and OR in the carry */
1479 *tmpc = ((*tmpc << d) | r) & MP_MASK;
1480 ++tmpc;
1482 /* set the carry to the carry bits of the current word */
1483 r = rr;
1486 /* set final carry */
1487 if (r != 0) {
1488 c->dp[(c->used)++] = r;
1491 mp_clamp (c);
1492 return MP_OKAY;
1495 /* multiply by a digit */
1496 static int
1497 mp_mul_d (const mp_int * a, mp_digit b, mp_int * c)
1499 mp_digit u, *tmpa, *tmpc;
1500 mp_word r;
1501 int ix, res, olduse;
1503 /* make sure c is big enough to hold a*b */
1504 if (c->alloc < a->used + 1) {
1505 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
1506 return res;
1510 /* get the original destinations used count */
1511 olduse = c->used;
1513 /* set the sign */
1514 c->sign = a->sign;
1516 /* alias for a->dp [source] */
1517 tmpa = a->dp;
1519 /* alias for c->dp [dest] */
1520 tmpc = c->dp;
1522 /* zero carry */
1523 u = 0;
1525 /* compute columns */
1526 for (ix = 0; ix < a->used; ix++) {
1527 /* compute product and carry sum for this term */
1528 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
1530 /* mask off higher bits to get a single digit */
1531 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
1533 /* send carry into next iteration */
1534 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
1537 /* store final carry [if any] */
1538 *tmpc++ = u;
1540 /* now zero digits above the top */
1541 while (ix++ < olduse) {
1542 *tmpc++ = 0;
1545 /* set used count */
1546 c->used = a->used + 1;
1547 mp_clamp(c);
1549 return MP_OKAY;
1552 /* integer signed division.
1553 * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
1554 * HAC pp.598 Algorithm 14.20
1556 * Note that the description in HAC is horribly
1557 * incomplete. For example, it doesn't consider
1558 * the case where digits are removed from 'x' in
1559 * the inner loop. It also doesn't consider the
1560 * case that y has fewer than three digits, etc..
1562 * The overall algorithm is as described as
1563 * 14.20 from HAC but fixed to treat these cases.
1565 static int mp_div (const mp_int * a, const mp_int * b, mp_int * c, mp_int * d)
1567 mp_int q, x, y, t1, t2;
1568 int res, n, t, i, norm, neg;
1570 /* is divisor zero ? */
1571 if (mp_iszero (b) == 1) {
1572 return MP_VAL;
1575 /* if a < b then q=0, r = a */
1576 if (mp_cmp_mag (a, b) == MP_LT) {
1577 if (d != NULL) {
1578 res = mp_copy (a, d);
1579 } else {
1580 res = MP_OKAY;
1582 if (c != NULL) {
1583 mp_zero (c);
1585 return res;
1588 if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
1589 return res;
1591 q.used = a->used + 2;
1593 if ((res = mp_init (&t1)) != MP_OKAY) {
1594 goto __Q;
1597 if ((res = mp_init (&t2)) != MP_OKAY) {
1598 goto __T1;
1601 if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
1602 goto __T2;
1605 if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
1606 goto __X;
1609 /* fix the sign */
1610 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
1611 x.sign = y.sign = MP_ZPOS;
1613 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1614 norm = mp_count_bits(&y) % DIGIT_BIT;
1615 if (norm < DIGIT_BIT-1) {
1616 norm = (DIGIT_BIT-1) - norm;
1617 if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
1618 goto __Y;
1620 if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
1621 goto __Y;
1623 } else {
1624 norm = 0;
1627 /* note hac does 0 based, so if used==5 then it's 0,1,2,3,4, e.g. use 4 */
1628 n = x.used - 1;
1629 t = y.used - 1;
1631 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1632 if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
1633 goto __Y;
1636 while (mp_cmp (&x, &y) != MP_LT) {
1637 ++(q.dp[n - t]);
1638 if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
1639 goto __Y;
1643 /* reset y by shifting it back down */
1644 mp_rshd (&y, n - t);
1646 /* step 3. for i from n down to (t + 1) */
1647 for (i = n; i >= (t + 1); i--) {
1648 if (i > x.used) {
1649 continue;
1652 /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1653 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1654 if (x.dp[i] == y.dp[t]) {
1655 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
1656 } else {
1657 mp_word tmp;
1658 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
1659 tmp |= ((mp_word) x.dp[i - 1]);
1660 tmp /= ((mp_word) y.dp[t]);
1661 if (tmp > (mp_word) MP_MASK)
1662 tmp = MP_MASK;
1663 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
1666 /* while (q{i-t-1} * (yt * b + y{t-1})) >
1667 xi * b**2 + xi-1 * b + xi-2
1669 do q{i-t-1} -= 1;
1671 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
1672 do {
1673 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
1675 /* find left hand */
1676 mp_zero (&t1);
1677 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
1678 t1.dp[1] = y.dp[t];
1679 t1.used = 2;
1680 if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1681 goto __Y;
1684 /* find right hand */
1685 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
1686 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
1687 t2.dp[2] = x.dp[i];
1688 t2.used = 3;
1689 } while (mp_cmp_mag(&t1, &t2) == MP_GT);
1691 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1692 if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1693 goto __Y;
1696 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1697 goto __Y;
1700 if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
1701 goto __Y;
1704 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1705 if (x.sign == MP_NEG) {
1706 if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
1707 goto __Y;
1709 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1710 goto __Y;
1712 if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
1713 goto __Y;
1716 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
1720 /* now q is the quotient and x is the remainder
1721 * [which we have to normalize]
1724 /* get sign before writing to c */
1725 x.sign = x.used == 0 ? MP_ZPOS : a->sign;
1727 if (c != NULL) {
1728 mp_clamp (&q);
1729 mp_exch (&q, c);
1730 c->sign = neg;
1733 if (d != NULL) {
1734 mp_div_2d (&x, norm, &x, NULL);
1735 mp_exch (&x, d);
1738 res = MP_OKAY;
1740 __Y:mp_clear (&y);
1741 __X:mp_clear (&x);
1742 __T2:mp_clear (&t2);
1743 __T1:mp_clear (&t1);
1744 __Q:mp_clear (&q);
1745 return res;
1748 static BOOL s_is_power_of_two(mp_digit b, int *p)
1750 int x;
1752 for (x = 1; x < DIGIT_BIT; x++) {
1753 if (b == (((mp_digit)1)<<x)) {
1754 *p = x;
1755 return TRUE;
1758 return FALSE;
1761 /* single digit division (based on routine from MPI) */
1762 static int mp_div_d (const mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
1764 mp_int q;
1765 mp_word w;
1766 mp_digit t;
1767 int res, ix;
1769 /* cannot divide by zero */
1770 if (b == 0) {
1771 return MP_VAL;
1774 /* quick outs */
1775 if (b == 1 || mp_iszero(a) == 1) {
1776 if (d != NULL) {
1777 *d = 0;
1779 if (c != NULL) {
1780 return mp_copy(a, c);
1782 return MP_OKAY;
1785 /* power of two ? */
1786 if (s_is_power_of_two(b, &ix)) {
1787 if (d != NULL) {
1788 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
1790 if (c != NULL) {
1791 return mp_div_2d(a, ix, c, NULL);
1793 return MP_OKAY;
1796 /* no easy answer [c'est la vie]. Just division */
1797 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
1798 return res;
1801 q.used = a->used;
1802 q.sign = a->sign;
1803 w = 0;
1804 for (ix = a->used - 1; ix >= 0; ix--) {
1805 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
1807 if (w >= b) {
1808 t = (mp_digit)(w / b);
1809 w -= ((mp_word)t) * ((mp_word)b);
1810 } else {
1811 t = 0;
1813 q.dp[ix] = t;
1816 if (d != NULL) {
1817 *d = (mp_digit)w;
1820 if (c != NULL) {
1821 mp_clamp(&q);
1822 mp_exch(&q, c);
1824 mp_clear(&q);
1826 return res;
1829 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
1831 * Based on algorithm from the paper
1833 * "Generating Efficient Primes for Discrete Log Cryptosystems"
1834 * Chae Hoon Lim, Pil Loong Lee,
1835 * POSTECH Information Research Laboratories
1837 * The modulus must be of a special format [see manual]
1839 * Has been modified to use algorithm 7.10 from the LTM book instead
1841 * Input x must be in the range 0 <= x <= (n-1)**2
1843 static int
1844 mp_dr_reduce (mp_int * x, const mp_int * n, mp_digit k)
1846 int err, i, m;
1847 mp_word r;
1848 mp_digit mu, *tmpx1, *tmpx2;
1850 /* m = digits in modulus */
1851 m = n->used;
1853 /* ensure that "x" has at least 2m digits */
1854 if (x->alloc < m + m) {
1855 if ((err = mp_grow (x, m + m)) != MP_OKAY) {
1856 return err;
1860 /* top of loop, this is where the code resumes if
1861 * another reduction pass is required.
1863 top:
1864 /* aliases for digits */
1865 /* alias for lower half of x */
1866 tmpx1 = x->dp;
1868 /* alias for upper half of x, or x/B**m */
1869 tmpx2 = x->dp + m;
1871 /* set carry to zero */
1872 mu = 0;
1874 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
1875 for (i = 0; i < m; i++) {
1876 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
1877 *tmpx1++ = (mp_digit)(r & MP_MASK);
1878 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
1881 /* set final carry */
1882 *tmpx1++ = mu;
1884 /* zero words above m */
1885 for (i = m + 1; i < x->used; i++) {
1886 *tmpx1++ = 0;
1889 /* clamp, sub and return */
1890 mp_clamp (x);
1892 /* if x >= n then subtract and reduce again
1893 * Each successive "recursion" makes the input smaller and smaller.
1895 if (mp_cmp_mag (x, n) != MP_LT) {
1896 s_mp_sub(x, n, x);
1897 goto top;
1899 return MP_OKAY;
1902 /* sets the value of "d" required for mp_dr_reduce */
1903 static void mp_dr_setup(const mp_int *a, mp_digit *d)
1905 /* the casts are required if DIGIT_BIT is one less than
1906 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
1908 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
1909 ((mp_word)a->dp[0]));
1912 /* this is a shell function that calls either the normal or Montgomery
1913 * exptmod functions. Originally the call to the montgomery code was
1914 * embedded in the normal function but that wasted a lot of stack space
1915 * for nothing (since 99% of the time the Montgomery code would be called)
1917 int mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y)
1919 int dr;
1921 /* modulus P must be positive */
1922 if (P->sign == MP_NEG) {
1923 return MP_VAL;
1926 /* if exponent X is negative we have to recurse */
1927 if (X->sign == MP_NEG) {
1928 mp_int tmpG, tmpX;
1929 int err;
1931 /* first compute 1/G mod P */
1932 if ((err = mp_init(&tmpG)) != MP_OKAY) {
1933 return err;
1935 if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
1936 mp_clear(&tmpG);
1937 return err;
1940 /* now get |X| */
1941 if ((err = mp_init(&tmpX)) != MP_OKAY) {
1942 mp_clear(&tmpG);
1943 return err;
1945 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
1946 mp_clear_multi(&tmpG, &tmpX, NULL);
1947 return err;
1950 /* and now compute (1/G)**|X| instead of G**X [X < 0] */
1951 err = mp_exptmod(&tmpG, &tmpX, P, Y);
1952 mp_clear_multi(&tmpG, &tmpX, NULL);
1953 return err;
1956 dr = 0;
1958 /* if the modulus is odd use the fast method */
1959 if (mp_isodd (P) == 1) {
1960 return mp_exptmod_fast (G, X, P, Y, dr);
1961 } else {
1962 /* otherwise use the generic Barrett reduction technique */
1963 return s_mp_exptmod (G, X, P, Y);
1967 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1969 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
1970 * The value of k changes based on the size of the exponent.
1972 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1976 mp_exptmod_fast (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y, int redmode)
1978 mp_int M[256], res;
1979 mp_digit buf, mp;
1980 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1982 /* use a pointer to the reduction algorithm. This allows us to use
1983 * one of many reduction algorithms without modding the guts of
1984 * the code with if statements everywhere.
1986 int (*redux)(mp_int*,const mp_int*,mp_digit);
1988 /* find window size */
1989 x = mp_count_bits (X);
1990 if (x <= 7) {
1991 winsize = 2;
1992 } else if (x <= 36) {
1993 winsize = 3;
1994 } else if (x <= 140) {
1995 winsize = 4;
1996 } else if (x <= 450) {
1997 winsize = 5;
1998 } else if (x <= 1303) {
1999 winsize = 6;
2000 } else if (x <= 3529) {
2001 winsize = 7;
2002 } else {
2003 winsize = 8;
2006 /* init M array */
2007 /* init first cell */
2008 if ((err = mp_init(&M[1])) != MP_OKAY) {
2009 return err;
2012 /* now init the second half of the array */
2013 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2014 if ((err = mp_init(&M[x])) != MP_OKAY) {
2015 for (y = 1<<(winsize-1); y < x; y++) {
2016 mp_clear (&M[y]);
2018 mp_clear(&M[1]);
2019 return err;
2023 /* determine and setup reduction code */
2024 if (redmode == 0) {
2025 /* now setup montgomery */
2026 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
2027 goto __M;
2030 /* automatically pick the comba one if available (saves quite a few calls/ifs) */
2031 if (((P->used * 2 + 1) < MP_WARRAY) &&
2032 P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2033 redux = fast_mp_montgomery_reduce;
2034 } else {
2035 /* use slower baseline Montgomery method */
2036 redux = mp_montgomery_reduce;
2038 } else if (redmode == 1) {
2039 /* setup DR reduction for moduli of the form B**k - b */
2040 mp_dr_setup(P, &mp);
2041 redux = mp_dr_reduce;
2042 } else {
2043 /* setup DR reduction for moduli of the form 2**k - b */
2044 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
2045 goto __M;
2047 redux = mp_reduce_2k;
2050 /* setup result */
2051 if ((err = mp_init (&res)) != MP_OKAY) {
2052 goto __M;
2055 /* create M table
2059 * The first half of the table is not computed though accept for M[0] and M[1]
2062 if (redmode == 0) {
2063 /* now we need R mod m */
2064 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
2065 goto __RES;
2068 /* now set M[1] to G * R mod m */
2069 if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
2070 goto __RES;
2072 } else {
2073 mp_set(&res, 1);
2074 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
2075 goto __RES;
2079 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
2080 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
2081 goto __RES;
2084 for (x = 0; x < (winsize - 1); x++) {
2085 if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
2086 goto __RES;
2088 if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
2089 goto __RES;
2093 /* create upper table */
2094 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
2095 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
2096 goto __RES;
2098 if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
2099 goto __RES;
2103 /* set initial mode and bit cnt */
2104 mode = 0;
2105 bitcnt = 1;
2106 buf = 0;
2107 digidx = X->used - 1;
2108 bitcpy = 0;
2109 bitbuf = 0;
2111 for (;;) {
2112 /* grab next digit as required */
2113 if (--bitcnt == 0) {
2114 /* if digidx == -1 we are out of digits so break */
2115 if (digidx == -1) {
2116 break;
2118 /* read next digit and reset bitcnt */
2119 buf = X->dp[digidx--];
2120 bitcnt = DIGIT_BIT;
2123 /* grab the next msb from the exponent */
2124 y = (buf >> (DIGIT_BIT - 1)) & 1;
2125 buf <<= (mp_digit)1;
2127 /* if the bit is zero and mode == 0 then we ignore it
2128 * These represent the leading zero bits before the first 1 bit
2129 * in the exponent. Technically this opt is not required but it
2130 * does lower the # of trivial squaring/reductions used
2132 if (mode == 0 && y == 0) {
2133 continue;
2136 /* if the bit is zero and mode == 1 then we square */
2137 if (mode == 1 && y == 0) {
2138 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2139 goto __RES;
2141 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2142 goto __RES;
2144 continue;
2147 /* else we add it to the window */
2148 bitbuf |= (y << (winsize - ++bitcpy));
2149 mode = 2;
2151 if (bitcpy == winsize) {
2152 /* ok window is filled so square as required and multiply */
2153 /* square first */
2154 for (x = 0; x < winsize; x++) {
2155 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2156 goto __RES;
2158 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2159 goto __RES;
2163 /* then multiply */
2164 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
2165 goto __RES;
2167 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2168 goto __RES;
2171 /* empty window and reset */
2172 bitcpy = 0;
2173 bitbuf = 0;
2174 mode = 1;
2178 /* if bits remain then square/multiply */
2179 if (mode == 2 && bitcpy > 0) {
2180 /* square then multiply if the bit is set */
2181 for (x = 0; x < bitcpy; x++) {
2182 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2183 goto __RES;
2185 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2186 goto __RES;
2189 /* get next bit of the window */
2190 bitbuf <<= 1;
2191 if ((bitbuf & (1 << winsize)) != 0) {
2192 /* then multiply */
2193 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
2194 goto __RES;
2196 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2197 goto __RES;
2203 if (redmode == 0) {
2204 /* fixup result if Montgomery reduction is used
2205 * recall that any value in a Montgomery system is
2206 * actually multiplied by R mod n. So we have
2207 * to reduce one more time to cancel out the factor
2208 * of R.
2210 if ((err = redux(&res, P, mp)) != MP_OKAY) {
2211 goto __RES;
2215 /* swap res with Y */
2216 mp_exch (&res, Y);
2217 err = MP_OKAY;
2218 __RES:mp_clear (&res);
2219 __M:
2220 mp_clear(&M[1]);
2221 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2222 mp_clear (&M[x]);
2224 return err;
2227 /* Greatest Common Divisor using the binary method */
2228 int mp_gcd (const mp_int * a, const mp_int * b, mp_int * c)
2230 mp_int u, v;
2231 int k, u_lsb, v_lsb, res;
2233 /* either zero than gcd is the largest */
2234 if (mp_iszero (a) == 1 && mp_iszero (b) == 0) {
2235 return mp_abs (b, c);
2237 if (mp_iszero (a) == 0 && mp_iszero (b) == 1) {
2238 return mp_abs (a, c);
2241 /* optimized. At this point if a == 0 then
2242 * b must equal zero too
2244 if (mp_iszero (a) == 1) {
2245 mp_zero(c);
2246 return MP_OKAY;
2249 /* get copies of a and b we can modify */
2250 if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
2251 return res;
2254 if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
2255 goto __U;
2258 /* must be positive for the remainder of the algorithm */
2259 u.sign = v.sign = MP_ZPOS;
2261 /* B1. Find the common power of two for u and v */
2262 u_lsb = mp_cnt_lsb(&u);
2263 v_lsb = mp_cnt_lsb(&v);
2264 k = MIN(u_lsb, v_lsb);
2266 if (k > 0) {
2267 /* divide the power of two out */
2268 if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
2269 goto __V;
2272 if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
2273 goto __V;
2277 /* divide any remaining factors of two out */
2278 if (u_lsb != k) {
2279 if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
2280 goto __V;
2284 if (v_lsb != k) {
2285 if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
2286 goto __V;
2290 while (mp_iszero(&v) == 0) {
2291 /* make sure v is the largest */
2292 if (mp_cmp_mag(&u, &v) == MP_GT) {
2293 /* swap u and v to make sure v is >= u */
2294 mp_exch(&u, &v);
2297 /* subtract smallest from largest */
2298 if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
2299 goto __V;
2302 /* Divide out all factors of two */
2303 if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
2304 goto __V;
2308 /* multiply by 2**k which we divided out at the beginning */
2309 if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
2310 goto __V;
2312 c->sign = MP_ZPOS;
2313 res = MP_OKAY;
2314 __V:mp_clear (&u);
2315 __U:mp_clear (&v);
2316 return res;
2319 /* get the lower 32-bits of an mp_int */
2320 unsigned long mp_get_int(const mp_int * a)
2322 int i;
2323 unsigned long res;
2325 if (a->used == 0) {
2326 return 0;
2329 /* get number of digits of the lsb we have to read */
2330 i = MIN(a->used,(int)((sizeof(unsigned long)*CHAR_BIT+DIGIT_BIT-1)/DIGIT_BIT))-1;
2332 /* get most significant digit of result */
2333 res = DIGIT(a,i);
2335 while (--i >= 0) {
2336 res = (res << DIGIT_BIT) | DIGIT(a,i);
2339 /* force result to 32-bits always so it is consistent on non 32-bit platforms */
2340 return res & 0xFFFFFFFFUL;
2343 /* creates "a" then copies b into it */
2344 int mp_init_copy (mp_int * a, const mp_int * b)
2346 int res;
2348 if ((res = mp_init (a)) != MP_OKAY) {
2349 return res;
2351 return mp_copy (b, a);
2354 int mp_init_multi(mp_int *mp, ...)
2356 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */
2357 int n = 0; /* Number of ok inits */
2358 mp_int* cur_arg = mp;
2359 va_list args;
2361 va_start(args, mp); /* init args to next argument from caller */
2362 while (cur_arg != NULL) {
2363 if (mp_init(cur_arg) != MP_OKAY) {
2364 /* Oops - error! Back-track and mp_clear what we already
2365 succeeded in init-ing, then return error.
2367 va_list clean_args;
2369 /* now start cleaning up */
2370 cur_arg = mp;
2371 va_start(clean_args, mp);
2372 while (n--) {
2373 mp_clear(cur_arg);
2374 cur_arg = va_arg(clean_args, mp_int*);
2376 va_end(clean_args);
2377 res = MP_MEM;
2378 break;
2380 n++;
2381 cur_arg = va_arg(args, mp_int*);
2383 va_end(args);
2384 return res; /* Assumed ok, if error flagged above. */
2387 /* hac 14.61, pp608 */
2388 int mp_invmod (const mp_int * a, mp_int * b, mp_int * c)
2390 /* b cannot be negative */
2391 if (b->sign == MP_NEG || mp_iszero(b) == 1) {
2392 return MP_VAL;
2395 /* if the modulus is odd we can use a faster routine instead */
2396 if (mp_isodd (b) == 1) {
2397 return fast_mp_invmod (a, b, c);
2400 return mp_invmod_slow(a, b, c);
2403 /* hac 14.61, pp608 */
2404 int mp_invmod_slow (const mp_int * a, mp_int * b, mp_int * c)
2406 mp_int x, y, u, v, A, B, C, D;
2407 int res;
2409 /* b cannot be negative */
2410 if (b->sign == MP_NEG || mp_iszero(b) == 1) {
2411 return MP_VAL;
2414 /* init temps */
2415 if ((res = mp_init_multi(&x, &y, &u, &v,
2416 &A, &B, &C, &D, NULL)) != MP_OKAY) {
2417 return res;
2420 /* x = a, y = b */
2421 if ((res = mp_copy (a, &x)) != MP_OKAY) {
2422 goto __ERR;
2424 if ((res = mp_copy (b, &y)) != MP_OKAY) {
2425 goto __ERR;
2428 /* 2. [modified] if x,y are both even then return an error! */
2429 if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
2430 res = MP_VAL;
2431 goto __ERR;
2434 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
2435 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
2436 goto __ERR;
2438 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
2439 goto __ERR;
2441 mp_set (&A, 1);
2442 mp_set (&D, 1);
2444 top:
2445 /* 4. while u is even do */
2446 while (mp_iseven (&u) == 1) {
2447 /* 4.1 u = u/2 */
2448 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
2449 goto __ERR;
2451 /* 4.2 if A or B is odd then */
2452 if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
2453 /* A = (A+y)/2, B = (B-x)/2 */
2454 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
2455 goto __ERR;
2457 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
2458 goto __ERR;
2461 /* A = A/2, B = B/2 */
2462 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
2463 goto __ERR;
2465 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
2466 goto __ERR;
2470 /* 5. while v is even do */
2471 while (mp_iseven (&v) == 1) {
2472 /* 5.1 v = v/2 */
2473 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
2474 goto __ERR;
2476 /* 5.2 if C or D is odd then */
2477 if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
2478 /* C = (C+y)/2, D = (D-x)/2 */
2479 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
2480 goto __ERR;
2482 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
2483 goto __ERR;
2486 /* C = C/2, D = D/2 */
2487 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
2488 goto __ERR;
2490 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
2491 goto __ERR;
2495 /* 6. if u >= v then */
2496 if (mp_cmp (&u, &v) != MP_LT) {
2497 /* u = u - v, A = A - C, B = B - D */
2498 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
2499 goto __ERR;
2502 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
2503 goto __ERR;
2506 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
2507 goto __ERR;
2509 } else {
2510 /* v - v - u, C = C - A, D = D - B */
2511 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
2512 goto __ERR;
2515 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
2516 goto __ERR;
2519 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
2520 goto __ERR;
2524 /* if not zero goto step 4 */
2525 if (mp_iszero (&u) == 0)
2526 goto top;
2528 /* now a = C, b = D, gcd == g*v */
2530 /* if v != 1 then there is no inverse */
2531 if (mp_cmp_d (&v, 1) != MP_EQ) {
2532 res = MP_VAL;
2533 goto __ERR;
2536 /* if it's too low */
2537 while (mp_cmp_d(&C, 0) == MP_LT) {
2538 if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
2539 goto __ERR;
2543 /* too big */
2544 while (mp_cmp_mag(&C, b) != MP_LT) {
2545 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
2546 goto __ERR;
2550 /* C is now the inverse */
2551 mp_exch (&C, c);
2552 res = MP_OKAY;
2553 __ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
2554 return res;
2557 /* c = |a| * |b| using Karatsuba Multiplication using
2558 * three half size multiplications
2560 * Let B represent the radix [e.g. 2**DIGIT_BIT] and
2561 * let n represent half of the number of digits in
2562 * the min(a,b)
2564 * a = a1 * B**n + a0
2565 * b = b1 * B**n + b0
2567 * Then, a * b =>
2568 a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0
2570 * Note that a1b1 and a0b0 are used twice and only need to be
2571 * computed once. So in total three half size (half # of
2572 * digit) multiplications are performed, a0b0, a1b1 and
2573 * (a1-b1)(a0-b0)
2575 * Note that a multiplication of half the digits requires
2576 * 1/4th the number of single precision multiplications so in
2577 * total after one call 25% of the single precision multiplications
2578 * are saved. Note also that the call to mp_mul can end up back
2579 * in this function if the a0, a1, b0, or b1 are above the threshold.
2580 * This is known as divide-and-conquer and leads to the famous
2581 * O(N**lg(3)) or O(N**1.584) work which is asymptotically lower than
2582 * the standard O(N**2) that the baseline/comba methods use.
2583 * Generally though the overhead of this method doesn't pay off
2584 * until a certain size (N ~ 80) is reached.
2586 int mp_karatsuba_mul (const mp_int * a, const mp_int * b, mp_int * c)
2588 mp_int x0, x1, y0, y1, t1, x0y0, x1y1;
2589 int B, err;
2591 /* default the return code to an error */
2592 err = MP_MEM;
2594 /* min # of digits */
2595 B = MIN (a->used, b->used);
2597 /* now divide in two */
2598 B = B >> 1;
2600 /* init copy all the temps */
2601 if (mp_init_size (&x0, B) != MP_OKAY)
2602 goto ERR;
2603 if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2604 goto X0;
2605 if (mp_init_size (&y0, B) != MP_OKAY)
2606 goto X1;
2607 if (mp_init_size (&y1, b->used - B) != MP_OKAY)
2608 goto Y0;
2610 /* init temps */
2611 if (mp_init_size (&t1, B * 2) != MP_OKAY)
2612 goto Y1;
2613 if (mp_init_size (&x0y0, B * 2) != MP_OKAY)
2614 goto T1;
2615 if (mp_init_size (&x1y1, B * 2) != MP_OKAY)
2616 goto X0Y0;
2618 /* now shift the digits */
2619 x0.used = y0.used = B;
2620 x1.used = a->used - B;
2621 y1.used = b->used - B;
2624 register int x;
2625 register mp_digit *tmpa, *tmpb, *tmpx, *tmpy;
2627 /* we copy the digits directly instead of using higher level functions
2628 * since we also need to shift the digits
2630 tmpa = a->dp;
2631 tmpb = b->dp;
2633 tmpx = x0.dp;
2634 tmpy = y0.dp;
2635 for (x = 0; x < B; x++) {
2636 *tmpx++ = *tmpa++;
2637 *tmpy++ = *tmpb++;
2640 tmpx = x1.dp;
2641 for (x = B; x < a->used; x++) {
2642 *tmpx++ = *tmpa++;
2645 tmpy = y1.dp;
2646 for (x = B; x < b->used; x++) {
2647 *tmpy++ = *tmpb++;
2651 /* only need to clamp the lower words since by definition the
2652 * upper words x1/y1 must have a known number of digits
2654 mp_clamp (&x0);
2655 mp_clamp (&y0);
2657 /* now calc the products x0y0 and x1y1 */
2658 /* after this x0 is no longer required, free temp [x0==t2]! */
2659 if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
2660 goto X1Y1; /* x0y0 = x0*y0 */
2661 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
2662 goto X1Y1; /* x1y1 = x1*y1 */
2664 /* now calc x1-x0 and y1-y0 */
2665 if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
2666 goto X1Y1; /* t1 = x1 - x0 */
2667 if (mp_sub (&y1, &y0, &x0) != MP_OKAY)
2668 goto X1Y1; /* t2 = y1 - y0 */
2669 if (mp_mul (&t1, &x0, &t1) != MP_OKAY)
2670 goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */
2672 /* add x0y0 */
2673 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY)
2674 goto X1Y1; /* t2 = x0y0 + x1y1 */
2675 if (mp_sub (&x0, &t1, &t1) != MP_OKAY)
2676 goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
2678 /* shift by B */
2679 if (mp_lshd (&t1, B) != MP_OKAY)
2680 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
2681 if (mp_lshd (&x1y1, B * 2) != MP_OKAY)
2682 goto X1Y1; /* x1y1 = x1y1 << 2*B */
2684 if (mp_add (&x0y0, &t1, &t1) != MP_OKAY)
2685 goto X1Y1; /* t1 = x0y0 + t1 */
2686 if (mp_add (&t1, &x1y1, c) != MP_OKAY)
2687 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */
2689 /* Algorithm succeeded set the return code to MP_OKAY */
2690 err = MP_OKAY;
2692 X1Y1:mp_clear (&x1y1);
2693 X0Y0:mp_clear (&x0y0);
2694 T1:mp_clear (&t1);
2695 Y1:mp_clear (&y1);
2696 Y0:mp_clear (&y0);
2697 X1:mp_clear (&x1);
2698 X0:mp_clear (&x0);
2699 ERR:
2700 return err;
2703 /* Karatsuba squaring, computes b = a*a using three
2704 * half size squarings
2706 * See comments of karatsuba_mul for details. It
2707 * is essentially the same algorithm but merely
2708 * tuned to perform recursive squarings.
2710 int mp_karatsuba_sqr (const mp_int * a, mp_int * b)
2712 mp_int x0, x1, t1, t2, x0x0, x1x1;
2713 int B, err;
2715 err = MP_MEM;
2717 /* min # of digits */
2718 B = a->used;
2720 /* now divide in two */
2721 B = B >> 1;
2723 /* init copy all the temps */
2724 if (mp_init_size (&x0, B) != MP_OKAY)
2725 goto ERR;
2726 if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2727 goto X0;
2729 /* init temps */
2730 if (mp_init_size (&t1, a->used * 2) != MP_OKAY)
2731 goto X1;
2732 if (mp_init_size (&t2, a->used * 2) != MP_OKAY)
2733 goto T1;
2734 if (mp_init_size (&x0x0, B * 2) != MP_OKAY)
2735 goto T2;
2736 if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY)
2737 goto X0X0;
2740 register int x;
2741 register mp_digit *dst, *src;
2743 src = a->dp;
2745 /* now shift the digits */
2746 dst = x0.dp;
2747 for (x = 0; x < B; x++) {
2748 *dst++ = *src++;
2751 dst = x1.dp;
2752 for (x = B; x < a->used; x++) {
2753 *dst++ = *src++;
2757 x0.used = B;
2758 x1.used = a->used - B;
2760 mp_clamp (&x0);
2762 /* now calc the products x0*x0 and x1*x1 */
2763 if (mp_sqr (&x0, &x0x0) != MP_OKAY)
2764 goto X1X1; /* x0x0 = x0*x0 */
2765 if (mp_sqr (&x1, &x1x1) != MP_OKAY)
2766 goto X1X1; /* x1x1 = x1*x1 */
2768 /* now calc (x1-x0)**2 */
2769 if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
2770 goto X1X1; /* t1 = x1 - x0 */
2771 if (mp_sqr (&t1, &t1) != MP_OKAY)
2772 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */
2774 /* add x0y0 */
2775 if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
2776 goto X1X1; /* t2 = x0x0 + x1x1 */
2777 if (mp_sub (&t2, &t1, &t1) != MP_OKAY)
2778 goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */
2780 /* shift by B */
2781 if (mp_lshd (&t1, B) != MP_OKAY)
2782 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
2783 if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
2784 goto X1X1; /* x1x1 = x1x1 << 2*B */
2786 if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
2787 goto X1X1; /* t1 = x0x0 + t1 */
2788 if (mp_add (&t1, &x1x1, b) != MP_OKAY)
2789 goto X1X1; /* t1 = x0x0 + t1 + x1x1 */
2791 err = MP_OKAY;
2793 X1X1:mp_clear (&x1x1);
2794 X0X0:mp_clear (&x0x0);
2795 T2:mp_clear (&t2);
2796 T1:mp_clear (&t1);
2797 X1:mp_clear (&x1);
2798 X0:mp_clear (&x0);
2799 ERR:
2800 return err;
2803 /* computes least common multiple as |a*b|/(a, b) */
2804 int mp_lcm (const mp_int * a, const mp_int * b, mp_int * c)
2806 int res;
2807 mp_int t1, t2;
2810 if ((res = mp_init_multi (&t1, &t2, NULL)) != MP_OKAY) {
2811 return res;
2814 /* t1 = get the GCD of the two inputs */
2815 if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
2816 goto __T;
2819 /* divide the smallest by the GCD */
2820 if (mp_cmp_mag(a, b) == MP_LT) {
2821 /* store quotient in t2 so that t2 * b is the LCM */
2822 if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
2823 goto __T;
2825 res = mp_mul(b, &t2, c);
2826 } else {
2827 /* store quotient in t2 so that t2 * a is the LCM */
2828 if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
2829 goto __T;
2831 res = mp_mul(a, &t2, c);
2834 /* fix the sign to positive */
2835 c->sign = MP_ZPOS;
2837 __T:
2838 mp_clear_multi (&t1, &t2, NULL);
2839 return res;
2842 /* c = a mod b, 0 <= c < b */
2844 mp_mod (const mp_int * a, mp_int * b, mp_int * c)
2846 mp_int t;
2847 int res;
2849 if ((res = mp_init (&t)) != MP_OKAY) {
2850 return res;
2853 if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
2854 mp_clear (&t);
2855 return res;
2858 if (t.sign != b->sign) {
2859 res = mp_add (b, &t, c);
2860 } else {
2861 res = MP_OKAY;
2862 mp_exch (&t, c);
2865 mp_clear (&t);
2866 return res;
2869 static int
2870 mp_mod_d (const mp_int * a, mp_digit b, mp_digit * c)
2872 return mp_div_d(a, b, NULL, c);
2875 /* b = a*2 */
2876 static int mp_mul_2(const mp_int * a, mp_int * b)
2878 int x, res, oldused;
2880 /* grow to accommodate result */
2881 if (b->alloc < a->used + 1) {
2882 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
2883 return res;
2887 oldused = b->used;
2888 b->used = a->used;
2891 register mp_digit r, rr, *tmpa, *tmpb;
2893 /* alias for source */
2894 tmpa = a->dp;
2896 /* alias for dest */
2897 tmpb = b->dp;
2899 /* carry */
2900 r = 0;
2901 for (x = 0; x < a->used; x++) {
2903 /* get what will be the *next* carry bit from the
2904 * MSB of the current digit
2906 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
2908 /* now shift up this digit, add in the carry [from the previous] */
2909 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
2911 /* copy the carry that would be from the source
2912 * digit into the next iteration
2914 r = rr;
2917 /* new leading digit? */
2918 if (r != 0) {
2919 /* add a MSB which is always 1 at this point */
2920 *tmpb = 1;
2921 ++(b->used);
2924 /* now zero any excess digits on the destination
2925 * that we didn't write to
2927 tmpb = b->dp + b->used;
2928 for (x = b->used; x < oldused; x++) {
2929 *tmpb++ = 0;
2932 b->sign = a->sign;
2933 return MP_OKAY;
2937 * shifts with subtractions when the result is greater than b.
2939 * The method is slightly modified to shift B unconditionally up to just under
2940 * the leading bit of b. This saves a lot of multiple precision shifting.
2942 int mp_montgomery_calc_normalization (mp_int * a, const mp_int * b)
2944 int x, bits, res;
2946 /* how many bits of last digit does b use */
2947 bits = mp_count_bits (b) % DIGIT_BIT;
2950 if (b->used > 1) {
2951 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
2952 return res;
2954 } else {
2955 mp_set(a, 1);
2956 bits = 1;
2960 /* now compute C = A * B mod b */
2961 for (x = bits - 1; x < DIGIT_BIT; x++) {
2962 if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
2963 return res;
2965 if (mp_cmp_mag (a, b) != MP_LT) {
2966 if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
2967 return res;
2972 return MP_OKAY;
2975 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
2977 mp_montgomery_reduce (mp_int * x, const mp_int * n, mp_digit rho)
2979 int ix, res, digs;
2980 mp_digit mu;
2982 /* can the fast reduction [comba] method be used?
2984 * Note that unlike in mul you're safely allowed *less*
2985 * than the available columns [255 per default] since carries
2986 * are fixed up in the inner loop.
2988 digs = n->used * 2 + 1;
2989 if ((digs < MP_WARRAY) &&
2990 n->used <
2991 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2992 return fast_mp_montgomery_reduce (x, n, rho);
2995 /* grow the input as required */
2996 if (x->alloc < digs) {
2997 if ((res = mp_grow (x, digs)) != MP_OKAY) {
2998 return res;
3001 x->used = digs;
3003 for (ix = 0; ix < n->used; ix++) {
3004 /* mu = ai * rho mod b
3006 * The value of rho must be precalculated via
3007 * montgomery_setup() such that
3008 * it equals -1/n0 mod b this allows the
3009 * following inner loop to reduce the
3010 * input one digit at a time
3012 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
3014 /* a = a + mu * m * b**i */
3016 register int iy;
3017 register mp_digit *tmpn, *tmpx, u;
3018 register mp_word r;
3020 /* alias for digits of the modulus */
3021 tmpn = n->dp;
3023 /* alias for the digits of x [the input] */
3024 tmpx = x->dp + ix;
3026 /* set the carry to zero */
3027 u = 0;
3029 /* Multiply and add in place */
3030 for (iy = 0; iy < n->used; iy++) {
3031 /* compute product and sum */
3032 r = ((mp_word)mu) * ((mp_word)*tmpn++) +
3033 ((mp_word) u) + ((mp_word) * tmpx);
3035 /* get carry */
3036 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3038 /* fix digit */
3039 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
3041 /* At this point the ix'th digit of x should be zero */
3044 /* propagate carries upwards as required*/
3045 while (u) {
3046 *tmpx += u;
3047 u = *tmpx >> DIGIT_BIT;
3048 *tmpx++ &= MP_MASK;
3053 /* at this point the n.used'th least
3054 * significant digits of x are all zero
3055 * which means we can shift x to the
3056 * right by n.used digits and the
3057 * residue is unchanged.
3060 /* x = x/b**n.used */
3061 mp_clamp(x);
3062 mp_rshd (x, n->used);
3064 /* if x >= n then x = x - n */
3065 if (mp_cmp_mag (x, n) != MP_LT) {
3066 return s_mp_sub (x, n, x);
3069 return MP_OKAY;
3072 /* setups the montgomery reduction stuff */
3074 mp_montgomery_setup (const mp_int * n, mp_digit * rho)
3076 mp_digit x, b;
3078 /* fast inversion mod 2**k
3080 * Based on the fact that
3082 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
3083 * => 2*X*A - X*X*A*A = 1
3084 * => 2*(1) - (1) = 1
3086 b = n->dp[0];
3088 if ((b & 1) == 0) {
3089 return MP_VAL;
3092 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
3093 x *= 2 - b * x; /* here x*a==1 mod 2**8 */
3094 x *= 2 - b * x; /* here x*a==1 mod 2**16 */
3095 x *= 2 - b * x; /* here x*a==1 mod 2**32 */
3097 /* rho = -1/m mod b */
3098 *rho = (((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
3100 return MP_OKAY;
3103 /* high level multiplication (handles sign) */
3104 int mp_mul (const mp_int * a, const mp_int * b, mp_int * c)
3106 int res, neg;
3107 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
3109 /* use Karatsuba? */
3110 if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
3111 res = mp_karatsuba_mul (a, b, c);
3112 } else
3114 /* can we use the fast multiplier?
3116 * The fast multiplier can be used if the output will
3117 * have less than MP_WARRAY digits and the number of
3118 * digits won't affect carry propagation
3120 int digs = a->used + b->used + 1;
3122 if ((digs < MP_WARRAY) &&
3123 MIN(a->used, b->used) <=
3124 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3125 res = fast_s_mp_mul_digs (a, b, c, digs);
3126 } else
3127 res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
3129 c->sign = (c->used > 0) ? neg : MP_ZPOS;
3130 return res;
3133 /* d = a * b (mod c) */
3135 mp_mulmod (const mp_int * a, const mp_int * b, mp_int * c, mp_int * d)
3137 int res;
3138 mp_int t;
3140 if ((res = mp_init (&t)) != MP_OKAY) {
3141 return res;
3144 if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
3145 mp_clear (&t);
3146 return res;
3148 res = mp_mod (&t, c, d);
3149 mp_clear (&t);
3150 return res;
3153 /* table of first PRIME_SIZE primes */
3154 static const mp_digit __prime_tab[] = {
3155 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
3156 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
3157 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
3158 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
3159 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
3160 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
3161 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
3162 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
3164 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
3165 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
3166 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
3167 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
3168 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
3169 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
3170 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
3171 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
3173 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
3174 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
3175 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
3176 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
3177 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
3178 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
3179 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
3180 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
3182 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
3183 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
3184 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
3185 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
3186 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
3187 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
3188 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
3189 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
3192 /* determines if an integers is divisible by one
3193 * of the first PRIME_SIZE primes or not
3195 * sets result to 0 if not, 1 if yes
3197 static int mp_prime_is_divisible (const mp_int * a, int *result)
3199 int err, ix;
3200 mp_digit res;
3202 /* default to not */
3203 *result = MP_NO;
3205 for (ix = 0; ix < PRIME_SIZE; ix++) {
3206 /* what is a mod __prime_tab[ix] */
3207 if ((err = mp_mod_d (a, __prime_tab[ix], &res)) != MP_OKAY) {
3208 return err;
3211 /* is the residue zero? */
3212 if (res == 0) {
3213 *result = MP_YES;
3214 return MP_OKAY;
3218 return MP_OKAY;
3221 /* Miller-Rabin test of "a" to the base of "b" as described in
3222 * HAC pp. 139 Algorithm 4.24
3224 * Sets result to 0 if definitely composite or 1 if probably prime.
3225 * Randomly the chance of error is no more than 1/4 and often
3226 * very much lower.
3228 static int mp_prime_miller_rabin (mp_int * a, const mp_int * b, int *result)
3230 mp_int n1, y, r;
3231 int s, j, err;
3233 /* default */
3234 *result = MP_NO;
3236 /* ensure b > 1 */
3237 if (mp_cmp_d(b, 1) != MP_GT) {
3238 return MP_VAL;
3241 /* get n1 = a - 1 */
3242 if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
3243 return err;
3245 if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
3246 goto __N1;
3249 /* set 2**s * r = n1 */
3250 if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
3251 goto __N1;
3254 /* count the number of least significant bits
3255 * which are zero
3257 s = mp_cnt_lsb(&r);
3259 /* now divide n - 1 by 2**s */
3260 if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
3261 goto __R;
3264 /* compute y = b**r mod a */
3265 if ((err = mp_init (&y)) != MP_OKAY) {
3266 goto __R;
3268 if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
3269 goto __Y;
3272 /* if y != 1 and y != n1 do */
3273 if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
3274 j = 1;
3275 /* while j <= s-1 and y != n1 */
3276 while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
3277 if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
3278 goto __Y;
3281 /* if y == 1 then composite */
3282 if (mp_cmp_d (&y, 1) == MP_EQ) {
3283 goto __Y;
3286 ++j;
3289 /* if y != n1 then composite */
3290 if (mp_cmp (&y, &n1) != MP_EQ) {
3291 goto __Y;
3295 /* probably prime now */
3296 *result = MP_YES;
3297 __Y:mp_clear (&y);
3298 __R:mp_clear (&r);
3299 __N1:mp_clear (&n1);
3300 return err;
3303 /* performs a variable number of rounds of Miller-Rabin
3305 * Probability of error after t rounds is no more than
3308 * Sets result to 1 if probably prime, 0 otherwise
3310 static int mp_prime_is_prime (mp_int * a, int t, int *result)
3312 mp_int b;
3313 int ix, err, res;
3315 /* default to no */
3316 *result = MP_NO;
3318 /* valid value of t? */
3319 if (t <= 0 || t > PRIME_SIZE) {
3320 return MP_VAL;
3323 /* is the input equal to one of the primes in the table? */
3324 for (ix = 0; ix < PRIME_SIZE; ix++) {
3325 if (mp_cmp_d(a, __prime_tab[ix]) == MP_EQ) {
3326 *result = 1;
3327 return MP_OKAY;
3331 /* first perform trial division */
3332 if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
3333 return err;
3336 /* return if it was trivially divisible */
3337 if (res == MP_YES) {
3338 return MP_OKAY;
3341 /* now perform the miller-rabin rounds */
3342 if ((err = mp_init (&b)) != MP_OKAY) {
3343 return err;
3346 for (ix = 0; ix < t; ix++) {
3347 /* set the prime */
3348 mp_set (&b, __prime_tab[ix]);
3350 if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
3351 goto __B;
3354 if (res == MP_NO) {
3355 goto __B;
3359 /* passed the test */
3360 *result = MP_YES;
3361 __B:mp_clear (&b);
3362 return err;
3365 static const struct {
3366 int k, t;
3367 } sizes[] = {
3368 { 128, 28 },
3369 { 256, 16 },
3370 { 384, 10 },
3371 { 512, 7 },
3372 { 640, 6 },
3373 { 768, 5 },
3374 { 896, 4 },
3375 { 1024, 4 }
3378 /* returns # of RM trials required for a given bit size */
3379 int mp_prime_rabin_miller_trials(int size)
3381 int x;
3383 for (x = 0; x < (int)(sizeof(sizes)/(sizeof(sizes[0]))); x++) {
3384 if (sizes[x].k == size) {
3385 return sizes[x].t;
3386 } else if (sizes[x].k > size) {
3387 return (x == 0) ? sizes[0].t : sizes[x - 1].t;
3390 return sizes[x-1].t + 1;
3393 /* makes a truly random prime of a given size (bits),
3395 * Flags are as follows:
3397 * LTM_PRIME_BBS - make prime congruent to 3 mod 4
3398 * LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS)
3399 * LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero
3400 * LTM_PRIME_2MSB_ON - make the 2nd highest bit one
3402 * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can
3403 * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself
3404 * so it can be NULL
3408 /* This is possibly the mother of all prime generation functions, muahahahahaha! */
3409 int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat)
3411 unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb;
3412 int res, err, bsize, maskOR_msb_offset;
3414 /* sanity check the input */
3415 if (size <= 1 || t <= 0) {
3416 return MP_VAL;
3419 /* LTM_PRIME_SAFE implies LTM_PRIME_BBS */
3420 if (flags & LTM_PRIME_SAFE) {
3421 flags |= LTM_PRIME_BBS;
3424 /* calc the byte size */
3425 bsize = (size>>3)+((size&7)?1:0);
3427 /* we need a buffer of bsize bytes */
3428 tmp = HeapAlloc(GetProcessHeap(), 0, bsize);
3429 if (tmp == NULL) {
3430 return MP_MEM;
3433 /* calc the maskAND value for the MSbyte*/
3434 maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7)));
3436 /* calc the maskOR_msb */
3437 maskOR_msb = 0;
3438 maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;
3439 if (flags & LTM_PRIME_2MSB_ON) {
3440 maskOR_msb |= 1 << ((size - 2) & 7);
3441 } else if (flags & LTM_PRIME_2MSB_OFF) {
3442 maskAND &= ~(1 << ((size - 2) & 7));
3445 /* get the maskOR_lsb */
3446 maskOR_lsb = 0;
3447 if (flags & LTM_PRIME_BBS) {
3448 maskOR_lsb |= 3;
3451 do {
3452 /* read the bytes */
3453 if (cb(tmp, bsize, dat) != bsize) {
3454 err = MP_VAL;
3455 goto error;
3458 /* work over the MSbyte */
3459 tmp[0] &= maskAND;
3460 tmp[0] |= 1 << ((size - 1) & 7);
3462 /* mix in the maskORs */
3463 tmp[maskOR_msb_offset] |= maskOR_msb;
3464 tmp[bsize-1] |= maskOR_lsb;
3466 /* read it in */
3467 if ((err = mp_read_unsigned_bin(a, tmp, bsize)) != MP_OKAY) { goto error; }
3469 /* is it prime? */
3470 if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; }
3471 if (res == MP_NO) {
3472 continue;
3475 if (flags & LTM_PRIME_SAFE) {
3476 /* see if (a-1)/2 is prime */
3477 if ((err = mp_sub_d(a, 1, a)) != MP_OKAY) { goto error; }
3478 if ((err = mp_div_2(a, a)) != MP_OKAY) { goto error; }
3480 /* is it prime? */
3481 if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; }
3483 } while (res == MP_NO);
3485 if (flags & LTM_PRIME_SAFE) {
3486 /* restore a to the original value */
3487 if ((err = mp_mul_2(a, a)) != MP_OKAY) { goto error; }
3488 if ((err = mp_add_d(a, 1, a)) != MP_OKAY) { goto error; }
3491 err = MP_OKAY;
3492 error:
3493 HeapFree(GetProcessHeap(), 0, tmp);
3494 return err;
3497 /* reads an unsigned char array, assumes the msb is stored first [big endian] */
3499 mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
3501 int res;
3503 /* make sure there are at least two digits */
3504 if (a->alloc < 2) {
3505 if ((res = mp_grow(a, 2)) != MP_OKAY) {
3506 return res;
3510 /* zero the int */
3511 mp_zero (a);
3513 /* read the bytes in */
3514 while (c-- > 0) {
3515 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
3516 return res;
3519 a->dp[0] |= *b++;
3520 a->used += 1;
3522 mp_clamp (a);
3523 return MP_OKAY;
3526 /* reduces x mod m, assumes 0 < x < m**2, mu is
3527 * precomputed via mp_reduce_setup.
3528 * From HAC pp.604 Algorithm 14.42
3531 mp_reduce (mp_int * x, const mp_int * m, const mp_int * mu)
3533 mp_int q;
3534 int res, um = m->used;
3536 /* q = x */
3537 if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
3538 return res;
3541 /* q1 = x / b**(k-1) */
3542 mp_rshd (&q, um - 1);
3544 /* according to HAC this optimization is ok */
3545 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
3546 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
3547 goto CLEANUP;
3549 } else {
3550 if ((res = s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) {
3551 goto CLEANUP;
3555 /* q3 = q2 / b**(k+1) */
3556 mp_rshd (&q, um + 1);
3558 /* x = x mod b**(k+1), quick (no division) */
3559 if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
3560 goto CLEANUP;
3563 /* q = q * m mod b**(k+1), quick (no division) */
3564 if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
3565 goto CLEANUP;
3568 /* x = x - q */
3569 if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
3570 goto CLEANUP;
3573 /* If x < 0, add b**(k+1) to it */
3574 if (mp_cmp_d (x, 0) == MP_LT) {
3575 mp_set (&q, 1);
3576 if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
3577 goto CLEANUP;
3578 if ((res = mp_add (x, &q, x)) != MP_OKAY)
3579 goto CLEANUP;
3582 /* Back off if it's too big */
3583 while (mp_cmp (x, m) != MP_LT) {
3584 if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
3585 goto CLEANUP;
3589 CLEANUP:
3590 mp_clear (&q);
3592 return res;
3595 /* reduces a modulo n where n is of the form 2**p - d */
3597 mp_reduce_2k(mp_int *a, const mp_int *n, mp_digit d)
3599 mp_int q;
3600 int p, res;
3602 if ((res = mp_init(&q)) != MP_OKAY) {
3603 return res;
3606 p = mp_count_bits(n);
3607 top:
3608 /* q = a/2**p, a = a mod 2**p */
3609 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
3610 goto ERR;
3613 if (d != 1) {
3614 /* q = q * d */
3615 if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
3616 goto ERR;
3620 /* a = a + q */
3621 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
3622 goto ERR;
3625 if (mp_cmp_mag(a, n) != MP_LT) {
3626 s_mp_sub(a, n, a);
3627 goto top;
3630 ERR:
3631 mp_clear(&q);
3632 return res;
3635 /* determines the setup value */
3636 static int
3637 mp_reduce_2k_setup(const mp_int *a, mp_digit *d)
3639 int res, p;
3640 mp_int tmp;
3642 if ((res = mp_init(&tmp)) != MP_OKAY) {
3643 return res;
3646 p = mp_count_bits(a);
3647 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
3648 mp_clear(&tmp);
3649 return res;
3652 if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
3653 mp_clear(&tmp);
3654 return res;
3657 *d = tmp.dp[0];
3658 mp_clear(&tmp);
3659 return MP_OKAY;
3662 /* pre-calculate the value required for Barrett reduction
3663 * For a given modulus "b" it calculates the value required in "a"
3665 int mp_reduce_setup (mp_int * a, const mp_int * b)
3667 int res;
3669 if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
3670 return res;
3672 return mp_div (a, b, a, NULL);
3675 /* set to a digit */
3676 void mp_set (mp_int * a, mp_digit b)
3678 mp_zero (a);
3679 a->dp[0] = b & MP_MASK;
3680 a->used = (a->dp[0] != 0) ? 1 : 0;
3683 /* set a 32-bit const */
3684 int mp_set_int (mp_int * a, unsigned long b)
3686 int x, res;
3688 mp_zero (a);
3690 /* set four bits at a time */
3691 for (x = 0; x < 8; x++) {
3692 /* shift the number up four bits */
3693 if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
3694 return res;
3697 /* OR in the top four bits of the source */
3698 a->dp[0] |= (b >> 28) & 15;
3700 /* shift the source up to the next four bits */
3701 b <<= 4;
3703 /* ensure that digits are not clamped off */
3704 a->used += 1;
3706 mp_clamp (a);
3707 return MP_OKAY;
3710 /* shrink a bignum */
3711 int mp_shrink (mp_int * a)
3713 mp_digit *tmp;
3714 if (a->alloc != a->used && a->used > 0) {
3715 if ((tmp = HeapReAlloc(GetProcessHeap(), 0, a->dp, sizeof (mp_digit) * a->used)) == NULL) {
3716 return MP_MEM;
3718 a->dp = tmp;
3719 a->alloc = a->used;
3721 return MP_OKAY;
3724 /* computes b = a*a */
3726 mp_sqr (const mp_int * a, mp_int * b)
3728 int res;
3730 if (a->used >= KARATSUBA_SQR_CUTOFF) {
3731 res = mp_karatsuba_sqr (a, b);
3732 } else
3734 /* can we use the fast comba multiplier? */
3735 if ((a->used * 2 + 1) < MP_WARRAY &&
3736 a->used <
3737 (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
3738 res = fast_s_mp_sqr (a, b);
3739 } else
3740 res = s_mp_sqr (a, b);
3742 b->sign = MP_ZPOS;
3743 return res;
3746 /* c = a * a (mod b) */
3748 mp_sqrmod (const mp_int * a, mp_int * b, mp_int * c)
3750 int res;
3751 mp_int t;
3753 if ((res = mp_init (&t)) != MP_OKAY) {
3754 return res;
3757 if ((res = mp_sqr (a, &t)) != MP_OKAY) {
3758 mp_clear (&t);
3759 return res;
3761 res = mp_mod (&t, b, c);
3762 mp_clear (&t);
3763 return res;
3766 /* high level subtraction (handles signs) */
3768 mp_sub (mp_int * a, mp_int * b, mp_int * c)
3770 int sa, sb, res;
3772 sa = a->sign;
3773 sb = b->sign;
3775 if (sa != sb) {
3776 /* subtract a negative from a positive, OR */
3777 /* subtract a positive from a negative. */
3778 /* In either case, ADD their magnitudes, */
3779 /* and use the sign of the first number. */
3780 c->sign = sa;
3781 res = s_mp_add (a, b, c);
3782 } else {
3783 /* subtract a positive from a positive, OR */
3784 /* subtract a negative from a negative. */
3785 /* First, take the difference between their */
3786 /* magnitudes, then... */
3787 if (mp_cmp_mag (a, b) != MP_LT) {
3788 /* Copy the sign from the first */
3789 c->sign = sa;
3790 /* The first has a larger or equal magnitude */
3791 res = s_mp_sub (a, b, c);
3792 } else {
3793 /* The result has the *opposite* sign from */
3794 /* the first number. */
3795 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
3796 /* The second has a larger magnitude */
3797 res = s_mp_sub (b, a, c);
3800 return res;
3803 /* single digit subtraction */
3805 mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
3807 mp_digit *tmpa, *tmpc, mu;
3808 int res, ix, oldused;
3810 /* grow c as required */
3811 if (c->alloc < a->used + 1) {
3812 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
3813 return res;
3817 /* if a is negative just do an unsigned
3818 * addition [with fudged signs]
3820 if (a->sign == MP_NEG) {
3821 a->sign = MP_ZPOS;
3822 res = mp_add_d(a, b, c);
3823 a->sign = c->sign = MP_NEG;
3824 return res;
3827 /* setup regs */
3828 oldused = c->used;
3829 tmpa = a->dp;
3830 tmpc = c->dp;
3832 /* if a <= b simply fix the single digit */
3833 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
3834 if (a->used == 1) {
3835 *tmpc++ = b - *tmpa;
3836 } else {
3837 *tmpc++ = b;
3839 ix = 1;
3841 /* negative/1digit */
3842 c->sign = MP_NEG;
3843 c->used = 1;
3844 } else {
3845 /* positive/size */
3846 c->sign = MP_ZPOS;
3847 c->used = a->used;
3849 /* subtract first digit */
3850 *tmpc = *tmpa++ - b;
3851 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3852 *tmpc++ &= MP_MASK;
3854 /* handle rest of the digits */
3855 for (ix = 1; ix < a->used; ix++) {
3856 *tmpc = *tmpa++ - mu;
3857 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3858 *tmpc++ &= MP_MASK;
3862 /* zero excess digits */
3863 while (ix++ < oldused) {
3864 *tmpc++ = 0;
3866 mp_clamp(c);
3867 return MP_OKAY;
3870 /* store in unsigned [big endian] format */
3872 mp_to_unsigned_bin (const mp_int * a, unsigned char *b)
3874 int x, res;
3875 mp_int t;
3877 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
3878 return res;
3881 x = 0;
3882 while (mp_iszero (&t) == 0) {
3883 b[x++] = (unsigned char) (t.dp[0] & 255);
3884 if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
3885 mp_clear (&t);
3886 return res;
3889 bn_reverse (b, x);
3890 mp_clear (&t);
3891 return MP_OKAY;
3894 /* get the size for an unsigned equivalent */
3896 mp_unsigned_bin_size (const mp_int * a)
3898 int size = mp_count_bits (a);
3899 return (size / 8 + ((size & 7) != 0 ? 1 : 0));
3902 /* reverse an array, used for radix code */
3903 static void
3904 bn_reverse (unsigned char *s, int len)
3906 int ix, iy;
3907 unsigned char t;
3909 ix = 0;
3910 iy = len - 1;
3911 while (ix < iy) {
3912 t = s[ix];
3913 s[ix] = s[iy];
3914 s[iy] = t;
3915 ++ix;
3916 --iy;
3920 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
3921 static int
3922 s_mp_add (mp_int * a, mp_int * b, mp_int * c)
3924 mp_int *x;
3925 int olduse, res, min, max;
3927 /* find sizes, we let |a| <= |b| which means we have to sort
3928 * them. "x" will point to the input with the most digits
3930 if (a->used > b->used) {
3931 min = b->used;
3932 max = a->used;
3933 x = a;
3934 } else {
3935 min = a->used;
3936 max = b->used;
3937 x = b;
3940 /* init result */
3941 if (c->alloc < max + 1) {
3942 if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
3943 return res;
3947 /* get old used digit count and set new one */
3948 olduse = c->used;
3949 c->used = max + 1;
3952 register mp_digit u, *tmpa, *tmpb, *tmpc;
3953 register int i;
3955 /* alias for digit pointers */
3957 /* first input */
3958 tmpa = a->dp;
3960 /* second input */
3961 tmpb = b->dp;
3963 /* destination */
3964 tmpc = c->dp;
3966 /* zero the carry */
3967 u = 0;
3968 for (i = 0; i < min; i++) {
3969 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
3970 *tmpc = *tmpa++ + *tmpb++ + u;
3972 /* U = carry bit of T[i] */
3973 u = *tmpc >> ((mp_digit)DIGIT_BIT);
3975 /* take away carry bit from T[i] */
3976 *tmpc++ &= MP_MASK;
3979 /* now copy higher words if any, that is in A+B
3980 * if A or B has more digits add those in
3982 if (min != max) {
3983 for (; i < max; i++) {
3984 /* T[i] = X[i] + U */
3985 *tmpc = x->dp[i] + u;
3987 /* U = carry bit of T[i] */
3988 u = *tmpc >> ((mp_digit)DIGIT_BIT);
3990 /* take away carry bit from T[i] */
3991 *tmpc++ &= MP_MASK;
3995 /* add carry */
3996 *tmpc++ = u;
3998 /* clear digits above oldused */
3999 for (i = c->used; i < olduse; i++) {
4000 *tmpc++ = 0;
4004 mp_clamp (c);
4005 return MP_OKAY;
4008 static int s_mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y)
4010 mp_int M[256], res, mu;
4011 mp_digit buf;
4012 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
4014 /* find window size */
4015 x = mp_count_bits (X);
4016 if (x <= 7) {
4017 winsize = 2;
4018 } else if (x <= 36) {
4019 winsize = 3;
4020 } else if (x <= 140) {
4021 winsize = 4;
4022 } else if (x <= 450) {
4023 winsize = 5;
4024 } else if (x <= 1303) {
4025 winsize = 6;
4026 } else if (x <= 3529) {
4027 winsize = 7;
4028 } else {
4029 winsize = 8;
4032 /* init M array */
4033 /* init first cell */
4034 if ((err = mp_init(&M[1])) != MP_OKAY) {
4035 return err;
4038 /* now init the second half of the array */
4039 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4040 if ((err = mp_init(&M[x])) != MP_OKAY) {
4041 for (y = 1<<(winsize-1); y < x; y++) {
4042 mp_clear (&M[y]);
4044 mp_clear(&M[1]);
4045 return err;
4049 /* create mu, used for Barrett reduction */
4050 if ((err = mp_init (&mu)) != MP_OKAY) {
4051 goto __M;
4053 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
4054 goto __MU;
4057 /* create M table
4059 * The M table contains powers of the base,
4060 * e.g. M[x] = G**x mod P
4062 * The first half of the table is not
4063 * computed though accept for M[0] and M[1]
4065 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
4066 goto __MU;
4069 /* compute the value at M[1<<(winsize-1)] by squaring
4070 * M[1] (winsize-1) times
4072 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
4073 goto __MU;
4076 for (x = 0; x < (winsize - 1); x++) {
4077 if ((err = mp_sqr (&M[1 << (winsize - 1)],
4078 &M[1 << (winsize - 1)])) != MP_OKAY) {
4079 goto __MU;
4081 if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
4082 goto __MU;
4086 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
4087 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
4089 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
4090 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
4091 goto __MU;
4093 if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
4094 goto __MU;
4098 /* setup result */
4099 if ((err = mp_init (&res)) != MP_OKAY) {
4100 goto __MU;
4102 mp_set (&res, 1);
4104 /* set initial mode and bit cnt */
4105 mode = 0;
4106 bitcnt = 1;
4107 buf = 0;
4108 digidx = X->used - 1;
4109 bitcpy = 0;
4110 bitbuf = 0;
4112 for (;;) {
4113 /* grab next digit as required */
4114 if (--bitcnt == 0) {
4115 /* if digidx == -1 we are out of digits */
4116 if (digidx == -1) {
4117 break;
4119 /* read next digit and reset the bitcnt */
4120 buf = X->dp[digidx--];
4121 bitcnt = DIGIT_BIT;
4124 /* grab the next msb from the exponent */
4125 y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
4126 buf <<= (mp_digit)1;
4128 /* if the bit is zero and mode == 0 then we ignore it
4129 * These represent the leading zero bits before the first 1 bit
4130 * in the exponent. Technically this opt is not required but it
4131 * does lower the # of trivial squaring/reductions used
4133 if (mode == 0 && y == 0) {
4134 continue;
4137 /* if the bit is zero and mode == 1 then we square */
4138 if (mode == 1 && y == 0) {
4139 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4140 goto __RES;
4142 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4143 goto __RES;
4145 continue;
4148 /* else we add it to the window */
4149 bitbuf |= (y << (winsize - ++bitcpy));
4150 mode = 2;
4152 if (bitcpy == winsize) {
4153 /* ok window is filled so square as required and multiply */
4154 /* square first */
4155 for (x = 0; x < winsize; x++) {
4156 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4157 goto __RES;
4159 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4160 goto __RES;
4164 /* then multiply */
4165 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
4166 goto __RES;
4168 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4169 goto __RES;
4172 /* empty window and reset */
4173 bitcpy = 0;
4174 bitbuf = 0;
4175 mode = 1;
4179 /* if bits remain then square/multiply */
4180 if (mode == 2 && bitcpy > 0) {
4181 /* square then multiply if the bit is set */
4182 for (x = 0; x < bitcpy; x++) {
4183 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4184 goto __RES;
4186 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4187 goto __RES;
4190 bitbuf <<= 1;
4191 if ((bitbuf & (1 << winsize)) != 0) {
4192 /* then multiply */
4193 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
4194 goto __RES;
4196 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4197 goto __RES;
4203 mp_exch (&res, Y);
4204 err = MP_OKAY;
4205 __RES:mp_clear (&res);
4206 __MU:mp_clear (&mu);
4207 __M:
4208 mp_clear(&M[1]);
4209 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4210 mp_clear (&M[x]);
4212 return err;
4215 /* multiplies |a| * |b| and only computes up to digs digits of result
4216 * HAC pp. 595, Algorithm 14.12 Modified so you can control how
4217 * many digits of output are created.
4219 static int
4220 s_mp_mul_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
4222 mp_int t;
4223 int res, pa, pb, ix, iy;
4224 mp_digit u;
4225 mp_word r;
4226 mp_digit tmpx, *tmpt, *tmpy;
4228 /* can we use the fast multiplier? */
4229 if (((digs) < MP_WARRAY) &&
4230 MIN (a->used, b->used) <
4231 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4232 return fast_s_mp_mul_digs (a, b, c, digs);
4235 if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
4236 return res;
4238 t.used = digs;
4240 /* compute the digits of the product directly */
4241 pa = a->used;
4242 for (ix = 0; ix < pa; ix++) {
4243 /* set the carry to zero */
4244 u = 0;
4246 /* limit ourselves to making digs digits of output */
4247 pb = MIN (b->used, digs - ix);
4249 /* setup some aliases */
4250 /* copy of the digit from a used within the nested loop */
4251 tmpx = a->dp[ix];
4253 /* an alias for the destination shifted ix places */
4254 tmpt = t.dp + ix;
4256 /* an alias for the digits of b */
4257 tmpy = b->dp;
4259 /* compute the columns of the output and propagate the carry */
4260 for (iy = 0; iy < pb; iy++) {
4261 /* compute the column as a mp_word */
4262 r = ((mp_word)*tmpt) +
4263 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
4264 ((mp_word) u);
4266 /* the new column is the lower part of the result */
4267 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4269 /* get the carry word from the result */
4270 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
4272 /* set carry if it is placed below digs */
4273 if (ix + iy < digs) {
4274 *tmpt = u;
4278 mp_clamp (&t);
4279 mp_exch (&t, c);
4281 mp_clear (&t);
4282 return MP_OKAY;
4285 /* multiplies |a| * |b| and does not compute the lower digs digits
4286 * [meant to get the higher part of the product]
4288 static int
4289 s_mp_mul_high_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
4291 mp_int t;
4292 int res, pa, pb, ix, iy;
4293 mp_digit u;
4294 mp_word r;
4295 mp_digit tmpx, *tmpt, *tmpy;
4297 /* can we use the fast multiplier? */
4298 if (((a->used + b->used + 1) < MP_WARRAY)
4299 && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4300 return fast_s_mp_mul_high_digs (a, b, c, digs);
4303 if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
4304 return res;
4306 t.used = a->used + b->used + 1;
4308 pa = a->used;
4309 pb = b->used;
4310 for (ix = 0; ix < pa; ix++) {
4311 /* clear the carry */
4312 u = 0;
4314 /* left hand side of A[ix] * B[iy] */
4315 tmpx = a->dp[ix];
4317 /* alias to the address of where the digits will be stored */
4318 tmpt = &(t.dp[digs]);
4320 /* alias for where to read the right hand side from */
4321 tmpy = b->dp + (digs - ix);
4323 for (iy = digs - ix; iy < pb; iy++) {
4324 /* calculate the double precision result */
4325 r = ((mp_word)*tmpt) +
4326 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
4327 ((mp_word) u);
4329 /* get the lower part */
4330 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4332 /* carry the carry */
4333 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
4335 *tmpt = u;
4337 mp_clamp (&t);
4338 mp_exch (&t, c);
4339 mp_clear (&t);
4340 return MP_OKAY;
4343 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
4344 static int
4345 s_mp_sqr (const mp_int * a, mp_int * b)
4347 mp_int t;
4348 int res, ix, iy, pa;
4349 mp_word r;
4350 mp_digit u, tmpx, *tmpt;
4352 pa = a->used;
4353 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
4354 return res;
4357 /* default used is maximum possible size */
4358 t.used = 2*pa + 1;
4360 for (ix = 0; ix < pa; ix++) {
4361 /* first calculate the digit at 2*ix */
4362 /* calculate double precision result */
4363 r = ((mp_word) t.dp[2*ix]) +
4364 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
4366 /* store lower part in result */
4367 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
4369 /* get the carry */
4370 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4372 /* left hand side of A[ix] * A[iy] */
4373 tmpx = a->dp[ix];
4375 /* alias for where to store the results */
4376 tmpt = t.dp + (2*ix + 1);
4378 for (iy = ix + 1; iy < pa; iy++) {
4379 /* first calculate the product */
4380 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
4382 /* now calculate the double precision result, note we use
4383 * addition instead of *2 since it's easier to optimize
4385 r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
4387 /* store lower part */
4388 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4390 /* get carry */
4391 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4393 /* propagate upwards */
4394 while (u != 0) {
4395 r = ((mp_word) *tmpt) + ((mp_word) u);
4396 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4397 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4401 mp_clamp (&t);
4402 mp_exch (&t, b);
4403 mp_clear (&t);
4404 return MP_OKAY;
4407 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
4409 s_mp_sub (const mp_int * a, const mp_int * b, mp_int * c)
4411 int olduse, res, min, max;
4413 /* find sizes */
4414 min = b->used;
4415 max = a->used;
4417 /* init result */
4418 if (c->alloc < max) {
4419 if ((res = mp_grow (c, max)) != MP_OKAY) {
4420 return res;
4423 olduse = c->used;
4424 c->used = max;
4427 register mp_digit u, *tmpa, *tmpb, *tmpc;
4428 register int i;
4430 /* alias for digit pointers */
4431 tmpa = a->dp;
4432 tmpb = b->dp;
4433 tmpc = c->dp;
4435 /* set carry to zero */
4436 u = 0;
4437 for (i = 0; i < min; i++) {
4438 /* T[i] = A[i] - B[i] - U */
4439 *tmpc = *tmpa++ - *tmpb++ - u;
4441 /* U = carry bit of T[i]
4442 * Note this saves performing an AND operation since
4443 * if a carry does occur it will propagate all the way to the
4444 * MSB. As a result a single shift is enough to get the carry
4446 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
4448 /* Clear carry from T[i] */
4449 *tmpc++ &= MP_MASK;
4452 /* now copy higher words if any, e.g. if A has more digits than B */
4453 for (; i < max; i++) {
4454 /* T[i] = A[i] - U */
4455 *tmpc = *tmpa++ - u;
4457 /* U = carry bit of T[i] */
4458 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
4460 /* Clear carry from T[i] */
4461 *tmpc++ &= MP_MASK;
4464 /* clear digits above used (since we may not have grown result above) */
4465 for (i = c->used; i < olduse; i++) {
4466 *tmpc++ = 0;
4470 mp_clamp (c);
4471 return MP_OKAY;