3 * Multi Precision Integer functions
5 * Copyright 2004 Michael Jung
6 * Based on public domain code by Tom St Denis (tomstdenis@iahu.ca)
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
24 * This file contains code from the LibTomCrypt cryptographic
25 * library written by Tom St Denis (tomstdenis@iahu.ca). LibTomCrypt
26 * is in the public domain. The code in this file is tailored to
27 * special requirements. Take a look at http://libtomcrypt.org for the
37 /* Known optimal configurations
38 CPU /Compiler /MUL CUTOFF/SQR CUTOFF
39 -------------------------------------------------------------
40 Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-)
42 static const int KARATSUBA_MUL_CUTOFF
= 88, /* Min. number of digits before Karatsuba multiplication is used. */
43 KARATSUBA_SQR_CUTOFF
= 128; /* Min. number of digits before Karatsuba squaring is used. */
46 /* trim unused digits */
47 static void mp_clamp(mp_int
*a
);
49 /* compare |a| to |b| */
50 static int mp_cmp_mag(const mp_int
*a
, const mp_int
*b
);
52 /* Counts the number of lsbs which are zero before the first zero bit */
53 static int mp_cnt_lsb(const mp_int
*a
);
55 /* computes a = B**n mod b without division or multiplication useful for
56 * normalizing numbers in a Montgomery system.
58 static int mp_montgomery_calc_normalization(mp_int
*a
, const mp_int
*b
);
60 /* computes x/R == x (mod N) via Montgomery Reduction */
61 static int mp_montgomery_reduce(mp_int
*a
, const mp_int
*m
, mp_digit mp
);
63 /* setups the montgomery reduction */
64 static int mp_montgomery_setup(const mp_int
*a
, mp_digit
*mp
);
66 /* Barrett Reduction, computes a (mod b) with a precomputed value c
68 * Assumes that 0 < a <= b*b, note if 0 > a > -(b*b) then you can merely
69 * compute the reduction as -1 * mp_reduce(mp_abs(a)) [pseudo code].
71 static int mp_reduce(mp_int
*a
, const mp_int
*b
, const mp_int
*c
);
73 /* reduces a modulo b where b is of the form 2**p - k [0 <= a] */
74 static int mp_reduce_2k(mp_int
*a
, const mp_int
*n
, mp_digit d
);
76 /* determines k value for 2k reduction */
77 static int mp_reduce_2k_setup(const mp_int
*a
, mp_digit
*d
);
79 /* used to setup the Barrett reduction for a given modulus b */
80 static int mp_reduce_setup(mp_int
*a
, const mp_int
*b
);
83 static void mp_set(mp_int
*a
, mp_digit b
);
86 static int mp_sqr(const mp_int
*a
, mp_int
*b
);
88 /* c = a * a (mod b) */
89 static int mp_sqrmod(const mp_int
*a
, mp_int
*b
, mp_int
*c
);
92 static void bn_reverse(unsigned char *s
, int len
);
93 static int s_mp_add(mp_int
*a
, mp_int
*b
, mp_int
*c
);
94 static int s_mp_exptmod (const mp_int
* G
, const mp_int
* X
, mp_int
* P
, mp_int
* Y
);
95 #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
96 static int s_mp_mul_digs(const mp_int
*a
, const mp_int
*b
, mp_int
*c
, int digs
);
97 static int s_mp_mul_high_digs(const mp_int
*a
, const mp_int
*b
, mp_int
*c
, int digs
);
98 static int s_mp_sqr(const mp_int
*a
, mp_int
*b
);
99 static int s_mp_sub(const mp_int
*a
, const mp_int
*b
, mp_int
*c
);
100 static int mp_exptmod_fast(const mp_int
*G
, const mp_int
*X
, mp_int
*P
, mp_int
*Y
, int mode
);
101 static int mp_invmod_slow (const mp_int
* a
, mp_int
* b
, mp_int
* c
);
102 static int mp_karatsuba_mul(const mp_int
*a
, const mp_int
*b
, mp_int
*c
);
103 static int mp_karatsuba_sqr(const mp_int
*a
, mp_int
*b
);
105 /* grow as required */
106 static int mp_grow (mp_int
* a
, int size
)
111 /* if the alloc size is smaller alloc more ram */
112 if (a
->alloc
< size
) {
113 /* ensure there are always at least MP_PREC digits extra on top */
114 size
+= (MP_PREC
* 2) - (size
% MP_PREC
);
116 /* reallocate the array a->dp
118 * We store the return in a temporary variable
119 * in case the operation failed we don't want
120 * to overwrite the dp member of a.
122 tmp
= HeapReAlloc(GetProcessHeap(), 0, a
->dp
, sizeof (mp_digit
) * size
);
124 /* reallocation failed but "a" is still valid [can be freed] */
128 /* reallocation succeeded so set a->dp */
131 /* zero excess digits */
134 for (; i
< a
->alloc
; i
++) {
142 static int mp_div_2(const mp_int
* a
, mp_int
* b
)
147 if (b
->alloc
< a
->used
) {
148 if ((res
= mp_grow (b
, a
->used
)) != MP_OKAY
) {
156 register mp_digit r
, rr
, *tmpa
, *tmpb
;
159 tmpa
= a
->dp
+ b
->used
- 1;
162 tmpb
= b
->dp
+ b
->used
- 1;
166 for (x
= b
->used
- 1; x
>= 0; x
--) {
167 /* get the carry for the next iteration */
170 /* shift the current digit, add in carry and store */
171 *tmpb
-- = (*tmpa
-- >> 1) | (r
<< (DIGIT_BIT
- 1));
173 /* forward carry to next iteration */
177 /* zero excess digits */
178 tmpb
= b
->dp
+ b
->used
;
179 for (x
= b
->used
; x
< oldused
; x
++) {
188 /* swap the elements of two integers, for cases where you can't simply swap the
189 * mp_int pointers around
192 mp_exch (mp_int
* a
, mp_int
* b
)
201 /* init a new mp_int */
202 static int mp_init (mp_int
* a
)
206 /* allocate memory required and clear it */
207 a
->dp
= HeapAlloc(GetProcessHeap(), 0, sizeof (mp_digit
) * MP_PREC
);
212 /* set the digits to zero */
213 for (i
= 0; i
< MP_PREC
; i
++) {
217 /* set the used to zero, allocated digits to the default precision
218 * and sign to positive */
226 /* init an mp_init for a given size */
227 static int mp_init_size (mp_int
* a
, int size
)
231 /* pad size so there are always extra digits */
232 size
+= (MP_PREC
* 2) - (size
% MP_PREC
);
235 a
->dp
= HeapAlloc(GetProcessHeap(), 0, sizeof (mp_digit
) * size
);
240 /* set the members */
245 /* zero the digits */
246 for (x
= 0; x
< size
; x
++) {
253 /* clear one (frees) */
255 mp_clear (mp_int
* a
)
259 /* only do anything if a hasn't been freed previously */
261 /* first zero the digits */
262 for (i
= 0; i
< a
->used
; i
++) {
267 HeapFree(GetProcessHeap(), 0, a
->dp
);
269 /* reset members to make debugging easier */
271 a
->alloc
= a
->used
= 0;
282 memset (a
->dp
, 0, sizeof (mp_digit
) * a
->alloc
);
287 * Simple function copies the input and fixes the sign to positive
290 mp_abs (const mp_int
* a
, mp_int
* b
)
296 if ((res
= mp_copy (a
, b
)) != MP_OKAY
) {
301 /* force the sign of b to positive */
307 /* computes the modular inverse via binary extended euclidean algorithm,
308 * that is c = 1/a mod b
310 * Based on slow invmod except this is optimized for the case where b is
311 * odd as per HAC Note 14.64 on pp. 610
314 fast_mp_invmod (const mp_int
* a
, mp_int
* b
, mp_int
* c
)
316 mp_int x
, y
, u
, v
, B
, D
;
319 /* 2. [modified] b must be odd */
320 if (mp_iseven (b
) == 1) {
324 /* init all our temps */
325 if ((res
= mp_init_multi(&x
, &y
, &u
, &v
, &B
, &D
, NULL
)) != MP_OKAY
) {
329 /* x == modulus, y == value to invert */
330 if ((res
= mp_copy (b
, &x
)) != MP_OKAY
) {
334 /* we need y = |a| */
335 if ((res
= mp_abs (a
, &y
)) != MP_OKAY
) {
339 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
340 if ((res
= mp_copy (&x
, &u
)) != MP_OKAY
) {
343 if ((res
= mp_copy (&y
, &v
)) != MP_OKAY
) {
349 /* 4. while u is even do */
350 while (mp_iseven (&u
) == 1) {
352 if ((res
= mp_div_2 (&u
, &u
)) != MP_OKAY
) {
355 /* 4.2 if B is odd then */
356 if (mp_isodd (&B
) == 1) {
357 if ((res
= mp_sub (&B
, &x
, &B
)) != MP_OKAY
) {
362 if ((res
= mp_div_2 (&B
, &B
)) != MP_OKAY
) {
367 /* 5. while v is even do */
368 while (mp_iseven (&v
) == 1) {
370 if ((res
= mp_div_2 (&v
, &v
)) != MP_OKAY
) {
373 /* 5.2 if D is odd then */
374 if (mp_isodd (&D
) == 1) {
376 if ((res
= mp_sub (&D
, &x
, &D
)) != MP_OKAY
) {
381 if ((res
= mp_div_2 (&D
, &D
)) != MP_OKAY
) {
386 /* 6. if u >= v then */
387 if (mp_cmp (&u
, &v
) != MP_LT
) {
388 /* u = u - v, B = B - D */
389 if ((res
= mp_sub (&u
, &v
, &u
)) != MP_OKAY
) {
393 if ((res
= mp_sub (&B
, &D
, &B
)) != MP_OKAY
) {
397 /* v - v - u, D = D - B */
398 if ((res
= mp_sub (&v
, &u
, &v
)) != MP_OKAY
) {
402 if ((res
= mp_sub (&D
, &B
, &D
)) != MP_OKAY
) {
407 /* if not zero goto step 4 */
408 if (mp_iszero (&u
) == 0) {
412 /* now a = C, b = D, gcd == g*v */
414 /* if v != 1 then there is no inverse */
415 if (mp_cmp_d (&v
, 1) != MP_EQ
) {
420 /* b is now the inverse */
422 while (D
.sign
== MP_NEG
) {
423 if ((res
= mp_add (&D
, b
, &D
)) != MP_OKAY
) {
431 __ERR
:mp_clear_multi (&x
, &y
, &u
, &v
, &B
, &D
, NULL
);
435 /* computes xR**-1 == x (mod N) via Montgomery Reduction
437 * This is an optimized implementation of montgomery_reduce
438 * which uses the comba method to quickly calculate the columns of the
441 * Based on Algorithm 14.32 on pp.601 of HAC.
444 fast_mp_montgomery_reduce (mp_int
* x
, const mp_int
* n
, mp_digit rho
)
447 mp_word W
[MP_WARRAY
];
449 /* get old used count */
452 /* grow a as required */
453 if (x
->alloc
< n
->used
+ 1) {
454 if ((res
= mp_grow (x
, n
->used
+ 1)) != MP_OKAY
) {
459 /* first we have to get the digits of the input into
460 * an array of double precision words W[...]
463 register mp_word
*_W
;
464 register mp_digit
*tmpx
;
466 /* alias for the W[] array */
469 /* alias for the digits of x*/
472 /* copy the digits of a into W[0..a->used-1] */
473 for (ix
= 0; ix
< x
->used
; ix
++) {
477 /* zero the high words of W[a->used..m->used*2] */
478 for (; ix
< n
->used
* 2 + 1; ix
++) {
483 /* now we proceed to zero successive digits
484 * from the least significant upwards
486 for (ix
= 0; ix
< n
->used
; ix
++) {
487 /* mu = ai * m' mod b
489 * We avoid a double precision multiplication (which isn't required)
490 * by casting the value down to a mp_digit. Note this requires
491 * that W[ix-1] have the carry cleared (see after the inner loop)
493 register mp_digit mu
;
494 mu
= (mp_digit
) (((W
[ix
] & MP_MASK
) * rho
) & MP_MASK
);
496 /* a = a + mu * m * b**i
498 * This is computed in place and on the fly. The multiplication
499 * by b**i is handled by offsetting which columns the results
502 * Note the comba method normally doesn't handle carries in the
503 * inner loop In this case we fix the carry from the previous
504 * column since the Montgomery reduction requires digits of the
505 * result (so far) [see above] to work. This is
506 * handled by fixing up one carry after the inner loop. The
507 * carry fixups are done in order so after these loops the
508 * first m->used words of W[] have the carries fixed
512 register mp_digit
*tmpn
;
513 register mp_word
*_W
;
515 /* alias for the digits of the modulus */
518 /* Alias for the columns set by an offset of ix */
522 for (iy
= 0; iy
< n
->used
; iy
++) {
523 *_W
++ += ((mp_word
)mu
) * ((mp_word
)*tmpn
++);
527 /* now fix carry for next digit, W[ix+1] */
528 W
[ix
+ 1] += W
[ix
] >> ((mp_word
) DIGIT_BIT
);
531 /* now we have to propagate the carries and
532 * shift the words downward [all those least
533 * significant digits we zeroed].
536 register mp_digit
*tmpx
;
537 register mp_word
*_W
, *_W1
;
539 /* nox fix rest of carries */
541 /* alias for current word */
544 /* alias for next word, where the carry goes */
547 for (; ix
<= n
->used
* 2 + 1; ix
++) {
548 *_W
++ += *_W1
++ >> ((mp_word
) DIGIT_BIT
);
551 /* copy out, A = A/b**n
553 * The result is A/b**n but instead of converting from an
554 * array of mp_word to mp_digit than calling mp_rshd
555 * we just copy them in the right order
558 /* alias for destination word */
561 /* alias for shifted double precision result */
564 for (ix
= 0; ix
< n
->used
+ 1; ix
++) {
565 *tmpx
++ = (mp_digit
)(*_W
++ & ((mp_word
) MP_MASK
));
568 /* zero oldused digits, if the input a was larger than
569 * m->used+1 we'll have to clear the digits
571 for (; ix
< olduse
; ix
++) {
576 /* set the max used and clamp */
577 x
->used
= n
->used
+ 1;
580 /* if A >= m then A = A - m */
581 if (mp_cmp_mag (x
, n
) != MP_LT
) {
582 return s_mp_sub (x
, n
, x
);
587 /* Fast (comba) multiplier
589 * This is the fast column-array [comba] multiplier. It is
590 * designed to compute the columns of the product first
591 * then handle the carries afterwards. This has the effect
592 * of making the nested loops that compute the columns very
593 * simple and schedulable on super-scalar processors.
595 * This has been modified to produce a variable number of
596 * digits of output so if say only a half-product is required
597 * you don't have to compute the upper half (a feature
598 * required for fast Barrett reduction).
600 * Based on Algorithm 14.12 on pp.595 of HAC.
604 fast_s_mp_mul_digs (const mp_int
* a
, const mp_int
* b
, mp_int
* c
, int digs
)
606 int olduse
, res
, pa
, ix
, iz
;
607 mp_digit W
[MP_WARRAY
];
610 /* grow the destination as required */
611 if (c
->alloc
< digs
) {
612 if ((res
= mp_grow (c
, digs
)) != MP_OKAY
) {
617 /* number of output digits to produce */
618 pa
= MIN(digs
, a
->used
+ b
->used
);
620 /* clear the carry */
622 for (ix
= 0; ix
<= pa
; ix
++) {
625 mp_digit
*tmpx
, *tmpy
;
627 /* get offsets into the two bignums */
628 ty
= MIN(b
->used
-1, ix
);
631 /* setup temp aliases */
635 /* This is the number of times the loop will iterate, essentially it's
636 while (tx++ < a->used && ty-- >= 0) { ... }
638 iy
= MIN(a
->used
-tx
, ty
+1);
641 for (iz
= 0; iz
< iy
; ++iz
) {
642 _W
+= ((mp_word
)*tmpx
++)*((mp_word
)*tmpy
--);
646 W
[ix
] = ((mp_digit
)_W
) & MP_MASK
;
648 /* make next carry */
649 _W
= _W
>> ((mp_word
)DIGIT_BIT
);
657 register mp_digit
*tmpc
;
659 for (ix
= 0; ix
< digs
; ix
++) {
660 /* now extract the previous digit [below the carry] */
664 /* clear unused digits [that existed in the old copy of c] */
665 for (; ix
< olduse
; ix
++) {
673 /* this is a modified version of fast_s_mul_digs that only produces
674 * output digits *above* digs. See the comments for fast_s_mul_digs
675 * to see how it works.
677 * This is used in the Barrett reduction since for one of the multiplications
678 * only the higher digits were needed. This essentially halves the work.
680 * Based on Algorithm 14.12 on pp.595 of HAC.
683 fast_s_mp_mul_high_digs (const mp_int
* a
, const mp_int
* b
, mp_int
* c
, int digs
)
685 int olduse
, res
, pa
, ix
, iz
;
686 mp_digit W
[MP_WARRAY
];
689 /* grow the destination as required */
690 pa
= a
->used
+ b
->used
;
692 if ((res
= mp_grow (c
, pa
)) != MP_OKAY
) {
697 /* number of output digits to produce */
698 pa
= a
->used
+ b
->used
;
700 for (ix
= digs
; ix
<= pa
; ix
++) {
702 mp_digit
*tmpx
, *tmpy
;
704 /* get offsets into the two bignums */
705 ty
= MIN(b
->used
-1, ix
);
708 /* setup temp aliases */
712 /* This is the number of times the loop will iterate, essentially it's
713 while (tx++ < a->used && ty-- >= 0) { ... }
715 iy
= MIN(a
->used
-tx
, ty
+1);
718 for (iz
= 0; iz
< iy
; iz
++) {
719 _W
+= ((mp_word
)*tmpx
++)*((mp_word
)*tmpy
--);
723 W
[ix
] = ((mp_digit
)_W
) & MP_MASK
;
725 /* make next carry */
726 _W
= _W
>> ((mp_word
)DIGIT_BIT
);
734 register mp_digit
*tmpc
;
737 for (ix
= digs
; ix
<= pa
; ix
++) {
738 /* now extract the previous digit [below the carry] */
742 /* clear unused digits [that existed in the old copy of c] */
743 for (; ix
< olduse
; ix
++) {
753 * This is the comba method where the columns of the product
754 * are computed first then the carries are computed. This
755 * has the effect of making a very simple inner loop that
756 * is executed the most
758 * W2 represents the outer products and W the inner.
760 * A further optimizations is made because the inner
761 * products are of the form "A * B * 2". The *2 part does
762 * not need to be computed until the end which is good
763 * because 64-bit shifts are slow!
765 * Based on Algorithm 14.16 on pp.597 of HAC.
768 /* the jist of squaring...
770 you do like mult except the offset of the tmpx [one that starts closer to zero]
771 can't equal the offset of tmpy. So basically you set up iy like before then you min it with
772 (ty-tx) so that it never happens. You double all those you add in the inner loop
774 After that loop you do the squares and add them in.
776 Remove W2 and don't memset W
780 static int fast_s_mp_sqr (const mp_int
* a
, mp_int
* b
)
782 int olduse
, res
, pa
, ix
, iz
;
783 mp_digit W
[MP_WARRAY
], *tmpx
;
786 /* grow the destination as required */
787 pa
= a
->used
+ a
->used
;
789 if ((res
= mp_grow (b
, pa
)) != MP_OKAY
) {
794 /* number of output digits to produce */
796 for (ix
= 0; ix
<= pa
; ix
++) {
804 /* get offsets into the two bignums */
805 ty
= MIN(a
->used
-1, ix
);
808 /* setup temp aliases */
812 /* This is the number of times the loop will iterate, essentially it's
813 while (tx++ < a->used && ty-- >= 0) { ... }
815 iy
= MIN(a
->used
-tx
, ty
+1);
817 /* now for squaring tx can never equal ty
818 * we halve the distance since they approach at a rate of 2x
819 * and we have to round because odd cases need to be executed
821 iy
= MIN(iy
, (ty
-tx
+1)>>1);
824 for (iz
= 0; iz
< iy
; iz
++) {
825 _W
+= ((mp_word
)*tmpx
++)*((mp_word
)*tmpy
--);
828 /* double the inner product and add carry */
831 /* even columns have the square term in them */
833 _W
+= ((mp_word
)a
->dp
[ix
>>1])*((mp_word
)a
->dp
[ix
>>1]);
839 /* make next carry */
840 W1
= _W
>> ((mp_word
)DIGIT_BIT
);
845 b
->used
= a
->used
+a
->used
;
850 for (ix
= 0; ix
< pa
; ix
++) {
851 *tmpb
++ = W
[ix
] & MP_MASK
;
854 /* clear unused digits [that existed in the old copy of c] */
855 for (; ix
< olduse
; ix
++) {
865 * Simple algorithm which zeroes the int, grows it then just sets one bit
869 mp_2expt (mp_int
* a
, int b
)
873 /* zero a as per default */
876 /* grow a to accommodate the single bit */
877 if ((res
= mp_grow (a
, b
/ DIGIT_BIT
+ 1)) != MP_OKAY
) {
881 /* set the used count of where the bit will go */
882 a
->used
= b
/ DIGIT_BIT
+ 1;
884 /* put the single bit in its place */
885 a
->dp
[b
/ DIGIT_BIT
] = ((mp_digit
)1) << (b
% DIGIT_BIT
);
890 /* high level addition (handles signs) */
891 int mp_add (mp_int
* a
, mp_int
* b
, mp_int
* c
)
895 /* get sign of both inputs */
899 /* handle two cases, not four */
901 /* both positive or both negative */
902 /* add their magnitudes, copy the sign */
904 res
= s_mp_add (a
, b
, c
);
906 /* one positive, the other negative */
907 /* subtract the one with the greater magnitude from */
908 /* the one of the lesser magnitude. The result gets */
909 /* the sign of the one with the greater magnitude. */
910 if (mp_cmp_mag (a
, b
) == MP_LT
) {
912 res
= s_mp_sub (b
, a
, c
);
915 res
= s_mp_sub (a
, b
, c
);
922 /* single digit addition */
924 mp_add_d (mp_int
* a
, mp_digit b
, mp_int
* c
)
926 int res
, ix
, oldused
;
927 mp_digit
*tmpa
, *tmpc
, mu
;
929 /* grow c as required */
930 if (c
->alloc
< a
->used
+ 1) {
931 if ((res
= mp_grow(c
, a
->used
+ 1)) != MP_OKAY
) {
936 /* if a is negative and |a| >= b, call c = |a| - b */
937 if (a
->sign
== MP_NEG
&& (a
->used
> 1 || a
->dp
[0] >= b
)) {
938 /* temporarily fix sign of a */
942 res
= mp_sub_d(a
, b
, c
);
945 a
->sign
= c
->sign
= MP_NEG
;
950 /* old number of used digits in c */
953 /* sign always positive */
959 /* destination alias */
962 /* if a is positive */
963 if (a
->sign
== MP_ZPOS
) {
964 /* add digit, after this we're propagating
968 mu
= *tmpc
>> DIGIT_BIT
;
971 /* now handle rest of the digits */
972 for (ix
= 1; ix
< a
->used
; ix
++) {
973 *tmpc
= *tmpa
++ + mu
;
974 mu
= *tmpc
>> DIGIT_BIT
;
977 /* set final carry */
982 c
->used
= a
->used
+ 1;
984 /* a was negative and |a| < b */
987 /* the result is a single digit */
989 *tmpc
++ = b
- a
->dp
[0];
994 /* setup count so the clearing of oldused
995 * can fall through correctly
1000 /* now zero to oldused */
1001 while (ix
++ < oldused
) {
1009 /* trim unused digits
1011 * This is used to ensure that leading zero digits are
1012 * trimed and the leading "used" digit will be non-zero
1013 * Typically very fast. Also fixes the sign if there
1014 * are no more leading digits
1017 mp_clamp (mp_int
* a
)
1019 /* decrease used while the most significant digit is
1022 while (a
->used
> 0 && a
->dp
[a
->used
- 1] == 0) {
1026 /* reset the sign flag if used == 0 */
1032 void mp_clear_multi(mp_int
*mp
, ...)
1034 mp_int
* next_mp
= mp
;
1037 while (next_mp
!= NULL
) {
1039 next_mp
= va_arg(args
, mp_int
*);
1044 /* compare two ints (signed)*/
1046 mp_cmp (const mp_int
* a
, const mp_int
* b
)
1048 /* compare based on sign */
1049 if (a
->sign
!= b
->sign
) {
1050 if (a
->sign
== MP_NEG
) {
1057 /* compare digits */
1058 if (a
->sign
== MP_NEG
) {
1059 /* if negative compare opposite direction */
1060 return mp_cmp_mag(b
, a
);
1062 return mp_cmp_mag(a
, b
);
1066 /* compare a digit */
1067 int mp_cmp_d(const mp_int
* a
, mp_digit b
)
1069 /* compare based on sign */
1070 if (a
->sign
== MP_NEG
) {
1074 /* compare based on magnitude */
1079 /* compare the only digit of a to b */
1082 } else if (a
->dp
[0] < b
) {
1089 /* compare maginitude of two ints (unsigned) */
1090 int mp_cmp_mag (const mp_int
* a
, const mp_int
* b
)
1093 mp_digit
*tmpa
, *tmpb
;
1095 /* compare based on # of non-zero digits */
1096 if (a
->used
> b
->used
) {
1100 if (a
->used
< b
->used
) {
1105 tmpa
= a
->dp
+ (a
->used
- 1);
1108 tmpb
= b
->dp
+ (a
->used
- 1);
1110 /* compare based on digits */
1111 for (n
= 0; n
< a
->used
; ++n
, --tmpa
, --tmpb
) {
1112 if (*tmpa
> *tmpb
) {
1116 if (*tmpa
< *tmpb
) {
1123 static const int lnz
[16] = {
1124 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
1127 /* Counts the number of lsbs which are zero before the first zero bit */
1128 int mp_cnt_lsb(const mp_int
*a
)
1134 if (mp_iszero(a
) == 1) {
1138 /* scan lower digits until non-zero */
1139 for (x
= 0; x
< a
->used
&& a
->dp
[x
] == 0; x
++);
1143 /* now scan this digit until a 1 is found */
1156 mp_copy (const mp_int
* a
, mp_int
* b
)
1160 /* if dst == src do nothing */
1166 if (b
->alloc
< a
->used
) {
1167 if ((res
= mp_grow (b
, a
->used
)) != MP_OKAY
) {
1172 /* zero b and copy the parameters over */
1174 register mp_digit
*tmpa
, *tmpb
;
1176 /* pointer aliases */
1184 /* copy all the digits */
1185 for (n
= 0; n
< a
->used
; n
++) {
1189 /* clear high digits */
1190 for (; n
< b
->used
; n
++) {
1195 /* copy used count and sign */
1201 /* returns the number of bits in an int */
1203 mp_count_bits (const mp_int
* a
)
1213 /* get number of digits and add that */
1214 r
= (a
->used
- 1) * DIGIT_BIT
;
1216 /* take the last digit and count the bits in it */
1217 q
= a
->dp
[a
->used
- 1];
1220 q
>>= ((mp_digit
) 1);
1225 /* calc a value mod 2**b */
1227 mp_mod_2d (const mp_int
* a
, int b
, mp_int
* c
)
1231 /* if b is <= 0 then zero the int */
1237 /* if the modulus is larger than the value than return */
1238 if (b
> a
->used
* DIGIT_BIT
) {
1239 res
= mp_copy (a
, c
);
1244 if ((res
= mp_copy (a
, c
)) != MP_OKAY
) {
1248 /* zero digits above the last digit of the modulus */
1249 for (x
= (b
/ DIGIT_BIT
) + ((b
% DIGIT_BIT
) == 0 ? 0 : 1); x
< c
->used
; x
++) {
1252 /* clear the digit that is not completely outside/inside the modulus */
1253 c
->dp
[b
/ DIGIT_BIT
] &= (1 << ((mp_digit
)b
% DIGIT_BIT
)) - 1;
1258 /* shift right a certain amount of digits */
1259 static void mp_rshd (mp_int
* a
, int b
)
1263 /* if b <= 0 then ignore it */
1268 /* if b > used then simply zero it and return */
1275 register mp_digit
*bottom
, *top
;
1277 /* shift the digits down */
1282 /* top [offset into digits] */
1285 /* this is implemented as a sliding window where
1286 * the window is b-digits long and digits from
1287 * the top of the window are copied to the bottom
1291 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
1293 \-------------------/ ---->
1295 for (x
= 0; x
< (a
->used
- b
); x
++) {
1299 /* zero the top digits */
1300 for (; x
< a
->used
; x
++) {
1305 /* remove excess digits */
1309 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
1310 static int mp_div_2d (const mp_int
* a
, int b
, mp_int
* c
, mp_int
* d
)
1317 /* if the shift count is <= 0 then we do no work */
1319 res
= mp_copy (a
, c
);
1326 if ((res
= mp_init (&t
)) != MP_OKAY
) {
1330 /* get the remainder */
1332 if ((res
= mp_mod_2d (a
, b
, &t
)) != MP_OKAY
) {
1339 if ((res
= mp_copy (a
, c
)) != MP_OKAY
) {
1344 /* shift by as many digits in the bit count */
1345 if (b
>= DIGIT_BIT
) {
1346 mp_rshd (c
, b
/ DIGIT_BIT
);
1349 /* shift any bit count < DIGIT_BIT */
1350 D
= (mp_digit
) (b
% DIGIT_BIT
);
1352 register mp_digit
*tmpc
, mask
, shift
;
1355 mask
= (((mp_digit
)1) << D
) - 1;
1358 shift
= DIGIT_BIT
- D
;
1361 tmpc
= c
->dp
+ (c
->used
- 1);
1365 for (x
= c
->used
- 1; x
>= 0; x
--) {
1366 /* get the lower bits of this word in a temp */
1369 /* shift the current word and mix in the carry bits from the previous word */
1370 *tmpc
= (*tmpc
>> D
) | (r
<< shift
);
1373 /* set the carry to the carry bits of the current word found above */
1385 /* shift left a certain amount of digits */
1386 static int mp_lshd (mp_int
* a
, int b
)
1390 /* if it's less than zero return */
1395 /* grow to fit the new digits */
1396 if (a
->alloc
< a
->used
+ b
) {
1397 if ((res
= mp_grow (a
, a
->used
+ b
)) != MP_OKAY
) {
1403 register mp_digit
*top
, *bottom
;
1405 /* increment the used by the shift amount then copy upwards */
1409 top
= a
->dp
+ a
->used
- 1;
1412 bottom
= a
->dp
+ a
->used
- 1 - b
;
1414 /* much like mp_rshd this is implemented using a sliding window
1415 * except the window goes the other way around. Copying from
1416 * the bottom to the top. see bn_mp_rshd.c for more info.
1418 for (x
= a
->used
- 1; x
>= b
; x
--) {
1422 /* zero the lower digits */
1424 for (x
= 0; x
< b
; x
++) {
1431 /* shift left by a certain bit count */
1432 static int mp_mul_2d (const mp_int
* a
, int b
, mp_int
* c
)
1439 if ((res
= mp_copy (a
, c
)) != MP_OKAY
) {
1444 if (c
->alloc
< c
->used
+ b
/DIGIT_BIT
+ 1) {
1445 if ((res
= mp_grow (c
, c
->used
+ b
/ DIGIT_BIT
+ 1)) != MP_OKAY
) {
1450 /* shift by as many digits in the bit count */
1451 if (b
>= DIGIT_BIT
) {
1452 if ((res
= mp_lshd (c
, b
/ DIGIT_BIT
)) != MP_OKAY
) {
1457 /* shift any bit count < DIGIT_BIT */
1458 d
= (mp_digit
) (b
% DIGIT_BIT
);
1460 register mp_digit
*tmpc
, shift
, mask
, r
, rr
;
1463 /* bitmask for carries */
1464 mask
= (((mp_digit
)1) << d
) - 1;
1466 /* shift for msbs */
1467 shift
= DIGIT_BIT
- d
;
1474 for (x
= 0; x
< c
->used
; x
++) {
1475 /* get the higher bits of the current word */
1476 rr
= (*tmpc
>> shift
) & mask
;
1478 /* shift the current word and OR in the carry */
1479 *tmpc
= ((*tmpc
<< d
) | r
) & MP_MASK
;
1482 /* set the carry to the carry bits of the current word */
1486 /* set final carry */
1488 c
->dp
[(c
->used
)++] = r
;
1495 /* multiply by a digit */
1497 mp_mul_d (const mp_int
* a
, mp_digit b
, mp_int
* c
)
1499 mp_digit u
, *tmpa
, *tmpc
;
1501 int ix
, res
, olduse
;
1503 /* make sure c is big enough to hold a*b */
1504 if (c
->alloc
< a
->used
+ 1) {
1505 if ((res
= mp_grow (c
, a
->used
+ 1)) != MP_OKAY
) {
1510 /* get the original destinations used count */
1516 /* alias for a->dp [source] */
1519 /* alias for c->dp [dest] */
1525 /* compute columns */
1526 for (ix
= 0; ix
< a
->used
; ix
++) {
1527 /* compute product and carry sum for this term */
1528 r
= ((mp_word
) u
) + ((mp_word
)*tmpa
++) * ((mp_word
)b
);
1530 /* mask off higher bits to get a single digit */
1531 *tmpc
++ = (mp_digit
) (r
& ((mp_word
) MP_MASK
));
1533 /* send carry into next iteration */
1534 u
= (mp_digit
) (r
>> ((mp_word
) DIGIT_BIT
));
1537 /* store final carry [if any] */
1540 /* now zero digits above the top */
1541 while (ix
++ < olduse
) {
1545 /* set used count */
1546 c
->used
= a
->used
+ 1;
1552 /* integer signed division.
1553 * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
1554 * HAC pp.598 Algorithm 14.20
1556 * Note that the description in HAC is horribly
1557 * incomplete. For example, it doesn't consider
1558 * the case where digits are removed from 'x' in
1559 * the inner loop. It also doesn't consider the
1560 * case that y has fewer than three digits, etc..
1562 * The overall algorithm is as described as
1563 * 14.20 from HAC but fixed to treat these cases.
1565 static int mp_div (const mp_int
* a
, const mp_int
* b
, mp_int
* c
, mp_int
* d
)
1567 mp_int q
, x
, y
, t1
, t2
;
1568 int res
, n
, t
, i
, norm
, neg
;
1570 /* is divisor zero ? */
1571 if (mp_iszero (b
) == 1) {
1575 /* if a < b then q=0, r = a */
1576 if (mp_cmp_mag (a
, b
) == MP_LT
) {
1578 res
= mp_copy (a
, d
);
1588 if ((res
= mp_init_size (&q
, a
->used
+ 2)) != MP_OKAY
) {
1591 q
.used
= a
->used
+ 2;
1593 if ((res
= mp_init (&t1
)) != MP_OKAY
) {
1597 if ((res
= mp_init (&t2
)) != MP_OKAY
) {
1601 if ((res
= mp_init_copy (&x
, a
)) != MP_OKAY
) {
1605 if ((res
= mp_init_copy (&y
, b
)) != MP_OKAY
) {
1610 neg
= (a
->sign
== b
->sign
) ? MP_ZPOS
: MP_NEG
;
1611 x
.sign
= y
.sign
= MP_ZPOS
;
1613 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1614 norm
= mp_count_bits(&y
) % DIGIT_BIT
;
1615 if (norm
< DIGIT_BIT
-1) {
1616 norm
= (DIGIT_BIT
-1) - norm
;
1617 if ((res
= mp_mul_2d (&x
, norm
, &x
)) != MP_OKAY
) {
1620 if ((res
= mp_mul_2d (&y
, norm
, &y
)) != MP_OKAY
) {
1627 /* note hac does 0 based, so if used==5 then it's 0,1,2,3,4, e.g. use 4 */
1631 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1632 if ((res
= mp_lshd (&y
, n
- t
)) != MP_OKAY
) { /* y = y*b**{n-t} */
1636 while (mp_cmp (&x
, &y
) != MP_LT
) {
1638 if ((res
= mp_sub (&x
, &y
, &x
)) != MP_OKAY
) {
1643 /* reset y by shifting it back down */
1644 mp_rshd (&y
, n
- t
);
1646 /* step 3. for i from n down to (t + 1) */
1647 for (i
= n
; i
>= (t
+ 1); i
--) {
1652 /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1653 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1654 if (x
.dp
[i
] == y
.dp
[t
]) {
1655 q
.dp
[i
- t
- 1] = ((((mp_digit
)1) << DIGIT_BIT
) - 1);
1658 tmp
= ((mp_word
) x
.dp
[i
]) << ((mp_word
) DIGIT_BIT
);
1659 tmp
|= ((mp_word
) x
.dp
[i
- 1]);
1660 tmp
/= ((mp_word
) y
.dp
[t
]);
1661 if (tmp
> (mp_word
) MP_MASK
)
1663 q
.dp
[i
- t
- 1] = (mp_digit
) (tmp
& (mp_word
) (MP_MASK
));
1666 /* while (q{i-t-1} * (yt * b + y{t-1})) >
1667 xi * b**2 + xi-1 * b + xi-2
1671 q
.dp
[i
- t
- 1] = (q
.dp
[i
- t
- 1] + 1) & MP_MASK
;
1673 q
.dp
[i
- t
- 1] = (q
.dp
[i
- t
- 1] - 1) & MP_MASK
;
1675 /* find left hand */
1677 t1
.dp
[0] = (t
- 1 < 0) ? 0 : y
.dp
[t
- 1];
1680 if ((res
= mp_mul_d (&t1
, q
.dp
[i
- t
- 1], &t1
)) != MP_OKAY
) {
1684 /* find right hand */
1685 t2
.dp
[0] = (i
- 2 < 0) ? 0 : x
.dp
[i
- 2];
1686 t2
.dp
[1] = (i
- 1 < 0) ? 0 : x
.dp
[i
- 1];
1689 } while (mp_cmp_mag(&t1
, &t2
) == MP_GT
);
1691 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1692 if ((res
= mp_mul_d (&y
, q
.dp
[i
- t
- 1], &t1
)) != MP_OKAY
) {
1696 if ((res
= mp_lshd (&t1
, i
- t
- 1)) != MP_OKAY
) {
1700 if ((res
= mp_sub (&x
, &t1
, &x
)) != MP_OKAY
) {
1704 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1705 if (x
.sign
== MP_NEG
) {
1706 if ((res
= mp_copy (&y
, &t1
)) != MP_OKAY
) {
1709 if ((res
= mp_lshd (&t1
, i
- t
- 1)) != MP_OKAY
) {
1712 if ((res
= mp_add (&x
, &t1
, &x
)) != MP_OKAY
) {
1716 q
.dp
[i
- t
- 1] = (q
.dp
[i
- t
- 1] - 1UL) & MP_MASK
;
1720 /* now q is the quotient and x is the remainder
1721 * [which we have to normalize]
1724 /* get sign before writing to c */
1725 x
.sign
= x
.used
== 0 ? MP_ZPOS
: a
->sign
;
1734 mp_div_2d (&x
, norm
, &x
, NULL
);
1742 __T2
:mp_clear (&t2
);
1743 __T1
:mp_clear (&t1
);
1748 static BOOL
s_is_power_of_two(mp_digit b
, int *p
)
1752 for (x
= 1; x
< DIGIT_BIT
; x
++) {
1753 if (b
== (((mp_digit
)1)<<x
)) {
1761 /* single digit division (based on routine from MPI) */
1762 static int mp_div_d (const mp_int
* a
, mp_digit b
, mp_int
* c
, mp_digit
* d
)
1769 /* cannot divide by zero */
1775 if (b
== 1 || mp_iszero(a
) == 1) {
1780 return mp_copy(a
, c
);
1785 /* power of two ? */
1786 if (s_is_power_of_two(b
, &ix
)) {
1788 *d
= a
->dp
[0] & ((((mp_digit
)1)<<ix
) - 1);
1791 return mp_div_2d(a
, ix
, c
, NULL
);
1796 /* no easy answer [c'est la vie]. Just division */
1797 if ((res
= mp_init_size(&q
, a
->used
)) != MP_OKAY
) {
1804 for (ix
= a
->used
- 1; ix
>= 0; ix
--) {
1805 w
= (w
<< ((mp_word
)DIGIT_BIT
)) | ((mp_word
)a
->dp
[ix
]);
1808 t
= (mp_digit
)(w
/ b
);
1809 w
-= ((mp_word
)t
) * ((mp_word
)b
);
1829 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
1831 * Based on algorithm from the paper
1833 * "Generating Efficient Primes for Discrete Log Cryptosystems"
1834 * Chae Hoon Lim, Pil Loong Lee,
1835 * POSTECH Information Research Laboratories
1837 * The modulus must be of a special format [see manual]
1839 * Has been modified to use algorithm 7.10 from the LTM book instead
1841 * Input x must be in the range 0 <= x <= (n-1)**2
1844 mp_dr_reduce (mp_int
* x
, const mp_int
* n
, mp_digit k
)
1848 mp_digit mu
, *tmpx1
, *tmpx2
;
1850 /* m = digits in modulus */
1853 /* ensure that "x" has at least 2m digits */
1854 if (x
->alloc
< m
+ m
) {
1855 if ((err
= mp_grow (x
, m
+ m
)) != MP_OKAY
) {
1860 /* top of loop, this is where the code resumes if
1861 * another reduction pass is required.
1864 /* aliases for digits */
1865 /* alias for lower half of x */
1868 /* alias for upper half of x, or x/B**m */
1871 /* set carry to zero */
1874 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
1875 for (i
= 0; i
< m
; i
++) {
1876 r
= ((mp_word
)*tmpx2
++) * ((mp_word
)k
) + *tmpx1
+ mu
;
1877 *tmpx1
++ = (mp_digit
)(r
& MP_MASK
);
1878 mu
= (mp_digit
)(r
>> ((mp_word
)DIGIT_BIT
));
1881 /* set final carry */
1884 /* zero words above m */
1885 for (i
= m
+ 1; i
< x
->used
; i
++) {
1889 /* clamp, sub and return */
1892 /* if x >= n then subtract and reduce again
1893 * Each successive "recursion" makes the input smaller and smaller.
1895 if (mp_cmp_mag (x
, n
) != MP_LT
) {
1902 /* sets the value of "d" required for mp_dr_reduce */
1903 static void mp_dr_setup(const mp_int
*a
, mp_digit
*d
)
1905 /* the casts are required if DIGIT_BIT is one less than
1906 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
1908 *d
= (mp_digit
)((((mp_word
)1) << ((mp_word
)DIGIT_BIT
)) -
1909 ((mp_word
)a
->dp
[0]));
1912 /* this is a shell function that calls either the normal or Montgomery
1913 * exptmod functions. Originally the call to the montgomery code was
1914 * embedded in the normal function but that wasted a lot of stack space
1915 * for nothing (since 99% of the time the Montgomery code would be called)
1917 int mp_exptmod (const mp_int
* G
, const mp_int
* X
, mp_int
* P
, mp_int
* Y
)
1921 /* modulus P must be positive */
1922 if (P
->sign
== MP_NEG
) {
1926 /* if exponent X is negative we have to recurse */
1927 if (X
->sign
== MP_NEG
) {
1931 /* first compute 1/G mod P */
1932 if ((err
= mp_init(&tmpG
)) != MP_OKAY
) {
1935 if ((err
= mp_invmod(G
, P
, &tmpG
)) != MP_OKAY
) {
1941 if ((err
= mp_init(&tmpX
)) != MP_OKAY
) {
1945 if ((err
= mp_abs(X
, &tmpX
)) != MP_OKAY
) {
1946 mp_clear_multi(&tmpG
, &tmpX
, NULL
);
1950 /* and now compute (1/G)**|X| instead of G**X [X < 0] */
1951 err
= mp_exptmod(&tmpG
, &tmpX
, P
, Y
);
1952 mp_clear_multi(&tmpG
, &tmpX
, NULL
);
1958 /* if the modulus is odd use the fast method */
1959 if (mp_isodd (P
) == 1) {
1960 return mp_exptmod_fast (G
, X
, P
, Y
, dr
);
1962 /* otherwise use the generic Barrett reduction technique */
1963 return s_mp_exptmod (G
, X
, P
, Y
);
1967 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1969 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
1970 * The value of k changes based on the size of the exponent.
1972 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1976 mp_exptmod_fast (const mp_int
* G
, const mp_int
* X
, mp_int
* P
, mp_int
* Y
, int redmode
)
1980 int err
, bitbuf
, bitcpy
, bitcnt
, mode
, digidx
, x
, y
, winsize
;
1982 /* use a pointer to the reduction algorithm. This allows us to use
1983 * one of many reduction algorithms without modding the guts of
1984 * the code with if statements everywhere.
1986 int (*redux
)(mp_int
*,const mp_int
*,mp_digit
);
1988 /* find window size */
1989 x
= mp_count_bits (X
);
1992 } else if (x
<= 36) {
1994 } else if (x
<= 140) {
1996 } else if (x
<= 450) {
1998 } else if (x
<= 1303) {
2000 } else if (x
<= 3529) {
2007 /* init first cell */
2008 if ((err
= mp_init(&M
[1])) != MP_OKAY
) {
2012 /* now init the second half of the array */
2013 for (x
= 1<<(winsize
-1); x
< (1 << winsize
); x
++) {
2014 if ((err
= mp_init(&M
[x
])) != MP_OKAY
) {
2015 for (y
= 1<<(winsize
-1); y
< x
; y
++) {
2023 /* determine and setup reduction code */
2025 /* now setup montgomery */
2026 if ((err
= mp_montgomery_setup (P
, &mp
)) != MP_OKAY
) {
2030 /* automatically pick the comba one if available (saves quite a few calls/ifs) */
2031 if (((P
->used
* 2 + 1) < MP_WARRAY
) &&
2032 P
->used
< (1 << ((CHAR_BIT
* sizeof (mp_word
)) - (2 * DIGIT_BIT
)))) {
2033 redux
= fast_mp_montgomery_reduce
;
2035 /* use slower baseline Montgomery method */
2036 redux
= mp_montgomery_reduce
;
2038 } else if (redmode
== 1) {
2039 /* setup DR reduction for moduli of the form B**k - b */
2040 mp_dr_setup(P
, &mp
);
2041 redux
= mp_dr_reduce
;
2043 /* setup DR reduction for moduli of the form 2**k - b */
2044 if ((err
= mp_reduce_2k_setup(P
, &mp
)) != MP_OKAY
) {
2047 redux
= mp_reduce_2k
;
2051 if ((err
= mp_init (&res
)) != MP_OKAY
) {
2059 * The first half of the table is not computed though accept for M[0] and M[1]
2063 /* now we need R mod m */
2064 if ((err
= mp_montgomery_calc_normalization (&res
, P
)) != MP_OKAY
) {
2068 /* now set M[1] to G * R mod m */
2069 if ((err
= mp_mulmod (G
, &res
, P
, &M
[1])) != MP_OKAY
) {
2074 if ((err
= mp_mod(G
, P
, &M
[1])) != MP_OKAY
) {
2079 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
2080 if ((err
= mp_copy (&M
[1], &M
[1 << (winsize
- 1)])) != MP_OKAY
) {
2084 for (x
= 0; x
< (winsize
- 1); x
++) {
2085 if ((err
= mp_sqr (&M
[1 << (winsize
- 1)], &M
[1 << (winsize
- 1)])) != MP_OKAY
) {
2088 if ((err
= redux (&M
[1 << (winsize
- 1)], P
, mp
)) != MP_OKAY
) {
2093 /* create upper table */
2094 for (x
= (1 << (winsize
- 1)) + 1; x
< (1 << winsize
); x
++) {
2095 if ((err
= mp_mul (&M
[x
- 1], &M
[1], &M
[x
])) != MP_OKAY
) {
2098 if ((err
= redux (&M
[x
], P
, mp
)) != MP_OKAY
) {
2103 /* set initial mode and bit cnt */
2107 digidx
= X
->used
- 1;
2112 /* grab next digit as required */
2113 if (--bitcnt
== 0) {
2114 /* if digidx == -1 we are out of digits so break */
2118 /* read next digit and reset bitcnt */
2119 buf
= X
->dp
[digidx
--];
2123 /* grab the next msb from the exponent */
2124 y
= (buf
>> (DIGIT_BIT
- 1)) & 1;
2125 buf
<<= (mp_digit
)1;
2127 /* if the bit is zero and mode == 0 then we ignore it
2128 * These represent the leading zero bits before the first 1 bit
2129 * in the exponent. Technically this opt is not required but it
2130 * does lower the # of trivial squaring/reductions used
2132 if (mode
== 0 && y
== 0) {
2136 /* if the bit is zero and mode == 1 then we square */
2137 if (mode
== 1 && y
== 0) {
2138 if ((err
= mp_sqr (&res
, &res
)) != MP_OKAY
) {
2141 if ((err
= redux (&res
, P
, mp
)) != MP_OKAY
) {
2147 /* else we add it to the window */
2148 bitbuf
|= (y
<< (winsize
- ++bitcpy
));
2151 if (bitcpy
== winsize
) {
2152 /* ok window is filled so square as required and multiply */
2154 for (x
= 0; x
< winsize
; x
++) {
2155 if ((err
= mp_sqr (&res
, &res
)) != MP_OKAY
) {
2158 if ((err
= redux (&res
, P
, mp
)) != MP_OKAY
) {
2164 if ((err
= mp_mul (&res
, &M
[bitbuf
], &res
)) != MP_OKAY
) {
2167 if ((err
= redux (&res
, P
, mp
)) != MP_OKAY
) {
2171 /* empty window and reset */
2178 /* if bits remain then square/multiply */
2179 if (mode
== 2 && bitcpy
> 0) {
2180 /* square then multiply if the bit is set */
2181 for (x
= 0; x
< bitcpy
; x
++) {
2182 if ((err
= mp_sqr (&res
, &res
)) != MP_OKAY
) {
2185 if ((err
= redux (&res
, P
, mp
)) != MP_OKAY
) {
2189 /* get next bit of the window */
2191 if ((bitbuf
& (1 << winsize
)) != 0) {
2193 if ((err
= mp_mul (&res
, &M
[1], &res
)) != MP_OKAY
) {
2196 if ((err
= redux (&res
, P
, mp
)) != MP_OKAY
) {
2204 /* fixup result if Montgomery reduction is used
2205 * recall that any value in a Montgomery system is
2206 * actually multiplied by R mod n. So we have
2207 * to reduce one more time to cancel out the factor
2210 if ((err
= redux(&res
, P
, mp
)) != MP_OKAY
) {
2215 /* swap res with Y */
2218 __RES
:mp_clear (&res
);
2221 for (x
= 1<<(winsize
-1); x
< (1 << winsize
); x
++) {
2227 /* Greatest Common Divisor using the binary method */
2228 int mp_gcd (const mp_int
* a
, const mp_int
* b
, mp_int
* c
)
2231 int k
, u_lsb
, v_lsb
, res
;
2233 /* either zero than gcd is the largest */
2234 if (mp_iszero (a
) == 1 && mp_iszero (b
) == 0) {
2235 return mp_abs (b
, c
);
2237 if (mp_iszero (a
) == 0 && mp_iszero (b
) == 1) {
2238 return mp_abs (a
, c
);
2241 /* optimized. At this point if a == 0 then
2242 * b must equal zero too
2244 if (mp_iszero (a
) == 1) {
2249 /* get copies of a and b we can modify */
2250 if ((res
= mp_init_copy (&u
, a
)) != MP_OKAY
) {
2254 if ((res
= mp_init_copy (&v
, b
)) != MP_OKAY
) {
2258 /* must be positive for the remainder of the algorithm */
2259 u
.sign
= v
.sign
= MP_ZPOS
;
2261 /* B1. Find the common power of two for u and v */
2262 u_lsb
= mp_cnt_lsb(&u
);
2263 v_lsb
= mp_cnt_lsb(&v
);
2264 k
= MIN(u_lsb
, v_lsb
);
2267 /* divide the power of two out */
2268 if ((res
= mp_div_2d(&u
, k
, &u
, NULL
)) != MP_OKAY
) {
2272 if ((res
= mp_div_2d(&v
, k
, &v
, NULL
)) != MP_OKAY
) {
2277 /* divide any remaining factors of two out */
2279 if ((res
= mp_div_2d(&u
, u_lsb
- k
, &u
, NULL
)) != MP_OKAY
) {
2285 if ((res
= mp_div_2d(&v
, v_lsb
- k
, &v
, NULL
)) != MP_OKAY
) {
2290 while (mp_iszero(&v
) == 0) {
2291 /* make sure v is the largest */
2292 if (mp_cmp_mag(&u
, &v
) == MP_GT
) {
2293 /* swap u and v to make sure v is >= u */
2297 /* subtract smallest from largest */
2298 if ((res
= s_mp_sub(&v
, &u
, &v
)) != MP_OKAY
) {
2302 /* Divide out all factors of two */
2303 if ((res
= mp_div_2d(&v
, mp_cnt_lsb(&v
), &v
, NULL
)) != MP_OKAY
) {
2308 /* multiply by 2**k which we divided out at the beginning */
2309 if ((res
= mp_mul_2d (&u
, k
, c
)) != MP_OKAY
) {
2319 /* get the lower 32-bits of an mp_int */
2320 unsigned long mp_get_int(const mp_int
* a
)
2329 /* get number of digits of the lsb we have to read */
2330 i
= MIN(a
->used
,(int)((sizeof(unsigned long)*CHAR_BIT
+DIGIT_BIT
-1)/DIGIT_BIT
))-1;
2332 /* get most significant digit of result */
2336 res
= (res
<< DIGIT_BIT
) | DIGIT(a
,i
);
2339 /* force result to 32-bits always so it is consistent on non 32-bit platforms */
2340 return res
& 0xFFFFFFFFUL
;
2343 /* creates "a" then copies b into it */
2344 int mp_init_copy (mp_int
* a
, const mp_int
* b
)
2348 if ((res
= mp_init (a
)) != MP_OKAY
) {
2351 return mp_copy (b
, a
);
2354 int mp_init_multi(mp_int
*mp
, ...)
2356 mp_err res
= MP_OKAY
; /* Assume ok until proven otherwise */
2357 int n
= 0; /* Number of ok inits */
2358 mp_int
* cur_arg
= mp
;
2361 va_start(args
, mp
); /* init args to next argument from caller */
2362 while (cur_arg
!= NULL
) {
2363 if (mp_init(cur_arg
) != MP_OKAY
) {
2364 /* Oops - error! Back-track and mp_clear what we already
2365 succeeded in init-ing, then return error.
2369 /* end the current list */
2372 /* now start cleaning up */
2374 va_start(clean_args
, mp
);
2377 cur_arg
= va_arg(clean_args
, mp_int
*);
2384 cur_arg
= va_arg(args
, mp_int
*);
2387 return res
; /* Assumed ok, if error flagged above. */
2390 /* hac 14.61, pp608 */
2391 int mp_invmod (const mp_int
* a
, mp_int
* b
, mp_int
* c
)
2393 /* b cannot be negative */
2394 if (b
->sign
== MP_NEG
|| mp_iszero(b
) == 1) {
2398 /* if the modulus is odd we can use a faster routine instead */
2399 if (mp_isodd (b
) == 1) {
2400 return fast_mp_invmod (a
, b
, c
);
2403 return mp_invmod_slow(a
, b
, c
);
2406 /* hac 14.61, pp608 */
2407 int mp_invmod_slow (const mp_int
* a
, mp_int
* b
, mp_int
* c
)
2409 mp_int x
, y
, u
, v
, A
, B
, C
, D
;
2412 /* b cannot be negative */
2413 if (b
->sign
== MP_NEG
|| mp_iszero(b
) == 1) {
2418 if ((res
= mp_init_multi(&x
, &y
, &u
, &v
,
2419 &A
, &B
, &C
, &D
, NULL
)) != MP_OKAY
) {
2424 if ((res
= mp_copy (a
, &x
)) != MP_OKAY
) {
2427 if ((res
= mp_copy (b
, &y
)) != MP_OKAY
) {
2431 /* 2. [modified] if x,y are both even then return an error! */
2432 if (mp_iseven (&x
) == 1 && mp_iseven (&y
) == 1) {
2437 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
2438 if ((res
= mp_copy (&x
, &u
)) != MP_OKAY
) {
2441 if ((res
= mp_copy (&y
, &v
)) != MP_OKAY
) {
2448 /* 4. while u is even do */
2449 while (mp_iseven (&u
) == 1) {
2451 if ((res
= mp_div_2 (&u
, &u
)) != MP_OKAY
) {
2454 /* 4.2 if A or B is odd then */
2455 if (mp_isodd (&A
) == 1 || mp_isodd (&B
) == 1) {
2456 /* A = (A+y)/2, B = (B-x)/2 */
2457 if ((res
= mp_add (&A
, &y
, &A
)) != MP_OKAY
) {
2460 if ((res
= mp_sub (&B
, &x
, &B
)) != MP_OKAY
) {
2464 /* A = A/2, B = B/2 */
2465 if ((res
= mp_div_2 (&A
, &A
)) != MP_OKAY
) {
2468 if ((res
= mp_div_2 (&B
, &B
)) != MP_OKAY
) {
2473 /* 5. while v is even do */
2474 while (mp_iseven (&v
) == 1) {
2476 if ((res
= mp_div_2 (&v
, &v
)) != MP_OKAY
) {
2479 /* 5.2 if C or D is odd then */
2480 if (mp_isodd (&C
) == 1 || mp_isodd (&D
) == 1) {
2481 /* C = (C+y)/2, D = (D-x)/2 */
2482 if ((res
= mp_add (&C
, &y
, &C
)) != MP_OKAY
) {
2485 if ((res
= mp_sub (&D
, &x
, &D
)) != MP_OKAY
) {
2489 /* C = C/2, D = D/2 */
2490 if ((res
= mp_div_2 (&C
, &C
)) != MP_OKAY
) {
2493 if ((res
= mp_div_2 (&D
, &D
)) != MP_OKAY
) {
2498 /* 6. if u >= v then */
2499 if (mp_cmp (&u
, &v
) != MP_LT
) {
2500 /* u = u - v, A = A - C, B = B - D */
2501 if ((res
= mp_sub (&u
, &v
, &u
)) != MP_OKAY
) {
2505 if ((res
= mp_sub (&A
, &C
, &A
)) != MP_OKAY
) {
2509 if ((res
= mp_sub (&B
, &D
, &B
)) != MP_OKAY
) {
2513 /* v - v - u, C = C - A, D = D - B */
2514 if ((res
= mp_sub (&v
, &u
, &v
)) != MP_OKAY
) {
2518 if ((res
= mp_sub (&C
, &A
, &C
)) != MP_OKAY
) {
2522 if ((res
= mp_sub (&D
, &B
, &D
)) != MP_OKAY
) {
2527 /* if not zero goto step 4 */
2528 if (mp_iszero (&u
) == 0)
2531 /* now a = C, b = D, gcd == g*v */
2533 /* if v != 1 then there is no inverse */
2534 if (mp_cmp_d (&v
, 1) != MP_EQ
) {
2539 /* if it's too low */
2540 while (mp_cmp_d(&C
, 0) == MP_LT
) {
2541 if ((res
= mp_add(&C
, b
, &C
)) != MP_OKAY
) {
2547 while (mp_cmp_mag(&C
, b
) != MP_LT
) {
2548 if ((res
= mp_sub(&C
, b
, &C
)) != MP_OKAY
) {
2553 /* C is now the inverse */
2556 __ERR
:mp_clear_multi (&x
, &y
, &u
, &v
, &A
, &B
, &C
, &D
, NULL
);
2560 /* c = |a| * |b| using Karatsuba Multiplication using
2561 * three half size multiplications
2563 * Let B represent the radix [e.g. 2**DIGIT_BIT] and
2564 * let n represent half of the number of digits in
2567 * a = a1 * B**n + a0
2568 * b = b1 * B**n + b0
2571 a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0
2573 * Note that a1b1 and a0b0 are used twice and only need to be
2574 * computed once. So in total three half size (half # of
2575 * digit) multiplications are performed, a0b0, a1b1 and
2578 * Note that a multiplication of half the digits requires
2579 * 1/4th the number of single precision multiplications so in
2580 * total after one call 25% of the single precision multiplications
2581 * are saved. Note also that the call to mp_mul can end up back
2582 * in this function if the a0, a1, b0, or b1 are above the threshold.
2583 * This is known as divide-and-conquer and leads to the famous
2584 * O(N**lg(3)) or O(N**1.584) work which is asymptotically lower than
2585 * the standard O(N**2) that the baseline/comba methods use.
2586 * Generally though the overhead of this method doesn't pay off
2587 * until a certain size (N ~ 80) is reached.
2589 int mp_karatsuba_mul (const mp_int
* a
, const mp_int
* b
, mp_int
* c
)
2591 mp_int x0
, x1
, y0
, y1
, t1
, x0y0
, x1y1
;
2594 /* default the return code to an error */
2597 /* min # of digits */
2598 B
= MIN (a
->used
, b
->used
);
2600 /* now divide in two */
2603 /* init copy all the temps */
2604 if (mp_init_size (&x0
, B
) != MP_OKAY
)
2606 if (mp_init_size (&x1
, a
->used
- B
) != MP_OKAY
)
2608 if (mp_init_size (&y0
, B
) != MP_OKAY
)
2610 if (mp_init_size (&y1
, b
->used
- B
) != MP_OKAY
)
2614 if (mp_init_size (&t1
, B
* 2) != MP_OKAY
)
2616 if (mp_init_size (&x0y0
, B
* 2) != MP_OKAY
)
2618 if (mp_init_size (&x1y1
, B
* 2) != MP_OKAY
)
2621 /* now shift the digits */
2622 x0
.used
= y0
.used
= B
;
2623 x1
.used
= a
->used
- B
;
2624 y1
.used
= b
->used
- B
;
2628 register mp_digit
*tmpa
, *tmpb
, *tmpx
, *tmpy
;
2630 /* we copy the digits directly instead of using higher level functions
2631 * since we also need to shift the digits
2638 for (x
= 0; x
< B
; x
++) {
2644 for (x
= B
; x
< a
->used
; x
++) {
2649 for (x
= B
; x
< b
->used
; x
++) {
2654 /* only need to clamp the lower words since by definition the
2655 * upper words x1/y1 must have a known number of digits
2660 /* now calc the products x0y0 and x1y1 */
2661 /* after this x0 is no longer required, free temp [x0==t2]! */
2662 if (mp_mul (&x0
, &y0
, &x0y0
) != MP_OKAY
)
2663 goto X1Y1
; /* x0y0 = x0*y0 */
2664 if (mp_mul (&x1
, &y1
, &x1y1
) != MP_OKAY
)
2665 goto X1Y1
; /* x1y1 = x1*y1 */
2667 /* now calc x1-x0 and y1-y0 */
2668 if (mp_sub (&x1
, &x0
, &t1
) != MP_OKAY
)
2669 goto X1Y1
; /* t1 = x1 - x0 */
2670 if (mp_sub (&y1
, &y0
, &x0
) != MP_OKAY
)
2671 goto X1Y1
; /* t2 = y1 - y0 */
2672 if (mp_mul (&t1
, &x0
, &t1
) != MP_OKAY
)
2673 goto X1Y1
; /* t1 = (x1 - x0) * (y1 - y0) */
2676 if (mp_add (&x0y0
, &x1y1
, &x0
) != MP_OKAY
)
2677 goto X1Y1
; /* t2 = x0y0 + x1y1 */
2678 if (mp_sub (&x0
, &t1
, &t1
) != MP_OKAY
)
2679 goto X1Y1
; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
2682 if (mp_lshd (&t1
, B
) != MP_OKAY
)
2683 goto X1Y1
; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
2684 if (mp_lshd (&x1y1
, B
* 2) != MP_OKAY
)
2685 goto X1Y1
; /* x1y1 = x1y1 << 2*B */
2687 if (mp_add (&x0y0
, &t1
, &t1
) != MP_OKAY
)
2688 goto X1Y1
; /* t1 = x0y0 + t1 */
2689 if (mp_add (&t1
, &x1y1
, c
) != MP_OKAY
)
2690 goto X1Y1
; /* t1 = x0y0 + t1 + x1y1 */
2692 /* Algorithm succeeded set the return code to MP_OKAY */
2695 X1Y1
:mp_clear (&x1y1
);
2696 X0Y0
:mp_clear (&x0y0
);
2706 /* Karatsuba squaring, computes b = a*a using three
2707 * half size squarings
2709 * See comments of karatsuba_mul for details. It
2710 * is essentially the same algorithm but merely
2711 * tuned to perform recursive squarings.
2713 int mp_karatsuba_sqr (const mp_int
* a
, mp_int
* b
)
2715 mp_int x0
, x1
, t1
, t2
, x0x0
, x1x1
;
2720 /* min # of digits */
2723 /* now divide in two */
2726 /* init copy all the temps */
2727 if (mp_init_size (&x0
, B
) != MP_OKAY
)
2729 if (mp_init_size (&x1
, a
->used
- B
) != MP_OKAY
)
2733 if (mp_init_size (&t1
, a
->used
* 2) != MP_OKAY
)
2735 if (mp_init_size (&t2
, a
->used
* 2) != MP_OKAY
)
2737 if (mp_init_size (&x0x0
, B
* 2) != MP_OKAY
)
2739 if (mp_init_size (&x1x1
, (a
->used
- B
) * 2) != MP_OKAY
)
2744 register mp_digit
*dst
, *src
;
2748 /* now shift the digits */
2750 for (x
= 0; x
< B
; x
++) {
2755 for (x
= B
; x
< a
->used
; x
++) {
2761 x1
.used
= a
->used
- B
;
2765 /* now calc the products x0*x0 and x1*x1 */
2766 if (mp_sqr (&x0
, &x0x0
) != MP_OKAY
)
2767 goto X1X1
; /* x0x0 = x0*x0 */
2768 if (mp_sqr (&x1
, &x1x1
) != MP_OKAY
)
2769 goto X1X1
; /* x1x1 = x1*x1 */
2771 /* now calc (x1-x0)**2 */
2772 if (mp_sub (&x1
, &x0
, &t1
) != MP_OKAY
)
2773 goto X1X1
; /* t1 = x1 - x0 */
2774 if (mp_sqr (&t1
, &t1
) != MP_OKAY
)
2775 goto X1X1
; /* t1 = (x1 - x0) * (x1 - x0) */
2778 if (s_mp_add (&x0x0
, &x1x1
, &t2
) != MP_OKAY
)
2779 goto X1X1
; /* t2 = x0x0 + x1x1 */
2780 if (mp_sub (&t2
, &t1
, &t1
) != MP_OKAY
)
2781 goto X1X1
; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */
2784 if (mp_lshd (&t1
, B
) != MP_OKAY
)
2785 goto X1X1
; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
2786 if (mp_lshd (&x1x1
, B
* 2) != MP_OKAY
)
2787 goto X1X1
; /* x1x1 = x1x1 << 2*B */
2789 if (mp_add (&x0x0
, &t1
, &t1
) != MP_OKAY
)
2790 goto X1X1
; /* t1 = x0x0 + t1 */
2791 if (mp_add (&t1
, &x1x1
, b
) != MP_OKAY
)
2792 goto X1X1
; /* t1 = x0x0 + t1 + x1x1 */
2796 X1X1
:mp_clear (&x1x1
);
2797 X0X0
:mp_clear (&x0x0
);
2806 /* computes least common multiple as |a*b|/(a, b) */
2807 int mp_lcm (const mp_int
* a
, const mp_int
* b
, mp_int
* c
)
2813 if ((res
= mp_init_multi (&t1
, &t2
, NULL
)) != MP_OKAY
) {
2817 /* t1 = get the GCD of the two inputs */
2818 if ((res
= mp_gcd (a
, b
, &t1
)) != MP_OKAY
) {
2822 /* divide the smallest by the GCD */
2823 if (mp_cmp_mag(a
, b
) == MP_LT
) {
2824 /* store quotient in t2 so that t2 * b is the LCM */
2825 if ((res
= mp_div(a
, &t1
, &t2
, NULL
)) != MP_OKAY
) {
2828 res
= mp_mul(b
, &t2
, c
);
2830 /* store quotient in t2 so that t2 * a is the LCM */
2831 if ((res
= mp_div(b
, &t1
, &t2
, NULL
)) != MP_OKAY
) {
2834 res
= mp_mul(a
, &t2
, c
);
2837 /* fix the sign to positive */
2841 mp_clear_multi (&t1
, &t2
, NULL
);
2845 /* c = a mod b, 0 <= c < b */
2847 mp_mod (const mp_int
* a
, mp_int
* b
, mp_int
* c
)
2852 if ((res
= mp_init (&t
)) != MP_OKAY
) {
2856 if ((res
= mp_div (a
, b
, NULL
, &t
)) != MP_OKAY
) {
2861 if (t
.sign
!= b
->sign
) {
2862 res
= mp_add (b
, &t
, c
);
2873 mp_mod_d (const mp_int
* a
, mp_digit b
, mp_digit
* c
)
2875 return mp_div_d(a
, b
, NULL
, c
);
2879 static int mp_mul_2(const mp_int
* a
, mp_int
* b
)
2881 int x
, res
, oldused
;
2883 /* grow to accommodate result */
2884 if (b
->alloc
< a
->used
+ 1) {
2885 if ((res
= mp_grow (b
, a
->used
+ 1)) != MP_OKAY
) {
2894 register mp_digit r
, rr
, *tmpa
, *tmpb
;
2896 /* alias for source */
2899 /* alias for dest */
2904 for (x
= 0; x
< a
->used
; x
++) {
2906 /* get what will be the *next* carry bit from the
2907 * MSB of the current digit
2909 rr
= *tmpa
>> ((mp_digit
)(DIGIT_BIT
- 1));
2911 /* now shift up this digit, add in the carry [from the previous] */
2912 *tmpb
++ = ((*tmpa
++ << ((mp_digit
)1)) | r
) & MP_MASK
;
2914 /* copy the carry that would be from the source
2915 * digit into the next iteration
2920 /* new leading digit? */
2922 /* add a MSB which is always 1 at this point */
2927 /* now zero any excess digits on the destination
2928 * that we didn't write to
2930 tmpb
= b
->dp
+ b
->used
;
2931 for (x
= b
->used
; x
< oldused
; x
++) {
2940 * shifts with subtractions when the result is greater than b.
2942 * The method is slightly modified to shift B unconditionally up to just under
2943 * the leading bit of b. This saves a lot of multiple precision shifting.
2945 int mp_montgomery_calc_normalization (mp_int
* a
, const mp_int
* b
)
2949 /* how many bits of last digit does b use */
2950 bits
= mp_count_bits (b
) % DIGIT_BIT
;
2954 if ((res
= mp_2expt (a
, (b
->used
- 1) * DIGIT_BIT
+ bits
- 1)) != MP_OKAY
) {
2963 /* now compute C = A * B mod b */
2964 for (x
= bits
- 1; x
< DIGIT_BIT
; x
++) {
2965 if ((res
= mp_mul_2 (a
, a
)) != MP_OKAY
) {
2968 if (mp_cmp_mag (a
, b
) != MP_LT
) {
2969 if ((res
= s_mp_sub (a
, b
, a
)) != MP_OKAY
) {
2978 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
2980 mp_montgomery_reduce (mp_int
* x
, const mp_int
* n
, mp_digit rho
)
2985 /* can the fast reduction [comba] method be used?
2987 * Note that unlike in mul you're safely allowed *less*
2988 * than the available columns [255 per default] since carries
2989 * are fixed up in the inner loop.
2991 digs
= n
->used
* 2 + 1;
2992 if ((digs
< MP_WARRAY
) &&
2994 (1 << ((CHAR_BIT
* sizeof (mp_word
)) - (2 * DIGIT_BIT
)))) {
2995 return fast_mp_montgomery_reduce (x
, n
, rho
);
2998 /* grow the input as required */
2999 if (x
->alloc
< digs
) {
3000 if ((res
= mp_grow (x
, digs
)) != MP_OKAY
) {
3006 for (ix
= 0; ix
< n
->used
; ix
++) {
3007 /* mu = ai * rho mod b
3009 * The value of rho must be precalculated via
3010 * montgomery_setup() such that
3011 * it equals -1/n0 mod b this allows the
3012 * following inner loop to reduce the
3013 * input one digit at a time
3015 mu
= (mp_digit
) (((mp_word
)x
->dp
[ix
]) * ((mp_word
)rho
) & MP_MASK
);
3017 /* a = a + mu * m * b**i */
3020 register mp_digit
*tmpn
, *tmpx
, u
;
3023 /* alias for digits of the modulus */
3026 /* alias for the digits of x [the input] */
3029 /* set the carry to zero */
3032 /* Multiply and add in place */
3033 for (iy
= 0; iy
< n
->used
; iy
++) {
3034 /* compute product and sum */
3035 r
= ((mp_word
)mu
) * ((mp_word
)*tmpn
++) +
3036 ((mp_word
) u
) + ((mp_word
) * tmpx
);
3039 u
= (mp_digit
)(r
>> ((mp_word
) DIGIT_BIT
));
3042 *tmpx
++ = (mp_digit
)(r
& ((mp_word
) MP_MASK
));
3044 /* At this point the ix'th digit of x should be zero */
3047 /* propagate carries upwards as required*/
3050 u
= *tmpx
>> DIGIT_BIT
;
3056 /* at this point the n.used'th least
3057 * significant digits of x are all zero
3058 * which means we can shift x to the
3059 * right by n.used digits and the
3060 * residue is unchanged.
3063 /* x = x/b**n.used */
3065 mp_rshd (x
, n
->used
);
3067 /* if x >= n then x = x - n */
3068 if (mp_cmp_mag (x
, n
) != MP_LT
) {
3069 return s_mp_sub (x
, n
, x
);
3075 /* setups the montgomery reduction stuff */
3077 mp_montgomery_setup (const mp_int
* n
, mp_digit
* rho
)
3081 /* fast inversion mod 2**k
3083 * Based on the fact that
3085 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
3086 * => 2*X*A - X*X*A*A = 1
3087 * => 2*(1) - (1) = 1
3095 x
= (((b
+ 2) & 4) << 1) + b
; /* here x*a==1 mod 2**4 */
3096 x
*= 2 - b
* x
; /* here x*a==1 mod 2**8 */
3097 x
*= 2 - b
* x
; /* here x*a==1 mod 2**16 */
3098 x
*= 2 - b
* x
; /* here x*a==1 mod 2**32 */
3100 /* rho = -1/m mod b */
3101 *rho
= (((mp_word
)1 << ((mp_word
) DIGIT_BIT
)) - x
) & MP_MASK
;
3106 /* high level multiplication (handles sign) */
3107 int mp_mul (const mp_int
* a
, const mp_int
* b
, mp_int
* c
)
3110 neg
= (a
->sign
== b
->sign
) ? MP_ZPOS
: MP_NEG
;
3112 /* use Karatsuba? */
3113 if (MIN (a
->used
, b
->used
) >= KARATSUBA_MUL_CUTOFF
) {
3114 res
= mp_karatsuba_mul (a
, b
, c
);
3117 /* can we use the fast multiplier?
3119 * The fast multiplier can be used if the output will
3120 * have less than MP_WARRAY digits and the number of
3121 * digits won't affect carry propagation
3123 int digs
= a
->used
+ b
->used
+ 1;
3125 if ((digs
< MP_WARRAY
) &&
3126 MIN(a
->used
, b
->used
) <=
3127 (1 << ((CHAR_BIT
* sizeof (mp_word
)) - (2 * DIGIT_BIT
)))) {
3128 res
= fast_s_mp_mul_digs (a
, b
, c
, digs
);
3130 res
= s_mp_mul (a
, b
, c
); /* uses s_mp_mul_digs */
3132 c
->sign
= (c
->used
> 0) ? neg
: MP_ZPOS
;
3136 /* d = a * b (mod c) */
3138 mp_mulmod (const mp_int
* a
, const mp_int
* b
, mp_int
* c
, mp_int
* d
)
3143 if ((res
= mp_init (&t
)) != MP_OKAY
) {
3147 if ((res
= mp_mul (a
, b
, &t
)) != MP_OKAY
) {
3151 res
= mp_mod (&t
, c
, d
);
3156 /* table of first PRIME_SIZE primes */
3157 static const mp_digit __prime_tab
[] = {
3158 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
3159 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
3160 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
3161 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
3162 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
3163 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
3164 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
3165 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
3167 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
3168 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
3169 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
3170 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
3171 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
3172 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
3173 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
3174 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
3176 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
3177 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
3178 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
3179 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
3180 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
3181 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
3182 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
3183 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
3185 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
3186 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
3187 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
3188 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
3189 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
3190 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
3191 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
3192 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
3195 /* determines if an integers is divisible by one
3196 * of the first PRIME_SIZE primes or not
3198 * sets result to 0 if not, 1 if yes
3200 static int mp_prime_is_divisible (const mp_int
* a
, int *result
)
3205 /* default to not */
3208 for (ix
= 0; ix
< PRIME_SIZE
; ix
++) {
3209 /* what is a mod __prime_tab[ix] */
3210 if ((err
= mp_mod_d (a
, __prime_tab
[ix
], &res
)) != MP_OKAY
) {
3214 /* is the residue zero? */
3224 /* Miller-Rabin test of "a" to the base of "b" as described in
3225 * HAC pp. 139 Algorithm 4.24
3227 * Sets result to 0 if definitely composite or 1 if probably prime.
3228 * Randomly the chance of error is no more than 1/4 and often
3231 static int mp_prime_miller_rabin (mp_int
* a
, const mp_int
* b
, int *result
)
3240 if (mp_cmp_d(b
, 1) != MP_GT
) {
3244 /* get n1 = a - 1 */
3245 if ((err
= mp_init_copy (&n1
, a
)) != MP_OKAY
) {
3248 if ((err
= mp_sub_d (&n1
, 1, &n1
)) != MP_OKAY
) {
3252 /* set 2**s * r = n1 */
3253 if ((err
= mp_init_copy (&r
, &n1
)) != MP_OKAY
) {
3257 /* count the number of least significant bits
3262 /* now divide n - 1 by 2**s */
3263 if ((err
= mp_div_2d (&r
, s
, &r
, NULL
)) != MP_OKAY
) {
3267 /* compute y = b**r mod a */
3268 if ((err
= mp_init (&y
)) != MP_OKAY
) {
3271 if ((err
= mp_exptmod (b
, &r
, a
, &y
)) != MP_OKAY
) {
3275 /* if y != 1 and y != n1 do */
3276 if (mp_cmp_d (&y
, 1) != MP_EQ
&& mp_cmp (&y
, &n1
) != MP_EQ
) {
3278 /* while j <= s-1 and y != n1 */
3279 while ((j
<= (s
- 1)) && mp_cmp (&y
, &n1
) != MP_EQ
) {
3280 if ((err
= mp_sqrmod (&y
, a
, &y
)) != MP_OKAY
) {
3284 /* if y == 1 then composite */
3285 if (mp_cmp_d (&y
, 1) == MP_EQ
) {
3292 /* if y != n1 then composite */
3293 if (mp_cmp (&y
, &n1
) != MP_EQ
) {
3298 /* probably prime now */
3302 __N1
:mp_clear (&n1
);
3306 /* performs a variable number of rounds of Miller-Rabin
3308 * Probability of error after t rounds is no more than
3311 * Sets result to 1 if probably prime, 0 otherwise
3313 static int mp_prime_is_prime (mp_int
* a
, int t
, int *result
)
3321 /* valid value of t? */
3322 if (t
<= 0 || t
> PRIME_SIZE
) {
3326 /* is the input equal to one of the primes in the table? */
3327 for (ix
= 0; ix
< PRIME_SIZE
; ix
++) {
3328 if (mp_cmp_d(a
, __prime_tab
[ix
]) == MP_EQ
) {
3334 /* first perform trial division */
3335 if ((err
= mp_prime_is_divisible (a
, &res
)) != MP_OKAY
) {
3339 /* return if it was trivially divisible */
3340 if (res
== MP_YES
) {
3344 /* now perform the miller-rabin rounds */
3345 if ((err
= mp_init (&b
)) != MP_OKAY
) {
3349 for (ix
= 0; ix
< t
; ix
++) {
3351 mp_set (&b
, __prime_tab
[ix
]);
3353 if ((err
= mp_prime_miller_rabin (a
, &b
, &res
)) != MP_OKAY
) {
3362 /* passed the test */
3368 static const struct {
3381 /* returns # of RM trials required for a given bit size */
3382 int mp_prime_rabin_miller_trials(int size
)
3386 for (x
= 0; x
< (int)(sizeof(sizes
)/(sizeof(sizes
[0]))); x
++) {
3387 if (sizes
[x
].k
== size
) {
3389 } else if (sizes
[x
].k
> size
) {
3390 return (x
== 0) ? sizes
[0].t
: sizes
[x
- 1].t
;
3393 return sizes
[x
-1].t
+ 1;
3396 /* makes a truly random prime of a given size (bits),
3398 * Flags are as follows:
3400 * LTM_PRIME_BBS - make prime congruent to 3 mod 4
3401 * LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS)
3402 * LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero
3403 * LTM_PRIME_2MSB_ON - make the 2nd highest bit one
3405 * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can
3406 * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself
3411 /* This is possibly the mother of all prime generation functions, muahahahahaha! */
3412 int mp_prime_random_ex(mp_int
*a
, int t
, int size
, int flags
, ltm_prime_callback cb
, void *dat
)
3414 unsigned char *tmp
, maskAND
, maskOR_msb
, maskOR_lsb
;
3415 int res
, err
, bsize
, maskOR_msb_offset
;
3417 /* sanity check the input */
3418 if (size
<= 1 || t
<= 0) {
3422 /* LTM_PRIME_SAFE implies LTM_PRIME_BBS */
3423 if (flags
& LTM_PRIME_SAFE
) {
3424 flags
|= LTM_PRIME_BBS
;
3427 /* calc the byte size */
3428 bsize
= (size
>>3)+((size
&7)?1:0);
3430 /* we need a buffer of bsize bytes */
3431 tmp
= HeapAlloc(GetProcessHeap(), 0, bsize
);
3436 /* calc the maskAND value for the MSbyte*/
3437 maskAND
= ((size
&7) == 0) ? 0xFF : (0xFF >> (8 - (size
& 7)));
3439 /* calc the maskOR_msb */
3441 maskOR_msb_offset
= ((size
& 7) == 1) ? 1 : 0;
3442 if (flags
& LTM_PRIME_2MSB_ON
) {
3443 maskOR_msb
|= 1 << ((size
- 2) & 7);
3444 } else if (flags
& LTM_PRIME_2MSB_OFF
) {
3445 maskAND
&= ~(1 << ((size
- 2) & 7));
3448 /* get the maskOR_lsb */
3450 if (flags
& LTM_PRIME_BBS
) {
3455 /* read the bytes */
3456 if (cb(tmp
, bsize
, dat
) != bsize
) {
3461 /* work over the MSbyte */
3463 tmp
[0] |= 1 << ((size
- 1) & 7);
3465 /* mix in the maskORs */
3466 tmp
[maskOR_msb_offset
] |= maskOR_msb
;
3467 tmp
[bsize
-1] |= maskOR_lsb
;
3470 if ((err
= mp_read_unsigned_bin(a
, tmp
, bsize
)) != MP_OKAY
) { goto error
; }
3473 if ((err
= mp_prime_is_prime(a
, t
, &res
)) != MP_OKAY
) { goto error
; }
3478 if (flags
& LTM_PRIME_SAFE
) {
3479 /* see if (a-1)/2 is prime */
3480 if ((err
= mp_sub_d(a
, 1, a
)) != MP_OKAY
) { goto error
; }
3481 if ((err
= mp_div_2(a
, a
)) != MP_OKAY
) { goto error
; }
3484 if ((err
= mp_prime_is_prime(a
, t
, &res
)) != MP_OKAY
) { goto error
; }
3486 } while (res
== MP_NO
);
3488 if (flags
& LTM_PRIME_SAFE
) {
3489 /* restore a to the original value */
3490 if ((err
= mp_mul_2(a
, a
)) != MP_OKAY
) { goto error
; }
3491 if ((err
= mp_add_d(a
, 1, a
)) != MP_OKAY
) { goto error
; }
3496 HeapFree(GetProcessHeap(), 0, tmp
);
3500 /* reads an unsigned char array, assumes the msb is stored first [big endian] */
3502 mp_read_unsigned_bin (mp_int
* a
, const unsigned char *b
, int c
)
3506 /* make sure there are at least two digits */
3508 if ((res
= mp_grow(a
, 2)) != MP_OKAY
) {
3516 /* read the bytes in */
3518 if ((res
= mp_mul_2d (a
, 8, a
)) != MP_OKAY
) {
3529 /* reduces x mod m, assumes 0 < x < m**2, mu is
3530 * precomputed via mp_reduce_setup.
3531 * From HAC pp.604 Algorithm 14.42
3534 mp_reduce (mp_int
* x
, const mp_int
* m
, const mp_int
* mu
)
3537 int res
, um
= m
->used
;
3540 if ((res
= mp_init_copy (&q
, x
)) != MP_OKAY
) {
3544 /* q1 = x / b**(k-1) */
3545 mp_rshd (&q
, um
- 1);
3547 /* according to HAC this optimization is ok */
3548 if (((unsigned long) um
) > (((mp_digit
)1) << (DIGIT_BIT
- 1))) {
3549 if ((res
= mp_mul (&q
, mu
, &q
)) != MP_OKAY
) {
3553 if ((res
= s_mp_mul_high_digs (&q
, mu
, &q
, um
- 1)) != MP_OKAY
) {
3558 /* q3 = q2 / b**(k+1) */
3559 mp_rshd (&q
, um
+ 1);
3561 /* x = x mod b**(k+1), quick (no division) */
3562 if ((res
= mp_mod_2d (x
, DIGIT_BIT
* (um
+ 1), x
)) != MP_OKAY
) {
3566 /* q = q * m mod b**(k+1), quick (no division) */
3567 if ((res
= s_mp_mul_digs (&q
, m
, &q
, um
+ 1)) != MP_OKAY
) {
3572 if ((res
= mp_sub (x
, &q
, x
)) != MP_OKAY
) {
3576 /* If x < 0, add b**(k+1) to it */
3577 if (mp_cmp_d (x
, 0) == MP_LT
) {
3579 if ((res
= mp_lshd (&q
, um
+ 1)) != MP_OKAY
)
3581 if ((res
= mp_add (x
, &q
, x
)) != MP_OKAY
)
3585 /* Back off if it's too big */
3586 while (mp_cmp (x
, m
) != MP_LT
) {
3587 if ((res
= s_mp_sub (x
, m
, x
)) != MP_OKAY
) {
3598 /* reduces a modulo n where n is of the form 2**p - d */
3600 mp_reduce_2k(mp_int
*a
, const mp_int
*n
, mp_digit d
)
3605 if ((res
= mp_init(&q
)) != MP_OKAY
) {
3609 p
= mp_count_bits(n
);
3611 /* q = a/2**p, a = a mod 2**p */
3612 if ((res
= mp_div_2d(a
, p
, &q
, a
)) != MP_OKAY
) {
3618 if ((res
= mp_mul_d(&q
, d
, &q
)) != MP_OKAY
) {
3624 if ((res
= s_mp_add(a
, &q
, a
)) != MP_OKAY
) {
3628 if (mp_cmp_mag(a
, n
) != MP_LT
) {
3638 /* determines the setup value */
3640 mp_reduce_2k_setup(const mp_int
*a
, mp_digit
*d
)
3645 if ((res
= mp_init(&tmp
)) != MP_OKAY
) {
3649 p
= mp_count_bits(a
);
3650 if ((res
= mp_2expt(&tmp
, p
)) != MP_OKAY
) {
3655 if ((res
= s_mp_sub(&tmp
, a
, &tmp
)) != MP_OKAY
) {
3665 /* pre-calculate the value required for Barrett reduction
3666 * For a given modulus "b" it calculates the value required in "a"
3668 int mp_reduce_setup (mp_int
* a
, const mp_int
* b
)
3672 if ((res
= mp_2expt (a
, b
->used
* 2 * DIGIT_BIT
)) != MP_OKAY
) {
3675 return mp_div (a
, b
, a
, NULL
);
3678 /* set to a digit */
3679 void mp_set (mp_int
* a
, mp_digit b
)
3682 a
->dp
[0] = b
& MP_MASK
;
3683 a
->used
= (a
->dp
[0] != 0) ? 1 : 0;
3686 /* set a 32-bit const */
3687 int mp_set_int (mp_int
* a
, unsigned long b
)
3693 /* set four bits at a time */
3694 for (x
= 0; x
< 8; x
++) {
3695 /* shift the number up four bits */
3696 if ((res
= mp_mul_2d (a
, 4, a
)) != MP_OKAY
) {
3700 /* OR in the top four bits of the source */
3701 a
->dp
[0] |= (b
>> 28) & 15;
3703 /* shift the source up to the next four bits */
3706 /* ensure that digits are not clamped off */
3713 /* shrink a bignum */
3714 int mp_shrink (mp_int
* a
)
3717 if (a
->alloc
!= a
->used
&& a
->used
> 0) {
3718 if ((tmp
= HeapReAlloc(GetProcessHeap(), 0, a
->dp
, sizeof (mp_digit
) * a
->used
)) == NULL
) {
3727 /* computes b = a*a */
3729 mp_sqr (const mp_int
* a
, mp_int
* b
)
3733 if (a
->used
>= KARATSUBA_SQR_CUTOFF
) {
3734 res
= mp_karatsuba_sqr (a
, b
);
3737 /* can we use the fast comba multiplier? */
3738 if ((a
->used
* 2 + 1) < MP_WARRAY
&&
3740 (1 << (sizeof(mp_word
) * CHAR_BIT
- 2*DIGIT_BIT
- 1))) {
3741 res
= fast_s_mp_sqr (a
, b
);
3743 res
= s_mp_sqr (a
, b
);
3749 /* c = a * a (mod b) */
3751 mp_sqrmod (const mp_int
* a
, mp_int
* b
, mp_int
* c
)
3756 if ((res
= mp_init (&t
)) != MP_OKAY
) {
3760 if ((res
= mp_sqr (a
, &t
)) != MP_OKAY
) {
3764 res
= mp_mod (&t
, b
, c
);
3769 /* high level subtraction (handles signs) */
3771 mp_sub (mp_int
* a
, mp_int
* b
, mp_int
* c
)
3779 /* subtract a negative from a positive, OR */
3780 /* subtract a positive from a negative. */
3781 /* In either case, ADD their magnitudes, */
3782 /* and use the sign of the first number. */
3784 res
= s_mp_add (a
, b
, c
);
3786 /* subtract a positive from a positive, OR */
3787 /* subtract a negative from a negative. */
3788 /* First, take the difference between their */
3789 /* magnitudes, then... */
3790 if (mp_cmp_mag (a
, b
) != MP_LT
) {
3791 /* Copy the sign from the first */
3793 /* The first has a larger or equal magnitude */
3794 res
= s_mp_sub (a
, b
, c
);
3796 /* The result has the *opposite* sign from */
3797 /* the first number. */
3798 c
->sign
= (sa
== MP_ZPOS
) ? MP_NEG
: MP_ZPOS
;
3799 /* The second has a larger magnitude */
3800 res
= s_mp_sub (b
, a
, c
);
3806 /* single digit subtraction */
3808 mp_sub_d (mp_int
* a
, mp_digit b
, mp_int
* c
)
3810 mp_digit
*tmpa
, *tmpc
, mu
;
3811 int res
, ix
, oldused
;
3813 /* grow c as required */
3814 if (c
->alloc
< a
->used
+ 1) {
3815 if ((res
= mp_grow(c
, a
->used
+ 1)) != MP_OKAY
) {
3820 /* if a is negative just do an unsigned
3821 * addition [with fudged signs]
3823 if (a
->sign
== MP_NEG
) {
3825 res
= mp_add_d(a
, b
, c
);
3826 a
->sign
= c
->sign
= MP_NEG
;
3835 /* if a <= b simply fix the single digit */
3836 if ((a
->used
== 1 && a
->dp
[0] <= b
) || a
->used
== 0) {
3838 *tmpc
++ = b
- *tmpa
;
3844 /* negative/1digit */
3852 /* subtract first digit */
3853 *tmpc
= *tmpa
++ - b
;
3854 mu
= *tmpc
>> (sizeof(mp_digit
) * CHAR_BIT
- 1);
3857 /* handle rest of the digits */
3858 for (ix
= 1; ix
< a
->used
; ix
++) {
3859 *tmpc
= *tmpa
++ - mu
;
3860 mu
= *tmpc
>> (sizeof(mp_digit
) * CHAR_BIT
- 1);
3865 /* zero excess digits */
3866 while (ix
++ < oldused
) {
3873 /* store in unsigned [big endian] format */
3875 mp_to_unsigned_bin (const mp_int
* a
, unsigned char *b
)
3880 if ((res
= mp_init_copy (&t
, a
)) != MP_OKAY
) {
3885 while (mp_iszero (&t
) == 0) {
3886 b
[x
++] = (unsigned char) (t
.dp
[0] & 255);
3887 if ((res
= mp_div_2d (&t
, 8, &t
, NULL
)) != MP_OKAY
) {
3897 /* get the size for an unsigned equivalent */
3899 mp_unsigned_bin_size (const mp_int
* a
)
3901 int size
= mp_count_bits (a
);
3902 return (size
/ 8 + ((size
& 7) != 0 ? 1 : 0));
3905 /* reverse an array, used for radix code */
3907 bn_reverse (unsigned char *s
, int len
)
3923 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
3925 s_mp_add (mp_int
* a
, mp_int
* b
, mp_int
* c
)
3928 int olduse
, res
, min
, max
;
3930 /* find sizes, we let |a| <= |b| which means we have to sort
3931 * them. "x" will point to the input with the most digits
3933 if (a
->used
> b
->used
) {
3944 if (c
->alloc
< max
+ 1) {
3945 if ((res
= mp_grow (c
, max
+ 1)) != MP_OKAY
) {
3950 /* get old used digit count and set new one */
3955 register mp_digit u
, *tmpa
, *tmpb
, *tmpc
;
3958 /* alias for digit pointers */
3969 /* zero the carry */
3971 for (i
= 0; i
< min
; i
++) {
3972 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
3973 *tmpc
= *tmpa
++ + *tmpb
++ + u
;
3975 /* U = carry bit of T[i] */
3976 u
= *tmpc
>> ((mp_digit
)DIGIT_BIT
);
3978 /* take away carry bit from T[i] */
3982 /* now copy higher words if any, that is in A+B
3983 * if A or B has more digits add those in
3986 for (; i
< max
; i
++) {
3987 /* T[i] = X[i] + U */
3988 *tmpc
= x
->dp
[i
] + u
;
3990 /* U = carry bit of T[i] */
3991 u
= *tmpc
>> ((mp_digit
)DIGIT_BIT
);
3993 /* take away carry bit from T[i] */
4001 /* clear digits above oldused */
4002 for (i
= c
->used
; i
< olduse
; i
++) {
4011 static int s_mp_exptmod (const mp_int
* G
, const mp_int
* X
, mp_int
* P
, mp_int
* Y
)
4013 mp_int M
[256], res
, mu
;
4015 int err
, bitbuf
, bitcpy
, bitcnt
, mode
, digidx
, x
, y
, winsize
;
4017 /* find window size */
4018 x
= mp_count_bits (X
);
4021 } else if (x
<= 36) {
4023 } else if (x
<= 140) {
4025 } else if (x
<= 450) {
4027 } else if (x
<= 1303) {
4029 } else if (x
<= 3529) {
4036 /* init first cell */
4037 if ((err
= mp_init(&M
[1])) != MP_OKAY
) {
4041 /* now init the second half of the array */
4042 for (x
= 1<<(winsize
-1); x
< (1 << winsize
); x
++) {
4043 if ((err
= mp_init(&M
[x
])) != MP_OKAY
) {
4044 for (y
= 1<<(winsize
-1); y
< x
; y
++) {
4052 /* create mu, used for Barrett reduction */
4053 if ((err
= mp_init (&mu
)) != MP_OKAY
) {
4056 if ((err
= mp_reduce_setup (&mu
, P
)) != MP_OKAY
) {
4062 * The M table contains powers of the base,
4063 * e.g. M[x] = G**x mod P
4065 * The first half of the table is not
4066 * computed though accept for M[0] and M[1]
4068 if ((err
= mp_mod (G
, P
, &M
[1])) != MP_OKAY
) {
4072 /* compute the value at M[1<<(winsize-1)] by squaring
4073 * M[1] (winsize-1) times
4075 if ((err
= mp_copy (&M
[1], &M
[1 << (winsize
- 1)])) != MP_OKAY
) {
4079 for (x
= 0; x
< (winsize
- 1); x
++) {
4080 if ((err
= mp_sqr (&M
[1 << (winsize
- 1)],
4081 &M
[1 << (winsize
- 1)])) != MP_OKAY
) {
4084 if ((err
= mp_reduce (&M
[1 << (winsize
- 1)], P
, &mu
)) != MP_OKAY
) {
4089 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
4090 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
4092 for (x
= (1 << (winsize
- 1)) + 1; x
< (1 << winsize
); x
++) {
4093 if ((err
= mp_mul (&M
[x
- 1], &M
[1], &M
[x
])) != MP_OKAY
) {
4096 if ((err
= mp_reduce (&M
[x
], P
, &mu
)) != MP_OKAY
) {
4102 if ((err
= mp_init (&res
)) != MP_OKAY
) {
4107 /* set initial mode and bit cnt */
4111 digidx
= X
->used
- 1;
4116 /* grab next digit as required */
4117 if (--bitcnt
== 0) {
4118 /* if digidx == -1 we are out of digits */
4122 /* read next digit and reset the bitcnt */
4123 buf
= X
->dp
[digidx
--];
4127 /* grab the next msb from the exponent */
4128 y
= (buf
>> (mp_digit
)(DIGIT_BIT
- 1)) & 1;
4129 buf
<<= (mp_digit
)1;
4131 /* if the bit is zero and mode == 0 then we ignore it
4132 * These represent the leading zero bits before the first 1 bit
4133 * in the exponent. Technically this opt is not required but it
4134 * does lower the # of trivial squaring/reductions used
4136 if (mode
== 0 && y
== 0) {
4140 /* if the bit is zero and mode == 1 then we square */
4141 if (mode
== 1 && y
== 0) {
4142 if ((err
= mp_sqr (&res
, &res
)) != MP_OKAY
) {
4145 if ((err
= mp_reduce (&res
, P
, &mu
)) != MP_OKAY
) {
4151 /* else we add it to the window */
4152 bitbuf
|= (y
<< (winsize
- ++bitcpy
));
4155 if (bitcpy
== winsize
) {
4156 /* ok window is filled so square as required and multiply */
4158 for (x
= 0; x
< winsize
; x
++) {
4159 if ((err
= mp_sqr (&res
, &res
)) != MP_OKAY
) {
4162 if ((err
= mp_reduce (&res
, P
, &mu
)) != MP_OKAY
) {
4168 if ((err
= mp_mul (&res
, &M
[bitbuf
], &res
)) != MP_OKAY
) {
4171 if ((err
= mp_reduce (&res
, P
, &mu
)) != MP_OKAY
) {
4175 /* empty window and reset */
4182 /* if bits remain then square/multiply */
4183 if (mode
== 2 && bitcpy
> 0) {
4184 /* square then multiply if the bit is set */
4185 for (x
= 0; x
< bitcpy
; x
++) {
4186 if ((err
= mp_sqr (&res
, &res
)) != MP_OKAY
) {
4189 if ((err
= mp_reduce (&res
, P
, &mu
)) != MP_OKAY
) {
4194 if ((bitbuf
& (1 << winsize
)) != 0) {
4196 if ((err
= mp_mul (&res
, &M
[1], &res
)) != MP_OKAY
) {
4199 if ((err
= mp_reduce (&res
, P
, &mu
)) != MP_OKAY
) {
4208 __RES
:mp_clear (&res
);
4209 __MU
:mp_clear (&mu
);
4212 for (x
= 1<<(winsize
-1); x
< (1 << winsize
); x
++) {
4218 /* multiplies |a| * |b| and only computes up to digs digits of result
4219 * HAC pp. 595, Algorithm 14.12 Modified so you can control how
4220 * many digits of output are created.
4223 s_mp_mul_digs (const mp_int
* a
, const mp_int
* b
, mp_int
* c
, int digs
)
4226 int res
, pa
, pb
, ix
, iy
;
4229 mp_digit tmpx
, *tmpt
, *tmpy
;
4231 /* can we use the fast multiplier? */
4232 if (((digs
) < MP_WARRAY
) &&
4233 MIN (a
->used
, b
->used
) <
4234 (1 << ((CHAR_BIT
* sizeof (mp_word
)) - (2 * DIGIT_BIT
)))) {
4235 return fast_s_mp_mul_digs (a
, b
, c
, digs
);
4238 if ((res
= mp_init_size (&t
, digs
)) != MP_OKAY
) {
4243 /* compute the digits of the product directly */
4245 for (ix
= 0; ix
< pa
; ix
++) {
4246 /* set the carry to zero */
4249 /* limit ourselves to making digs digits of output */
4250 pb
= MIN (b
->used
, digs
- ix
);
4252 /* setup some aliases */
4253 /* copy of the digit from a used within the nested loop */
4256 /* an alias for the destination shifted ix places */
4259 /* an alias for the digits of b */
4262 /* compute the columns of the output and propagate the carry */
4263 for (iy
= 0; iy
< pb
; iy
++) {
4264 /* compute the column as a mp_word */
4265 r
= ((mp_word
)*tmpt
) +
4266 ((mp_word
)tmpx
) * ((mp_word
)*tmpy
++) +
4269 /* the new column is the lower part of the result */
4270 *tmpt
++ = (mp_digit
) (r
& ((mp_word
) MP_MASK
));
4272 /* get the carry word from the result */
4273 u
= (mp_digit
) (r
>> ((mp_word
) DIGIT_BIT
));
4275 /* set carry if it is placed below digs */
4276 if (ix
+ iy
< digs
) {
4288 /* multiplies |a| * |b| and does not compute the lower digs digits
4289 * [meant to get the higher part of the product]
4292 s_mp_mul_high_digs (const mp_int
* a
, const mp_int
* b
, mp_int
* c
, int digs
)
4295 int res
, pa
, pb
, ix
, iy
;
4298 mp_digit tmpx
, *tmpt
, *tmpy
;
4300 /* can we use the fast multiplier? */
4301 if (((a
->used
+ b
->used
+ 1) < MP_WARRAY
)
4302 && MIN (a
->used
, b
->used
) < (1 << ((CHAR_BIT
* sizeof (mp_word
)) - (2 * DIGIT_BIT
)))) {
4303 return fast_s_mp_mul_high_digs (a
, b
, c
, digs
);
4306 if ((res
= mp_init_size (&t
, a
->used
+ b
->used
+ 1)) != MP_OKAY
) {
4309 t
.used
= a
->used
+ b
->used
+ 1;
4313 for (ix
= 0; ix
< pa
; ix
++) {
4314 /* clear the carry */
4317 /* left hand side of A[ix] * B[iy] */
4320 /* alias to the address of where the digits will be stored */
4321 tmpt
= &(t
.dp
[digs
]);
4323 /* alias for where to read the right hand side from */
4324 tmpy
= b
->dp
+ (digs
- ix
);
4326 for (iy
= digs
- ix
; iy
< pb
; iy
++) {
4327 /* calculate the double precision result */
4328 r
= ((mp_word
)*tmpt
) +
4329 ((mp_word
)tmpx
) * ((mp_word
)*tmpy
++) +
4332 /* get the lower part */
4333 *tmpt
++ = (mp_digit
) (r
& ((mp_word
) MP_MASK
));
4335 /* carry the carry */
4336 u
= (mp_digit
) (r
>> ((mp_word
) DIGIT_BIT
));
4346 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
4348 s_mp_sqr (const mp_int
* a
, mp_int
* b
)
4351 int res
, ix
, iy
, pa
;
4353 mp_digit u
, tmpx
, *tmpt
;
4356 if ((res
= mp_init_size (&t
, 2*pa
+ 1)) != MP_OKAY
) {
4360 /* default used is maximum possible size */
4363 for (ix
= 0; ix
< pa
; ix
++) {
4364 /* first calculate the digit at 2*ix */
4365 /* calculate double precision result */
4366 r
= ((mp_word
) t
.dp
[2*ix
]) +
4367 ((mp_word
)a
->dp
[ix
])*((mp_word
)a
->dp
[ix
]);
4369 /* store lower part in result */
4370 t
.dp
[ix
+ix
] = (mp_digit
) (r
& ((mp_word
) MP_MASK
));
4373 u
= (mp_digit
)(r
>> ((mp_word
) DIGIT_BIT
));
4375 /* left hand side of A[ix] * A[iy] */
4378 /* alias for where to store the results */
4379 tmpt
= t
.dp
+ (2*ix
+ 1);
4381 for (iy
= ix
+ 1; iy
< pa
; iy
++) {
4382 /* first calculate the product */
4383 r
= ((mp_word
)tmpx
) * ((mp_word
)a
->dp
[iy
]);
4385 /* now calculate the double precision result, note we use
4386 * addition instead of *2 since it's easier to optimize
4388 r
= ((mp_word
) *tmpt
) + r
+ r
+ ((mp_word
) u
);
4390 /* store lower part */
4391 *tmpt
++ = (mp_digit
) (r
& ((mp_word
) MP_MASK
));
4394 u
= (mp_digit
)(r
>> ((mp_word
) DIGIT_BIT
));
4396 /* propagate upwards */
4398 r
= ((mp_word
) *tmpt
) + ((mp_word
) u
);
4399 *tmpt
++ = (mp_digit
) (r
& ((mp_word
) MP_MASK
));
4400 u
= (mp_digit
)(r
>> ((mp_word
) DIGIT_BIT
));
4410 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
4412 s_mp_sub (const mp_int
* a
, const mp_int
* b
, mp_int
* c
)
4414 int olduse
, res
, min
, max
;
4421 if (c
->alloc
< max
) {
4422 if ((res
= mp_grow (c
, max
)) != MP_OKAY
) {
4430 register mp_digit u
, *tmpa
, *tmpb
, *tmpc
;
4433 /* alias for digit pointers */
4438 /* set carry to zero */
4440 for (i
= 0; i
< min
; i
++) {
4441 /* T[i] = A[i] - B[i] - U */
4442 *tmpc
= *tmpa
++ - *tmpb
++ - u
;
4444 /* U = carry bit of T[i]
4445 * Note this saves performing an AND operation since
4446 * if a carry does occur it will propagate all the way to the
4447 * MSB. As a result a single shift is enough to get the carry
4449 u
= *tmpc
>> ((mp_digit
)(CHAR_BIT
* sizeof (mp_digit
) - 1));
4451 /* Clear carry from T[i] */
4455 /* now copy higher words if any, e.g. if A has more digits than B */
4456 for (; i
< max
; i
++) {
4457 /* T[i] = A[i] - U */
4458 *tmpc
= *tmpa
++ - u
;
4460 /* U = carry bit of T[i] */
4461 u
= *tmpc
>> ((mp_digit
)(CHAR_BIT
* sizeof (mp_digit
) - 1));
4463 /* Clear carry from T[i] */
4467 /* clear digits above used (since we may not have grown result above) */
4468 for (i
= c
->used
; i
< olduse
; i
++) {