3 * Multi Precision Integer functions
5 * Copyright 2004 Michael Jung
6 * Based on public domain code by Tom St Denis (tomstdenis@iahu.ca)
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
24 * This file contains code from the LibTomCrypt cryptographic
25 * library written by Tom St Denis (tomstdenis@iahu.ca). LibTomCrypt
26 * is in the public domain. The code in this file is tailored to
27 * special requirements. Take a look at http://libtomcrypt.org for the
34 /* Known optimal configurations
35 CPU /Compiler /MUL CUTOFF/SQR CUTOFF
36 -------------------------------------------------------------
37 Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-)
39 static const int KARATSUBA_MUL_CUTOFF
= 88, /* Min. number of digits before Karatsuba multiplication is used. */
40 KARATSUBA_SQR_CUTOFF
= 128; /* Min. number of digits before Karatsuba squaring is used. */
42 /* computes the modular inverse via binary extended euclidean algorithm,
43 * that is c = 1/a mod b
45 * Based on slow invmod except this is optimized for the case where b is
46 * odd as per HAC Note 14.64 on pp. 610
49 fast_mp_invmod (const mp_int
* a
, mp_int
* b
, mp_int
* c
)
51 mp_int x
, y
, u
, v
, B
, D
;
54 /* 2. [modified] b must be odd */
55 if (mp_iseven (b
) == 1) {
59 /* init all our temps */
60 if ((res
= mp_init_multi(&x
, &y
, &u
, &v
, &B
, &D
, NULL
)) != MP_OKAY
) {
64 /* x == modulus, y == value to invert */
65 if ((res
= mp_copy (b
, &x
)) != MP_OKAY
) {
70 if ((res
= mp_abs (a
, &y
)) != MP_OKAY
) {
74 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
75 if ((res
= mp_copy (&x
, &u
)) != MP_OKAY
) {
78 if ((res
= mp_copy (&y
, &v
)) != MP_OKAY
) {
84 /* 4. while u is even do */
85 while (mp_iseven (&u
) == 1) {
87 if ((res
= mp_div_2 (&u
, &u
)) != MP_OKAY
) {
90 /* 4.2 if B is odd then */
91 if (mp_isodd (&B
) == 1) {
92 if ((res
= mp_sub (&B
, &x
, &B
)) != MP_OKAY
) {
97 if ((res
= mp_div_2 (&B
, &B
)) != MP_OKAY
) {
102 /* 5. while v is even do */
103 while (mp_iseven (&v
) == 1) {
105 if ((res
= mp_div_2 (&v
, &v
)) != MP_OKAY
) {
108 /* 5.2 if D is odd then */
109 if (mp_isodd (&D
) == 1) {
111 if ((res
= mp_sub (&D
, &x
, &D
)) != MP_OKAY
) {
116 if ((res
= mp_div_2 (&D
, &D
)) != MP_OKAY
) {
121 /* 6. if u >= v then */
122 if (mp_cmp (&u
, &v
) != MP_LT
) {
123 /* u = u - v, B = B - D */
124 if ((res
= mp_sub (&u
, &v
, &u
)) != MP_OKAY
) {
128 if ((res
= mp_sub (&B
, &D
, &B
)) != MP_OKAY
) {
132 /* v - v - u, D = D - B */
133 if ((res
= mp_sub (&v
, &u
, &v
)) != MP_OKAY
) {
137 if ((res
= mp_sub (&D
, &B
, &D
)) != MP_OKAY
) {
142 /* if not zero goto step 4 */
143 if (mp_iszero (&u
) == 0) {
147 /* now a = C, b = D, gcd == g*v */
149 /* if v != 1 then there is no inverse */
150 if (mp_cmp_d (&v
, 1) != MP_EQ
) {
155 /* b is now the inverse */
157 while (D
.sign
== MP_NEG
) {
158 if ((res
= mp_add (&D
, b
, &D
)) != MP_OKAY
) {
166 __ERR
:mp_clear_multi (&x
, &y
, &u
, &v
, &B
, &D
, NULL
);
170 /* computes xR**-1 == x (mod N) via Montgomery Reduction
172 * This is an optimized implementation of montgomery_reduce
173 * which uses the comba method to quickly calculate the columns of the
176 * Based on Algorithm 14.32 on pp.601 of HAC.
179 fast_mp_montgomery_reduce (mp_int
* x
, const mp_int
* n
, mp_digit rho
)
182 mp_word W
[MP_WARRAY
];
184 /* get old used count */
187 /* grow a as required */
188 if (x
->alloc
< n
->used
+ 1) {
189 if ((res
= mp_grow (x
, n
->used
+ 1)) != MP_OKAY
) {
194 /* first we have to get the digits of the input into
195 * an array of double precision words W[...]
198 register mp_word
*_W
;
199 register mp_digit
*tmpx
;
201 /* alias for the W[] array */
204 /* alias for the digits of x*/
207 /* copy the digits of a into W[0..a->used-1] */
208 for (ix
= 0; ix
< x
->used
; ix
++) {
212 /* zero the high words of W[a->used..m->used*2] */
213 for (; ix
< n
->used
* 2 + 1; ix
++) {
218 /* now we proceed to zero successive digits
219 * from the least significant upwards
221 for (ix
= 0; ix
< n
->used
; ix
++) {
222 /* mu = ai * m' mod b
224 * We avoid a double precision multiplication (which isn't required)
225 * by casting the value down to a mp_digit. Note this requires
226 * that W[ix-1] have the carry cleared (see after the inner loop)
228 register mp_digit mu
;
229 mu
= (mp_digit
) (((W
[ix
] & MP_MASK
) * rho
) & MP_MASK
);
231 /* a = a + mu * m * b**i
233 * This is computed in place and on the fly. The multiplication
234 * by b**i is handled by offseting which columns the results
237 * Note the comba method normally doesn't handle carries in the
238 * inner loop In this case we fix the carry from the previous
239 * column since the Montgomery reduction requires digits of the
240 * result (so far) [see above] to work. This is
241 * handled by fixing up one carry after the inner loop. The
242 * carry fixups are done in order so after these loops the
243 * first m->used words of W[] have the carries fixed
247 register mp_digit
*tmpn
;
248 register mp_word
*_W
;
250 /* alias for the digits of the modulus */
253 /* Alias for the columns set by an offset of ix */
257 for (iy
= 0; iy
< n
->used
; iy
++) {
258 *_W
++ += ((mp_word
)mu
) * ((mp_word
)*tmpn
++);
262 /* now fix carry for next digit, W[ix+1] */
263 W
[ix
+ 1] += W
[ix
] >> ((mp_word
) DIGIT_BIT
);
266 /* now we have to propagate the carries and
267 * shift the words downward [all those least
268 * significant digits we zeroed].
271 register mp_digit
*tmpx
;
272 register mp_word
*_W
, *_W1
;
274 /* nox fix rest of carries */
276 /* alias for current word */
279 /* alias for next word, where the carry goes */
282 for (; ix
<= n
->used
* 2 + 1; ix
++) {
283 *_W
++ += *_W1
++ >> ((mp_word
) DIGIT_BIT
);
286 /* copy out, A = A/b**n
288 * The result is A/b**n but instead of converting from an
289 * array of mp_word to mp_digit than calling mp_rshd
290 * we just copy them in the right order
293 /* alias for destination word */
296 /* alias for shifted double precision result */
299 for (ix
= 0; ix
< n
->used
+ 1; ix
++) {
300 *tmpx
++ = (mp_digit
)(*_W
++ & ((mp_word
) MP_MASK
));
303 /* zero oldused digits, if the input a was larger than
304 * m->used+1 we'll have to clear the digits
306 for (; ix
< olduse
; ix
++) {
311 /* set the max used and clamp */
312 x
->used
= n
->used
+ 1;
315 /* if A >= m then A = A - m */
316 if (mp_cmp_mag (x
, n
) != MP_LT
) {
317 return s_mp_sub (x
, n
, x
);
322 /* Fast (comba) multiplier
324 * This is the fast column-array [comba] multiplier. It is
325 * designed to compute the columns of the product first
326 * then handle the carries afterwards. This has the effect
327 * of making the nested loops that compute the columns very
328 * simple and schedulable on super-scalar processors.
330 * This has been modified to produce a variable number of
331 * digits of output so if say only a half-product is required
332 * you don't have to compute the upper half (a feature
333 * required for fast Barrett reduction).
335 * Based on Algorithm 14.12 on pp.595 of HAC.
339 fast_s_mp_mul_digs (const mp_int
* a
, const mp_int
* b
, mp_int
* c
, int digs
)
341 int olduse
, res
, pa
, ix
, iz
;
342 mp_digit W
[MP_WARRAY
];
345 /* grow the destination as required */
346 if (c
->alloc
< digs
) {
347 if ((res
= mp_grow (c
, digs
)) != MP_OKAY
) {
352 /* number of output digits to produce */
353 pa
= MIN(digs
, a
->used
+ b
->used
);
355 /* clear the carry */
357 for (ix
= 0; ix
<= pa
; ix
++) {
360 mp_digit
*tmpx
, *tmpy
;
362 /* get offsets into the two bignums */
363 ty
= MIN(b
->used
-1, ix
);
366 /* setup temp aliases */
370 /* this is the number of times the loop will iterrate, essentially its
371 while (tx++ < a->used && ty-- >= 0) { ... }
373 iy
= MIN(a
->used
-tx
, ty
+1);
376 for (iz
= 0; iz
< iy
; ++iz
) {
377 _W
+= ((mp_word
)*tmpx
++)*((mp_word
)*tmpy
--);
381 W
[ix
] = ((mp_digit
)_W
) & MP_MASK
;
383 /* make next carry */
384 _W
= _W
>> ((mp_word
)DIGIT_BIT
);
392 register mp_digit
*tmpc
;
394 for (ix
= 0; ix
< digs
; ix
++) {
395 /* now extract the previous digit [below the carry] */
399 /* clear unused digits [that existed in the old copy of c] */
400 for (; ix
< olduse
; ix
++) {
408 /* this is a modified version of fast_s_mul_digs that only produces
409 * output digits *above* digs. See the comments for fast_s_mul_digs
410 * to see how it works.
412 * This is used in the Barrett reduction since for one of the multiplications
413 * only the higher digits were needed. This essentially halves the work.
415 * Based on Algorithm 14.12 on pp.595 of HAC.
418 fast_s_mp_mul_high_digs (const mp_int
* a
, const mp_int
* b
, mp_int
* c
, int digs
)
420 int olduse
, res
, pa
, ix
, iz
;
421 mp_digit W
[MP_WARRAY
];
424 /* grow the destination as required */
425 pa
= a
->used
+ b
->used
;
427 if ((res
= mp_grow (c
, pa
)) != MP_OKAY
) {
432 /* number of output digits to produce */
433 pa
= a
->used
+ b
->used
;
435 for (ix
= digs
; ix
<= pa
; ix
++) {
437 mp_digit
*tmpx
, *tmpy
;
439 /* get offsets into the two bignums */
440 ty
= MIN(b
->used
-1, ix
);
443 /* setup temp aliases */
447 /* this is the number of times the loop will iterrate, essentially its
448 while (tx++ < a->used && ty-- >= 0) { ... }
450 iy
= MIN(a
->used
-tx
, ty
+1);
453 for (iz
= 0; iz
< iy
; iz
++) {
454 _W
+= ((mp_word
)*tmpx
++)*((mp_word
)*tmpy
--);
458 W
[ix
] = ((mp_digit
)_W
) & MP_MASK
;
460 /* make next carry */
461 _W
= _W
>> ((mp_word
)DIGIT_BIT
);
469 register mp_digit
*tmpc
;
472 for (ix
= digs
; ix
<= pa
; ix
++) {
473 /* now extract the previous digit [below the carry] */
477 /* clear unused digits [that existed in the old copy of c] */
478 for (; ix
< olduse
; ix
++) {
488 * This is the comba method where the columns of the product
489 * are computed first then the carries are computed. This
490 * has the effect of making a very simple inner loop that
491 * is executed the most
493 * W2 represents the outer products and W the inner.
495 * A further optimizations is made because the inner
496 * products are of the form "A * B * 2". The *2 part does
497 * not need to be computed until the end which is good
498 * because 64-bit shifts are slow!
500 * Based on Algorithm 14.16 on pp.597 of HAC.
503 /* the jist of squaring...
505 you do like mult except the offset of the tmpx [one that starts closer to zero]
506 can't equal the offset of tmpy. So basically you set up iy like before then you min it with
507 (ty-tx) so that it never happens. You double all those you add in the inner loop
509 After that loop you do the squares and add them in.
511 Remove W2 and don't memset W
515 int fast_s_mp_sqr (const mp_int
* a
, mp_int
* b
)
517 int olduse
, res
, pa
, ix
, iz
;
518 mp_digit W
[MP_WARRAY
], *tmpx
;
521 /* grow the destination as required */
522 pa
= a
->used
+ a
->used
;
524 if ((res
= mp_grow (b
, pa
)) != MP_OKAY
) {
529 /* number of output digits to produce */
531 for (ix
= 0; ix
<= pa
; ix
++) {
539 /* get offsets into the two bignums */
540 ty
= MIN(a
->used
-1, ix
);
543 /* setup temp aliases */
547 /* this is the number of times the loop will iterrate, essentially its
548 while (tx++ < a->used && ty-- >= 0) { ... }
550 iy
= MIN(a
->used
-tx
, ty
+1);
552 /* now for squaring tx can never equal ty
553 * we halve the distance since they approach at a rate of 2x
554 * and we have to round because odd cases need to be executed
556 iy
= MIN(iy
, (ty
-tx
+1)>>1);
559 for (iz
= 0; iz
< iy
; iz
++) {
560 _W
+= ((mp_word
)*tmpx
++)*((mp_word
)*tmpy
--);
563 /* double the inner product and add carry */
566 /* even columns have the square term in them */
568 _W
+= ((mp_word
)a
->dp
[ix
>>1])*((mp_word
)a
->dp
[ix
>>1]);
574 /* make next carry */
575 W1
= _W
>> ((mp_word
)DIGIT_BIT
);
580 b
->used
= a
->used
+a
->used
;
585 for (ix
= 0; ix
< pa
; ix
++) {
586 *tmpb
++ = W
[ix
] & MP_MASK
;
589 /* clear unused digits [that existed in the old copy of c] */
590 for (; ix
< olduse
; ix
++) {
600 * Simple algorithm which zeroes the int, grows it then just sets one bit
604 mp_2expt (mp_int
* a
, int b
)
608 /* zero a as per default */
611 /* grow a to accommodate the single bit */
612 if ((res
= mp_grow (a
, b
/ DIGIT_BIT
+ 1)) != MP_OKAY
) {
616 /* set the used count of where the bit will go */
617 a
->used
= b
/ DIGIT_BIT
+ 1;
619 /* put the single bit in its place */
620 a
->dp
[b
/ DIGIT_BIT
] = ((mp_digit
)1) << (b
% DIGIT_BIT
);
627 * Simple function copies the input and fixes the sign to positive
630 mp_abs (const mp_int
* a
, mp_int
* b
)
636 if ((res
= mp_copy (a
, b
)) != MP_OKAY
) {
641 /* force the sign of b to positive */
647 /* high level addition (handles signs) */
648 int mp_add (mp_int
* a
, mp_int
* b
, mp_int
* c
)
652 /* get sign of both inputs */
656 /* handle two cases, not four */
658 /* both positive or both negative */
659 /* add their magnitudes, copy the sign */
661 res
= s_mp_add (a
, b
, c
);
663 /* one positive, the other negative */
664 /* subtract the one with the greater magnitude from */
665 /* the one of the lesser magnitude. The result gets */
666 /* the sign of the one with the greater magnitude. */
667 if (mp_cmp_mag (a
, b
) == MP_LT
) {
669 res
= s_mp_sub (b
, a
, c
);
672 res
= s_mp_sub (a
, b
, c
);
679 /* single digit addition */
681 mp_add_d (mp_int
* a
, mp_digit b
, mp_int
* c
)
683 int res
, ix
, oldused
;
684 mp_digit
*tmpa
, *tmpc
, mu
;
686 /* grow c as required */
687 if (c
->alloc
< a
->used
+ 1) {
688 if ((res
= mp_grow(c
, a
->used
+ 1)) != MP_OKAY
) {
693 /* if a is negative and |a| >= b, call c = |a| - b */
694 if (a
->sign
== MP_NEG
&& (a
->used
> 1 || a
->dp
[0] >= b
)) {
695 /* temporarily fix sign of a */
699 res
= mp_sub_d(a
, b
, c
);
702 a
->sign
= c
->sign
= MP_NEG
;
707 /* old number of used digits in c */
710 /* sign always positive */
716 /* destination alias */
719 /* if a is positive */
720 if (a
->sign
== MP_ZPOS
) {
721 /* add digit, after this we're propagating
725 mu
= *tmpc
>> DIGIT_BIT
;
728 /* now handle rest of the digits */
729 for (ix
= 1; ix
< a
->used
; ix
++) {
730 *tmpc
= *tmpa
++ + mu
;
731 mu
= *tmpc
>> DIGIT_BIT
;
734 /* set final carry */
739 c
->used
= a
->used
+ 1;
741 /* a was negative and |a| < b */
744 /* the result is a single digit */
746 *tmpc
++ = b
- a
->dp
[0];
751 /* setup count so the clearing of oldused
752 * can fall through correctly
757 /* now zero to oldused */
758 while (ix
++ < oldused
) {
766 /* trim unused digits
768 * This is used to ensure that leading zero digits are
769 * trimed and the leading "used" digit will be non-zero
770 * Typically very fast. Also fixes the sign if there
771 * are no more leading digits
774 mp_clamp (mp_int
* a
)
776 /* decrease used while the most significant digit is
779 while (a
->used
> 0 && a
->dp
[a
->used
- 1] == 0) {
783 /* reset the sign flag if used == 0 */
789 /* clear one (frees) */
791 mp_clear (mp_int
* a
)
795 /* only do anything if a hasn't been freed previously */
797 /* first zero the digits */
798 for (i
= 0; i
< a
->used
; i
++) {
805 /* reset members to make debugging easier */
807 a
->alloc
= a
->used
= 0;
813 void mp_clear_multi(mp_int
*mp
, ...)
815 mp_int
* next_mp
= mp
;
818 while (next_mp
!= NULL
) {
820 next_mp
= va_arg(args
, mp_int
*);
825 /* compare two ints (signed)*/
827 mp_cmp (const mp_int
* a
, const mp_int
* b
)
829 /* compare based on sign */
830 if (a
->sign
!= b
->sign
) {
831 if (a
->sign
== MP_NEG
) {
839 if (a
->sign
== MP_NEG
) {
840 /* if negative compare opposite direction */
841 return mp_cmp_mag(b
, a
);
843 return mp_cmp_mag(a
, b
);
847 /* compare a digit */
848 int mp_cmp_d(const mp_int
* a
, mp_digit b
)
850 /* compare based on sign */
851 if (a
->sign
== MP_NEG
) {
855 /* compare based on magnitude */
860 /* compare the only digit of a to b */
863 } else if (a
->dp
[0] < b
) {
870 /* compare maginitude of two ints (unsigned) */
871 int mp_cmp_mag (const mp_int
* a
, const mp_int
* b
)
874 mp_digit
*tmpa
, *tmpb
;
876 /* compare based on # of non-zero digits */
877 if (a
->used
> b
->used
) {
881 if (a
->used
< b
->used
) {
886 tmpa
= a
->dp
+ (a
->used
- 1);
889 tmpb
= b
->dp
+ (a
->used
- 1);
891 /* compare based on digits */
892 for (n
= 0; n
< a
->used
; ++n
, --tmpa
, --tmpb
) {
904 static const int lnz
[16] = {
905 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
908 /* Counts the number of lsbs which are zero before the first zero bit */
909 int mp_cnt_lsb(const mp_int
*a
)
915 if (mp_iszero(a
) == 1) {
919 /* scan lower digits until non-zero */
920 for (x
= 0; x
< a
->used
&& a
->dp
[x
] == 0; x
++);
924 /* now scan this digit until a 1 is found */
937 mp_copy (const mp_int
* a
, mp_int
* b
)
941 /* if dst == src do nothing */
947 if (b
->alloc
< a
->used
) {
948 if ((res
= mp_grow (b
, a
->used
)) != MP_OKAY
) {
953 /* zero b and copy the parameters over */
955 register mp_digit
*tmpa
, *tmpb
;
957 /* pointer aliases */
965 /* copy all the digits */
966 for (n
= 0; n
< a
->used
; n
++) {
970 /* clear high digits */
971 for (; n
< b
->used
; n
++) {
976 /* copy used count and sign */
982 /* returns the number of bits in an int */
984 mp_count_bits (const mp_int
* a
)
994 /* get number of digits and add that */
995 r
= (a
->used
- 1) * DIGIT_BIT
;
997 /* take the last digit and count the bits in it */
998 q
= a
->dp
[a
->used
- 1];
999 while (q
> ((mp_digit
) 0)) {
1001 q
>>= ((mp_digit
) 1);
1006 /* integer signed division.
1007 * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
1008 * HAC pp.598 Algorithm 14.20
1010 * Note that the description in HAC is horribly
1011 * incomplete. For example, it doesn't consider
1012 * the case where digits are removed from 'x' in
1013 * the inner loop. It also doesn't consider the
1014 * case that y has fewer than three digits, etc..
1016 * The overall algorithm is as described as
1017 * 14.20 from HAC but fixed to treat these cases.
1019 int mp_div (const mp_int
* a
, const mp_int
* b
, mp_int
* c
, mp_int
* d
)
1021 mp_int q
, x
, y
, t1
, t2
;
1022 int res
, n
, t
, i
, norm
, neg
;
1024 /* is divisor zero ? */
1025 if (mp_iszero (b
) == 1) {
1029 /* if a < b then q=0, r = a */
1030 if (mp_cmp_mag (a
, b
) == MP_LT
) {
1032 res
= mp_copy (a
, d
);
1042 if ((res
= mp_init_size (&q
, a
->used
+ 2)) != MP_OKAY
) {
1045 q
.used
= a
->used
+ 2;
1047 if ((res
= mp_init (&t1
)) != MP_OKAY
) {
1051 if ((res
= mp_init (&t2
)) != MP_OKAY
) {
1055 if ((res
= mp_init_copy (&x
, a
)) != MP_OKAY
) {
1059 if ((res
= mp_init_copy (&y
, b
)) != MP_OKAY
) {
1064 neg
= (a
->sign
== b
->sign
) ? MP_ZPOS
: MP_NEG
;
1065 x
.sign
= y
.sign
= MP_ZPOS
;
1067 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1068 norm
= mp_count_bits(&y
) % DIGIT_BIT
;
1069 if (norm
< (int)(DIGIT_BIT
-1)) {
1070 norm
= (DIGIT_BIT
-1) - norm
;
1071 if ((res
= mp_mul_2d (&x
, norm
, &x
)) != MP_OKAY
) {
1074 if ((res
= mp_mul_2d (&y
, norm
, &y
)) != MP_OKAY
) {
1081 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
1085 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1086 if ((res
= mp_lshd (&y
, n
- t
)) != MP_OKAY
) { /* y = y*b**{n-t} */
1090 while (mp_cmp (&x
, &y
) != MP_LT
) {
1092 if ((res
= mp_sub (&x
, &y
, &x
)) != MP_OKAY
) {
1097 /* reset y by shifting it back down */
1098 mp_rshd (&y
, n
- t
);
1100 /* step 3. for i from n down to (t + 1) */
1101 for (i
= n
; i
>= (t
+ 1); i
--) {
1106 /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1107 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1108 if (x
.dp
[i
] == y
.dp
[t
]) {
1109 q
.dp
[i
- t
- 1] = ((((mp_digit
)1) << DIGIT_BIT
) - 1);
1112 tmp
= ((mp_word
) x
.dp
[i
]) << ((mp_word
) DIGIT_BIT
);
1113 tmp
|= ((mp_word
) x
.dp
[i
- 1]);
1114 tmp
/= ((mp_word
) y
.dp
[t
]);
1115 if (tmp
> (mp_word
) MP_MASK
)
1117 q
.dp
[i
- t
- 1] = (mp_digit
) (tmp
& (mp_word
) (MP_MASK
));
1120 /* while (q{i-t-1} * (yt * b + y{t-1})) >
1121 xi * b**2 + xi-1 * b + xi-2
1125 q
.dp
[i
- t
- 1] = (q
.dp
[i
- t
- 1] + 1) & MP_MASK
;
1127 q
.dp
[i
- t
- 1] = (q
.dp
[i
- t
- 1] - 1) & MP_MASK
;
1129 /* find left hand */
1131 t1
.dp
[0] = (t
- 1 < 0) ? 0 : y
.dp
[t
- 1];
1134 if ((res
= mp_mul_d (&t1
, q
.dp
[i
- t
- 1], &t1
)) != MP_OKAY
) {
1138 /* find right hand */
1139 t2
.dp
[0] = (i
- 2 < 0) ? 0 : x
.dp
[i
- 2];
1140 t2
.dp
[1] = (i
- 1 < 0) ? 0 : x
.dp
[i
- 1];
1143 } while (mp_cmp_mag(&t1
, &t2
) == MP_GT
);
1145 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1146 if ((res
= mp_mul_d (&y
, q
.dp
[i
- t
- 1], &t1
)) != MP_OKAY
) {
1150 if ((res
= mp_lshd (&t1
, i
- t
- 1)) != MP_OKAY
) {
1154 if ((res
= mp_sub (&x
, &t1
, &x
)) != MP_OKAY
) {
1158 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1159 if (x
.sign
== MP_NEG
) {
1160 if ((res
= mp_copy (&y
, &t1
)) != MP_OKAY
) {
1163 if ((res
= mp_lshd (&t1
, i
- t
- 1)) != MP_OKAY
) {
1166 if ((res
= mp_add (&x
, &t1
, &x
)) != MP_OKAY
) {
1170 q
.dp
[i
- t
- 1] = (q
.dp
[i
- t
- 1] - 1UL) & MP_MASK
;
1174 /* now q is the quotient and x is the remainder
1175 * [which we have to normalize]
1178 /* get sign before writing to c */
1179 x
.sign
= x
.used
== 0 ? MP_ZPOS
: a
->sign
;
1188 mp_div_2d (&x
, norm
, &x
, NULL
);
1196 __T2
:mp_clear (&t2
);
1197 __T1
:mp_clear (&t1
);
1203 int mp_div_2(const mp_int
* a
, mp_int
* b
)
1205 int x
, res
, oldused
;
1208 if (b
->alloc
< a
->used
) {
1209 if ((res
= mp_grow (b
, a
->used
)) != MP_OKAY
) {
1217 register mp_digit r
, rr
, *tmpa
, *tmpb
;
1220 tmpa
= a
->dp
+ b
->used
- 1;
1223 tmpb
= b
->dp
+ b
->used
- 1;
1227 for (x
= b
->used
- 1; x
>= 0; x
--) {
1228 /* get the carry for the next iteration */
1231 /* shift the current digit, add in carry and store */
1232 *tmpb
-- = (*tmpa
-- >> 1) | (r
<< (DIGIT_BIT
- 1));
1234 /* forward carry to next iteration */
1238 /* zero excess digits */
1239 tmpb
= b
->dp
+ b
->used
;
1240 for (x
= b
->used
; x
< oldused
; x
++) {
1249 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
1250 int mp_div_2d (const mp_int
* a
, int b
, mp_int
* c
, mp_int
* d
)
1257 /* if the shift count is <= 0 then we do no work */
1259 res
= mp_copy (a
, c
);
1266 if ((res
= mp_init (&t
)) != MP_OKAY
) {
1270 /* get the remainder */
1272 if ((res
= mp_mod_2d (a
, b
, &t
)) != MP_OKAY
) {
1279 if ((res
= mp_copy (a
, c
)) != MP_OKAY
) {
1284 /* shift by as many digits in the bit count */
1285 if (b
>= (int)DIGIT_BIT
) {
1286 mp_rshd (c
, b
/ DIGIT_BIT
);
1289 /* shift any bit count < DIGIT_BIT */
1290 D
= (mp_digit
) (b
% DIGIT_BIT
);
1292 register mp_digit
*tmpc
, mask
, shift
;
1295 mask
= (((mp_digit
)1) << D
) - 1;
1298 shift
= DIGIT_BIT
- D
;
1301 tmpc
= c
->dp
+ (c
->used
- 1);
1305 for (x
= c
->used
- 1; x
>= 0; x
--) {
1306 /* get the lower bits of this word in a temp */
1309 /* shift the current word and mix in the carry bits from the previous word */
1310 *tmpc
= (*tmpc
>> D
) | (r
<< shift
);
1313 /* set the carry to the carry bits of the current word found above */
1325 static int s_is_power_of_two(mp_digit b
, int *p
)
1329 for (x
= 1; x
< DIGIT_BIT
; x
++) {
1330 if (b
== (((mp_digit
)1)<<x
)) {
1338 /* single digit division (based on routine from MPI) */
1339 int mp_div_d (const mp_int
* a
, mp_digit b
, mp_int
* c
, mp_digit
* d
)
1346 /* cannot divide by zero */
1352 if (b
== 1 || mp_iszero(a
) == 1) {
1357 return mp_copy(a
, c
);
1362 /* power of two ? */
1363 if (s_is_power_of_two(b
, &ix
) == 1) {
1365 *d
= a
->dp
[0] & ((((mp_digit
)1)<<ix
) - 1);
1368 return mp_div_2d(a
, ix
, c
, NULL
);
1373 /* no easy answer [c'est la vie]. Just division */
1374 if ((res
= mp_init_size(&q
, a
->used
)) != MP_OKAY
) {
1381 for (ix
= a
->used
- 1; ix
>= 0; ix
--) {
1382 w
= (w
<< ((mp_word
)DIGIT_BIT
)) | ((mp_word
)a
->dp
[ix
]);
1385 t
= (mp_digit
)(w
/ b
);
1386 w
-= ((mp_word
)t
) * ((mp_word
)b
);
1390 q
.dp
[ix
] = (mp_digit
)t
;
1406 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
1408 * Based on algorithm from the paper
1410 * "Generating Efficient Primes for Discrete Log Cryptosystems"
1411 * Chae Hoon Lim, Pil Loong Lee,
1412 * POSTECH Information Research Laboratories
1414 * The modulus must be of a special format [see manual]
1416 * Has been modified to use algorithm 7.10 from the LTM book instead
1418 * Input x must be in the range 0 <= x <= (n-1)**2
1421 mp_dr_reduce (mp_int
* x
, const mp_int
* n
, mp_digit k
)
1425 mp_digit mu
, *tmpx1
, *tmpx2
;
1427 /* m = digits in modulus */
1430 /* ensure that "x" has at least 2m digits */
1431 if (x
->alloc
< m
+ m
) {
1432 if ((err
= mp_grow (x
, m
+ m
)) != MP_OKAY
) {
1437 /* top of loop, this is where the code resumes if
1438 * another reduction pass is required.
1441 /* aliases for digits */
1442 /* alias for lower half of x */
1445 /* alias for upper half of x, or x/B**m */
1448 /* set carry to zero */
1451 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
1452 for (i
= 0; i
< m
; i
++) {
1453 r
= ((mp_word
)*tmpx2
++) * ((mp_word
)k
) + *tmpx1
+ mu
;
1454 *tmpx1
++ = (mp_digit
)(r
& MP_MASK
);
1455 mu
= (mp_digit
)(r
>> ((mp_word
)DIGIT_BIT
));
1458 /* set final carry */
1461 /* zero words above m */
1462 for (i
= m
+ 1; i
< x
->used
; i
++) {
1466 /* clamp, sub and return */
1469 /* if x >= n then subtract and reduce again
1470 * Each successive "recursion" makes the input smaller and smaller.
1472 if (mp_cmp_mag (x
, n
) != MP_LT
) {
1479 /* determines the setup value */
1480 void mp_dr_setup(const mp_int
*a
, mp_digit
*d
)
1482 /* the casts are required if DIGIT_BIT is one less than
1483 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
1485 *d
= (mp_digit
)((((mp_word
)1) << ((mp_word
)DIGIT_BIT
)) -
1486 ((mp_word
)a
->dp
[0]));
1489 /* swap the elements of two integers, for cases where you can't simply swap the
1490 * mp_int pointers around
1493 mp_exch (mp_int
* a
, mp_int
* b
)
1502 /* this is a shell function that calls either the normal or Montgomery
1503 * exptmod functions. Originally the call to the montgomery code was
1504 * embedded in the normal function but that wasted a lot of stack space
1505 * for nothing (since 99% of the time the Montgomery code would be called)
1507 int mp_exptmod (const mp_int
* G
, const mp_int
* X
, mp_int
* P
, mp_int
* Y
)
1511 /* modulus P must be positive */
1512 if (P
->sign
== MP_NEG
) {
1516 /* if exponent X is negative we have to recurse */
1517 if (X
->sign
== MP_NEG
) {
1521 /* first compute 1/G mod P */
1522 if ((err
= mp_init(&tmpG
)) != MP_OKAY
) {
1525 if ((err
= mp_invmod(G
, P
, &tmpG
)) != MP_OKAY
) {
1531 if ((err
= mp_init(&tmpX
)) != MP_OKAY
) {
1535 if ((err
= mp_abs(X
, &tmpX
)) != MP_OKAY
) {
1536 mp_clear_multi(&tmpG
, &tmpX
, NULL
);
1540 /* and now compute (1/G)**|X| instead of G**X [X < 0] */
1541 err
= mp_exptmod(&tmpG
, &tmpX
, P
, Y
);
1542 mp_clear_multi(&tmpG
, &tmpX
, NULL
);
1548 /* if the modulus is odd or dr != 0 use the fast method */
1549 if (mp_isodd (P
) == 1 || dr
!= 0) {
1550 return mp_exptmod_fast (G
, X
, P
, Y
, dr
);
1552 /* otherwise use the generic Barrett reduction technique */
1553 return s_mp_exptmod (G
, X
, P
, Y
);
1557 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1559 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
1560 * The value of k changes based on the size of the exponent.
1562 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1566 mp_exptmod_fast (const mp_int
* G
, const mp_int
* X
, mp_int
* P
, mp_int
* Y
, int redmode
)
1570 int err
, bitbuf
, bitcpy
, bitcnt
, mode
, digidx
, x
, y
, winsize
;
1572 /* use a pointer to the reduction algorithm. This allows us to use
1573 * one of many reduction algorithms without modding the guts of
1574 * the code with if statements everywhere.
1576 int (*redux
)(mp_int
*,const mp_int
*,mp_digit
);
1578 /* find window size */
1579 x
= mp_count_bits (X
);
1582 } else if (x
<= 36) {
1584 } else if (x
<= 140) {
1586 } else if (x
<= 450) {
1588 } else if (x
<= 1303) {
1590 } else if (x
<= 3529) {
1597 /* init first cell */
1598 if ((err
= mp_init(&M
[1])) != MP_OKAY
) {
1602 /* now init the second half of the array */
1603 for (x
= 1<<(winsize
-1); x
< (1 << winsize
); x
++) {
1604 if ((err
= mp_init(&M
[x
])) != MP_OKAY
) {
1605 for (y
= 1<<(winsize
-1); y
< x
; y
++) {
1613 /* determine and setup reduction code */
1615 /* now setup montgomery */
1616 if ((err
= mp_montgomery_setup (P
, &mp
)) != MP_OKAY
) {
1620 /* automatically pick the comba one if available (saves quite a few calls/ifs) */
1621 if (((P
->used
* 2 + 1) < MP_WARRAY
) &&
1622 P
->used
< (1 << ((CHAR_BIT
* sizeof (mp_word
)) - (2 * DIGIT_BIT
)))) {
1623 redux
= fast_mp_montgomery_reduce
;
1625 /* use slower baseline Montgomery method */
1626 redux
= mp_montgomery_reduce
;
1628 } else if (redmode
== 1) {
1629 /* setup DR reduction for moduli of the form B**k - b */
1630 mp_dr_setup(P
, &mp
);
1631 redux
= mp_dr_reduce
;
1633 /* setup DR reduction for moduli of the form 2**k - b */
1634 if ((err
= mp_reduce_2k_setup(P
, &mp
)) != MP_OKAY
) {
1637 redux
= mp_reduce_2k
;
1641 if ((err
= mp_init (&res
)) != MP_OKAY
) {
1649 * The first half of the table is not computed though accept for M[0] and M[1]
1653 /* now we need R mod m */
1654 if ((err
= mp_montgomery_calc_normalization (&res
, P
)) != MP_OKAY
) {
1658 /* now set M[1] to G * R mod m */
1659 if ((err
= mp_mulmod (G
, &res
, P
, &M
[1])) != MP_OKAY
) {
1664 if ((err
= mp_mod(G
, P
, &M
[1])) != MP_OKAY
) {
1669 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
1670 if ((err
= mp_copy (&M
[1], &M
[1 << (winsize
- 1)])) != MP_OKAY
) {
1674 for (x
= 0; x
< (winsize
- 1); x
++) {
1675 if ((err
= mp_sqr (&M
[1 << (winsize
- 1)], &M
[1 << (winsize
- 1)])) != MP_OKAY
) {
1678 if ((err
= redux (&M
[1 << (winsize
- 1)], P
, mp
)) != MP_OKAY
) {
1683 /* create upper table */
1684 for (x
= (1 << (winsize
- 1)) + 1; x
< (1 << winsize
); x
++) {
1685 if ((err
= mp_mul (&M
[x
- 1], &M
[1], &M
[x
])) != MP_OKAY
) {
1688 if ((err
= redux (&M
[x
], P
, mp
)) != MP_OKAY
) {
1693 /* set initial mode and bit cnt */
1697 digidx
= X
->used
- 1;
1702 /* grab next digit as required */
1703 if (--bitcnt
== 0) {
1704 /* if digidx == -1 we are out of digits so break */
1708 /* read next digit and reset bitcnt */
1709 buf
= X
->dp
[digidx
--];
1710 bitcnt
= (int)DIGIT_BIT
;
1713 /* grab the next msb from the exponent */
1714 y
= (mp_digit
)(buf
>> (DIGIT_BIT
- 1)) & 1;
1715 buf
<<= (mp_digit
)1;
1717 /* if the bit is zero and mode == 0 then we ignore it
1718 * These represent the leading zero bits before the first 1 bit
1719 * in the exponent. Technically this opt is not required but it
1720 * does lower the # of trivial squaring/reductions used
1722 if (mode
== 0 && y
== 0) {
1726 /* if the bit is zero and mode == 1 then we square */
1727 if (mode
== 1 && y
== 0) {
1728 if ((err
= mp_sqr (&res
, &res
)) != MP_OKAY
) {
1731 if ((err
= redux (&res
, P
, mp
)) != MP_OKAY
) {
1737 /* else we add it to the window */
1738 bitbuf
|= (y
<< (winsize
- ++bitcpy
));
1741 if (bitcpy
== winsize
) {
1742 /* ok window is filled so square as required and multiply */
1744 for (x
= 0; x
< winsize
; x
++) {
1745 if ((err
= mp_sqr (&res
, &res
)) != MP_OKAY
) {
1748 if ((err
= redux (&res
, P
, mp
)) != MP_OKAY
) {
1754 if ((err
= mp_mul (&res
, &M
[bitbuf
], &res
)) != MP_OKAY
) {
1757 if ((err
= redux (&res
, P
, mp
)) != MP_OKAY
) {
1761 /* empty window and reset */
1768 /* if bits remain then square/multiply */
1769 if (mode
== 2 && bitcpy
> 0) {
1770 /* square then multiply if the bit is set */
1771 for (x
= 0; x
< bitcpy
; x
++) {
1772 if ((err
= mp_sqr (&res
, &res
)) != MP_OKAY
) {
1775 if ((err
= redux (&res
, P
, mp
)) != MP_OKAY
) {
1779 /* get next bit of the window */
1781 if ((bitbuf
& (1 << winsize
)) != 0) {
1783 if ((err
= mp_mul (&res
, &M
[1], &res
)) != MP_OKAY
) {
1786 if ((err
= redux (&res
, P
, mp
)) != MP_OKAY
) {
1794 /* fixup result if Montgomery reduction is used
1795 * recall that any value in a Montgomery system is
1796 * actually multiplied by R mod n. So we have
1797 * to reduce one more time to cancel out the factor
1800 if ((err
= redux(&res
, P
, mp
)) != MP_OKAY
) {
1805 /* swap res with Y */
1808 __RES
:mp_clear (&res
);
1811 for (x
= 1<<(winsize
-1); x
< (1 << winsize
); x
++) {
1817 /* Greatest Common Divisor using the binary method */
1818 int mp_gcd (const mp_int
* a
, const mp_int
* b
, mp_int
* c
)
1821 int k
, u_lsb
, v_lsb
, res
;
1823 /* either zero than gcd is the largest */
1824 if (mp_iszero (a
) == 1 && mp_iszero (b
) == 0) {
1825 return mp_abs (b
, c
);
1827 if (mp_iszero (a
) == 0 && mp_iszero (b
) == 1) {
1828 return mp_abs (a
, c
);
1831 /* optimized. At this point if a == 0 then
1832 * b must equal zero too
1834 if (mp_iszero (a
) == 1) {
1839 /* get copies of a and b we can modify */
1840 if ((res
= mp_init_copy (&u
, a
)) != MP_OKAY
) {
1844 if ((res
= mp_init_copy (&v
, b
)) != MP_OKAY
) {
1848 /* must be positive for the remainder of the algorithm */
1849 u
.sign
= v
.sign
= MP_ZPOS
;
1851 /* B1. Find the common power of two for u and v */
1852 u_lsb
= mp_cnt_lsb(&u
);
1853 v_lsb
= mp_cnt_lsb(&v
);
1854 k
= MIN(u_lsb
, v_lsb
);
1857 /* divide the power of two out */
1858 if ((res
= mp_div_2d(&u
, k
, &u
, NULL
)) != MP_OKAY
) {
1862 if ((res
= mp_div_2d(&v
, k
, &v
, NULL
)) != MP_OKAY
) {
1867 /* divide any remaining factors of two out */
1869 if ((res
= mp_div_2d(&u
, u_lsb
- k
, &u
, NULL
)) != MP_OKAY
) {
1875 if ((res
= mp_div_2d(&v
, v_lsb
- k
, &v
, NULL
)) != MP_OKAY
) {
1880 while (mp_iszero(&v
) == 0) {
1881 /* make sure v is the largest */
1882 if (mp_cmp_mag(&u
, &v
) == MP_GT
) {
1883 /* swap u and v to make sure v is >= u */
1887 /* subtract smallest from largest */
1888 if ((res
= s_mp_sub(&v
, &u
, &v
)) != MP_OKAY
) {
1892 /* Divide out all factors of two */
1893 if ((res
= mp_div_2d(&v
, mp_cnt_lsb(&v
), &v
, NULL
)) != MP_OKAY
) {
1898 /* multiply by 2**k which we divided out at the beginning */
1899 if ((res
= mp_mul_2d (&u
, k
, c
)) != MP_OKAY
) {
1909 /* get the lower 32-bits of an mp_int */
1910 unsigned long mp_get_int(const mp_int
* a
)
1919 /* get number of digits of the lsb we have to read */
1920 i
= MIN(a
->used
,(int)((sizeof(unsigned long)*CHAR_BIT
+DIGIT_BIT
-1)/DIGIT_BIT
))-1;
1922 /* get most significant digit of result */
1926 res
= (res
<< DIGIT_BIT
) | DIGIT(a
,i
);
1929 /* force result to 32-bits always so it is consistent on non 32-bit platforms */
1930 return res
& 0xFFFFFFFFUL
;
1933 /* grow as required */
1934 int mp_grow (mp_int
* a
, int size
)
1939 /* if the alloc size is smaller alloc more ram */
1940 if (a
->alloc
< size
) {
1941 /* ensure there are always at least MP_PREC digits extra on top */
1942 size
+= (MP_PREC
* 2) - (size
% MP_PREC
);
1944 /* reallocate the array a->dp
1946 * We store the return in a temporary variable
1947 * in case the operation failed we don't want
1948 * to overwrite the dp member of a.
1950 tmp
= realloc (a
->dp
, sizeof (mp_digit
) * size
);
1952 /* reallocation failed but "a" is still valid [can be freed] */
1956 /* reallocation succeeded so set a->dp */
1959 /* zero excess digits */
1962 for (; i
< a
->alloc
; i
++) {
1969 /* init a new mp_int */
1970 int mp_init (mp_int
* a
)
1974 /* allocate memory required and clear it */
1975 a
->dp
= malloc (sizeof (mp_digit
) * MP_PREC
);
1976 if (a
->dp
== NULL
) {
1980 /* set the digits to zero */
1981 for (i
= 0; i
< MP_PREC
; i
++) {
1985 /* set the used to zero, allocated digits to the default precision
1986 * and sign to positive */
1994 /* creates "a" then copies b into it */
1995 int mp_init_copy (mp_int
* a
, const mp_int
* b
)
1999 if ((res
= mp_init (a
)) != MP_OKAY
) {
2002 return mp_copy (b
, a
);
2005 int mp_init_multi(mp_int
*mp
, ...)
2007 mp_err res
= MP_OKAY
; /* Assume ok until proven otherwise */
2008 int n
= 0; /* Number of ok inits */
2009 mp_int
* cur_arg
= mp
;
2012 va_start(args
, mp
); /* init args to next argument from caller */
2013 while (cur_arg
!= NULL
) {
2014 if (mp_init(cur_arg
) != MP_OKAY
) {
2015 /* Oops - error! Back-track and mp_clear what we already
2016 succeeded in init-ing, then return error.
2020 /* end the current list */
2023 /* now start cleaning up */
2025 va_start(clean_args
, mp
);
2028 cur_arg
= va_arg(clean_args
, mp_int
*);
2035 cur_arg
= va_arg(args
, mp_int
*);
2038 return res
; /* Assumed ok, if error flagged above. */
2041 /* init an mp_init for a given size */
2042 int mp_init_size (mp_int
* a
, int size
)
2046 /* pad size so there are always extra digits */
2047 size
+= (MP_PREC
* 2) - (size
% MP_PREC
);
2050 a
->dp
= malloc (sizeof (mp_digit
) * size
);
2051 if (a
->dp
== NULL
) {
2055 /* set the members */
2060 /* zero the digits */
2061 for (x
= 0; x
< size
; x
++) {
2068 /* hac 14.61, pp608 */
2069 int mp_invmod (const mp_int
* a
, mp_int
* b
, mp_int
* c
)
2071 /* b cannot be negative */
2072 if (b
->sign
== MP_NEG
|| mp_iszero(b
) == 1) {
2076 /* if the modulus is odd we can use a faster routine instead */
2077 if (mp_isodd (b
) == 1) {
2078 return fast_mp_invmod (a
, b
, c
);
2081 return mp_invmod_slow(a
, b
, c
);
2084 /* hac 14.61, pp608 */
2085 int mp_invmod_slow (const mp_int
* a
, mp_int
* b
, mp_int
* c
)
2087 mp_int x
, y
, u
, v
, A
, B
, C
, D
;
2090 /* b cannot be negative */
2091 if (b
->sign
== MP_NEG
|| mp_iszero(b
) == 1) {
2096 if ((res
= mp_init_multi(&x
, &y
, &u
, &v
,
2097 &A
, &B
, &C
, &D
, NULL
)) != MP_OKAY
) {
2102 if ((res
= mp_copy (a
, &x
)) != MP_OKAY
) {
2105 if ((res
= mp_copy (b
, &y
)) != MP_OKAY
) {
2109 /* 2. [modified] if x,y are both even then return an error! */
2110 if (mp_iseven (&x
) == 1 && mp_iseven (&y
) == 1) {
2115 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
2116 if ((res
= mp_copy (&x
, &u
)) != MP_OKAY
) {
2119 if ((res
= mp_copy (&y
, &v
)) != MP_OKAY
) {
2126 /* 4. while u is even do */
2127 while (mp_iseven (&u
) == 1) {
2129 if ((res
= mp_div_2 (&u
, &u
)) != MP_OKAY
) {
2132 /* 4.2 if A or B is odd then */
2133 if (mp_isodd (&A
) == 1 || mp_isodd (&B
) == 1) {
2134 /* A = (A+y)/2, B = (B-x)/2 */
2135 if ((res
= mp_add (&A
, &y
, &A
)) != MP_OKAY
) {
2138 if ((res
= mp_sub (&B
, &x
, &B
)) != MP_OKAY
) {
2142 /* A = A/2, B = B/2 */
2143 if ((res
= mp_div_2 (&A
, &A
)) != MP_OKAY
) {
2146 if ((res
= mp_div_2 (&B
, &B
)) != MP_OKAY
) {
2151 /* 5. while v is even do */
2152 while (mp_iseven (&v
) == 1) {
2154 if ((res
= mp_div_2 (&v
, &v
)) != MP_OKAY
) {
2157 /* 5.2 if C or D is odd then */
2158 if (mp_isodd (&C
) == 1 || mp_isodd (&D
) == 1) {
2159 /* C = (C+y)/2, D = (D-x)/2 */
2160 if ((res
= mp_add (&C
, &y
, &C
)) != MP_OKAY
) {
2163 if ((res
= mp_sub (&D
, &x
, &D
)) != MP_OKAY
) {
2167 /* C = C/2, D = D/2 */
2168 if ((res
= mp_div_2 (&C
, &C
)) != MP_OKAY
) {
2171 if ((res
= mp_div_2 (&D
, &D
)) != MP_OKAY
) {
2176 /* 6. if u >= v then */
2177 if (mp_cmp (&u
, &v
) != MP_LT
) {
2178 /* u = u - v, A = A - C, B = B - D */
2179 if ((res
= mp_sub (&u
, &v
, &u
)) != MP_OKAY
) {
2183 if ((res
= mp_sub (&A
, &C
, &A
)) != MP_OKAY
) {
2187 if ((res
= mp_sub (&B
, &D
, &B
)) != MP_OKAY
) {
2191 /* v - v - u, C = C - A, D = D - B */
2192 if ((res
= mp_sub (&v
, &u
, &v
)) != MP_OKAY
) {
2196 if ((res
= mp_sub (&C
, &A
, &C
)) != MP_OKAY
) {
2200 if ((res
= mp_sub (&D
, &B
, &D
)) != MP_OKAY
) {
2205 /* if not zero goto step 4 */
2206 if (mp_iszero (&u
) == 0)
2209 /* now a = C, b = D, gcd == g*v */
2211 /* if v != 1 then there is no inverse */
2212 if (mp_cmp_d (&v
, 1) != MP_EQ
) {
2217 /* if its too low */
2218 while (mp_cmp_d(&C
, 0) == MP_LT
) {
2219 if ((res
= mp_add(&C
, b
, &C
)) != MP_OKAY
) {
2225 while (mp_cmp_mag(&C
, b
) != MP_LT
) {
2226 if ((res
= mp_sub(&C
, b
, &C
)) != MP_OKAY
) {
2231 /* C is now the inverse */
2234 __ERR
:mp_clear_multi (&x
, &y
, &u
, &v
, &A
, &B
, &C
, &D
, NULL
);
2238 /* c = |a| * |b| using Karatsuba Multiplication using
2239 * three half size multiplications
2241 * Let B represent the radix [e.g. 2**DIGIT_BIT] and
2242 * let n represent half of the number of digits in
2245 * a = a1 * B**n + a0
2246 * b = b1 * B**n + b0
2249 a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0
2251 * Note that a1b1 and a0b0 are used twice and only need to be
2252 * computed once. So in total three half size (half # of
2253 * digit) multiplications are performed, a0b0, a1b1 and
2256 * Note that a multiplication of half the digits requires
2257 * 1/4th the number of single precision multiplications so in
2258 * total after one call 25% of the single precision multiplications
2259 * are saved. Note also that the call to mp_mul can end up back
2260 * in this function if the a0, a1, b0, or b1 are above the threshold.
2261 * This is known as divide-and-conquer and leads to the famous
2262 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
2263 * the standard O(N**2) that the baseline/comba methods use.
2264 * Generally though the overhead of this method doesn't pay off
2265 * until a certain size (N ~ 80) is reached.
2267 int mp_karatsuba_mul (const mp_int
* a
, const mp_int
* b
, mp_int
* c
)
2269 mp_int x0
, x1
, y0
, y1
, t1
, x0y0
, x1y1
;
2272 /* default the return code to an error */
2275 /* min # of digits */
2276 B
= MIN (a
->used
, b
->used
);
2278 /* now divide in two */
2281 /* init copy all the temps */
2282 if (mp_init_size (&x0
, B
) != MP_OKAY
)
2284 if (mp_init_size (&x1
, a
->used
- B
) != MP_OKAY
)
2286 if (mp_init_size (&y0
, B
) != MP_OKAY
)
2288 if (mp_init_size (&y1
, b
->used
- B
) != MP_OKAY
)
2292 if (mp_init_size (&t1
, B
* 2) != MP_OKAY
)
2294 if (mp_init_size (&x0y0
, B
* 2) != MP_OKAY
)
2296 if (mp_init_size (&x1y1
, B
* 2) != MP_OKAY
)
2299 /* now shift the digits */
2300 x0
.used
= y0
.used
= B
;
2301 x1
.used
= a
->used
- B
;
2302 y1
.used
= b
->used
- B
;
2306 register mp_digit
*tmpa
, *tmpb
, *tmpx
, *tmpy
;
2308 /* we copy the digits directly instead of using higher level functions
2309 * since we also need to shift the digits
2316 for (x
= 0; x
< B
; x
++) {
2322 for (x
= B
; x
< a
->used
; x
++) {
2327 for (x
= B
; x
< b
->used
; x
++) {
2332 /* only need to clamp the lower words since by definition the
2333 * upper words x1/y1 must have a known number of digits
2338 /* now calc the products x0y0 and x1y1 */
2339 /* after this x0 is no longer required, free temp [x0==t2]! */
2340 if (mp_mul (&x0
, &y0
, &x0y0
) != MP_OKAY
)
2341 goto X1Y1
; /* x0y0 = x0*y0 */
2342 if (mp_mul (&x1
, &y1
, &x1y1
) != MP_OKAY
)
2343 goto X1Y1
; /* x1y1 = x1*y1 */
2345 /* now calc x1-x0 and y1-y0 */
2346 if (mp_sub (&x1
, &x0
, &t1
) != MP_OKAY
)
2347 goto X1Y1
; /* t1 = x1 - x0 */
2348 if (mp_sub (&y1
, &y0
, &x0
) != MP_OKAY
)
2349 goto X1Y1
; /* t2 = y1 - y0 */
2350 if (mp_mul (&t1
, &x0
, &t1
) != MP_OKAY
)
2351 goto X1Y1
; /* t1 = (x1 - x0) * (y1 - y0) */
2354 if (mp_add (&x0y0
, &x1y1
, &x0
) != MP_OKAY
)
2355 goto X1Y1
; /* t2 = x0y0 + x1y1 */
2356 if (mp_sub (&x0
, &t1
, &t1
) != MP_OKAY
)
2357 goto X1Y1
; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
2360 if (mp_lshd (&t1
, B
) != MP_OKAY
)
2361 goto X1Y1
; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
2362 if (mp_lshd (&x1y1
, B
* 2) != MP_OKAY
)
2363 goto X1Y1
; /* x1y1 = x1y1 << 2*B */
2365 if (mp_add (&x0y0
, &t1
, &t1
) != MP_OKAY
)
2366 goto X1Y1
; /* t1 = x0y0 + t1 */
2367 if (mp_add (&t1
, &x1y1
, c
) != MP_OKAY
)
2368 goto X1Y1
; /* t1 = x0y0 + t1 + x1y1 */
2370 /* Algorithm succeeded set the return code to MP_OKAY */
2373 X1Y1
:mp_clear (&x1y1
);
2374 X0Y0
:mp_clear (&x0y0
);
2384 /* Karatsuba squaring, computes b = a*a using three
2385 * half size squarings
2387 * See comments of karatsuba_mul for details. It
2388 * is essentially the same algorithm but merely
2389 * tuned to perform recursive squarings.
2391 int mp_karatsuba_sqr (const mp_int
* a
, mp_int
* b
)
2393 mp_int x0
, x1
, t1
, t2
, x0x0
, x1x1
;
2398 /* min # of digits */
2401 /* now divide in two */
2404 /* init copy all the temps */
2405 if (mp_init_size (&x0
, B
) != MP_OKAY
)
2407 if (mp_init_size (&x1
, a
->used
- B
) != MP_OKAY
)
2411 if (mp_init_size (&t1
, a
->used
* 2) != MP_OKAY
)
2413 if (mp_init_size (&t2
, a
->used
* 2) != MP_OKAY
)
2415 if (mp_init_size (&x0x0
, B
* 2) != MP_OKAY
)
2417 if (mp_init_size (&x1x1
, (a
->used
- B
) * 2) != MP_OKAY
)
2422 register mp_digit
*dst
, *src
;
2426 /* now shift the digits */
2428 for (x
= 0; x
< B
; x
++) {
2433 for (x
= B
; x
< a
->used
; x
++) {
2439 x1
.used
= a
->used
- B
;
2443 /* now calc the products x0*x0 and x1*x1 */
2444 if (mp_sqr (&x0
, &x0x0
) != MP_OKAY
)
2445 goto X1X1
; /* x0x0 = x0*x0 */
2446 if (mp_sqr (&x1
, &x1x1
) != MP_OKAY
)
2447 goto X1X1
; /* x1x1 = x1*x1 */
2449 /* now calc (x1-x0)**2 */
2450 if (mp_sub (&x1
, &x0
, &t1
) != MP_OKAY
)
2451 goto X1X1
; /* t1 = x1 - x0 */
2452 if (mp_sqr (&t1
, &t1
) != MP_OKAY
)
2453 goto X1X1
; /* t1 = (x1 - x0) * (x1 - x0) */
2456 if (s_mp_add (&x0x0
, &x1x1
, &t2
) != MP_OKAY
)
2457 goto X1X1
; /* t2 = x0x0 + x1x1 */
2458 if (mp_sub (&t2
, &t1
, &t1
) != MP_OKAY
)
2459 goto X1X1
; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */
2462 if (mp_lshd (&t1
, B
) != MP_OKAY
)
2463 goto X1X1
; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
2464 if (mp_lshd (&x1x1
, B
* 2) != MP_OKAY
)
2465 goto X1X1
; /* x1x1 = x1x1 << 2*B */
2467 if (mp_add (&x0x0
, &t1
, &t1
) != MP_OKAY
)
2468 goto X1X1
; /* t1 = x0x0 + t1 */
2469 if (mp_add (&t1
, &x1x1
, b
) != MP_OKAY
)
2470 goto X1X1
; /* t1 = x0x0 + t1 + x1x1 */
2474 X1X1
:mp_clear (&x1x1
);
2475 X0X0
:mp_clear (&x0x0
);
2484 /* computes least common multiple as |a*b|/(a, b) */
2485 int mp_lcm (const mp_int
* a
, const mp_int
* b
, mp_int
* c
)
2491 if ((res
= mp_init_multi (&t1
, &t2
, NULL
)) != MP_OKAY
) {
2495 /* t1 = get the GCD of the two inputs */
2496 if ((res
= mp_gcd (a
, b
, &t1
)) != MP_OKAY
) {
2500 /* divide the smallest by the GCD */
2501 if (mp_cmp_mag(a
, b
) == MP_LT
) {
2502 /* store quotient in t2 such that t2 * b is the LCM */
2503 if ((res
= mp_div(a
, &t1
, &t2
, NULL
)) != MP_OKAY
) {
2506 res
= mp_mul(b
, &t2
, c
);
2508 /* store quotient in t2 such that t2 * a is the LCM */
2509 if ((res
= mp_div(b
, &t1
, &t2
, NULL
)) != MP_OKAY
) {
2512 res
= mp_mul(a
, &t2
, c
);
2515 /* fix the sign to positive */
2519 mp_clear_multi (&t1
, &t2
, NULL
);
2523 /* shift left a certain amount of digits */
2524 int mp_lshd (mp_int
* a
, int b
)
2528 /* if its less than zero return */
2533 /* grow to fit the new digits */
2534 if (a
->alloc
< a
->used
+ b
) {
2535 if ((res
= mp_grow (a
, a
->used
+ b
)) != MP_OKAY
) {
2541 register mp_digit
*top
, *bottom
;
2543 /* increment the used by the shift amount then copy upwards */
2547 top
= a
->dp
+ a
->used
- 1;
2550 bottom
= a
->dp
+ a
->used
- 1 - b
;
2552 /* much like mp_rshd this is implemented using a sliding window
2553 * except the window goes the otherway around. Copying from
2554 * the bottom to the top. see bn_mp_rshd.c for more info.
2556 for (x
= a
->used
- 1; x
>= b
; x
--) {
2560 /* zero the lower digits */
2562 for (x
= 0; x
< b
; x
++) {
2569 /* c = a mod b, 0 <= c < b */
2571 mp_mod (const mp_int
* a
, mp_int
* b
, mp_int
* c
)
2576 if ((res
= mp_init (&t
)) != MP_OKAY
) {
2580 if ((res
= mp_div (a
, b
, NULL
, &t
)) != MP_OKAY
) {
2585 if (t
.sign
!= b
->sign
) {
2586 res
= mp_add (b
, &t
, c
);
2596 /* calc a value mod 2**b */
2598 mp_mod_2d (const mp_int
* a
, int b
, mp_int
* c
)
2602 /* if b is <= 0 then zero the int */
2608 /* if the modulus is larger than the value than return */
2609 if (b
> (int) (a
->used
* DIGIT_BIT
)) {
2610 res
= mp_copy (a
, c
);
2615 if ((res
= mp_copy (a
, c
)) != MP_OKAY
) {
2619 /* zero digits above the last digit of the modulus */
2620 for (x
= (b
/ DIGIT_BIT
) + ((b
% DIGIT_BIT
) == 0 ? 0 : 1); x
< c
->used
; x
++) {
2623 /* clear the digit that is not completely outside/inside the modulus */
2624 c
->dp
[b
/ DIGIT_BIT
] &=
2625 (mp_digit
) ((((mp_digit
) 1) << (((mp_digit
) b
) % DIGIT_BIT
)) - ((mp_digit
) 1));
2631 mp_mod_d (const mp_int
* a
, mp_digit b
, mp_digit
* c
)
2633 return mp_div_d(a
, b
, NULL
, c
);
2637 * shifts with subtractions when the result is greater than b.
2639 * The method is slightly modified to shift B unconditionally up to just under
2640 * the leading bit of b. This saves a lot of multiple precision shifting.
2642 int mp_montgomery_calc_normalization (mp_int
* a
, const mp_int
* b
)
2646 /* how many bits of last digit does b use */
2647 bits
= mp_count_bits (b
) % DIGIT_BIT
;
2651 if ((res
= mp_2expt (a
, (b
->used
- 1) * DIGIT_BIT
+ bits
- 1)) != MP_OKAY
) {
2660 /* now compute C = A * B mod b */
2661 for (x
= bits
- 1; x
< (int)DIGIT_BIT
; x
++) {
2662 if ((res
= mp_mul_2 (a
, a
)) != MP_OKAY
) {
2665 if (mp_cmp_mag (a
, b
) != MP_LT
) {
2666 if ((res
= s_mp_sub (a
, b
, a
)) != MP_OKAY
) {
2675 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
2677 mp_montgomery_reduce (mp_int
* x
, const mp_int
* n
, mp_digit rho
)
2682 /* can the fast reduction [comba] method be used?
2684 * Note that unlike in mul you're safely allowed *less*
2685 * than the available columns [255 per default] since carries
2686 * are fixed up in the inner loop.
2688 digs
= n
->used
* 2 + 1;
2689 if ((digs
< MP_WARRAY
) &&
2691 (1 << ((CHAR_BIT
* sizeof (mp_word
)) - (2 * DIGIT_BIT
)))) {
2692 return fast_mp_montgomery_reduce (x
, n
, rho
);
2695 /* grow the input as required */
2696 if (x
->alloc
< digs
) {
2697 if ((res
= mp_grow (x
, digs
)) != MP_OKAY
) {
2703 for (ix
= 0; ix
< n
->used
; ix
++) {
2704 /* mu = ai * rho mod b
2706 * The value of rho must be precalculated via
2707 * montgomery_setup() such that
2708 * it equals -1/n0 mod b this allows the
2709 * following inner loop to reduce the
2710 * input one digit at a time
2712 mu
= (mp_digit
) (((mp_word
)x
->dp
[ix
]) * ((mp_word
)rho
) & MP_MASK
);
2714 /* a = a + mu * m * b**i */
2717 register mp_digit
*tmpn
, *tmpx
, u
;
2720 /* alias for digits of the modulus */
2723 /* alias for the digits of x [the input] */
2726 /* set the carry to zero */
2729 /* Multiply and add in place */
2730 for (iy
= 0; iy
< n
->used
; iy
++) {
2731 /* compute product and sum */
2732 r
= ((mp_word
)mu
) * ((mp_word
)*tmpn
++) +
2733 ((mp_word
) u
) + ((mp_word
) * tmpx
);
2736 u
= (mp_digit
)(r
>> ((mp_word
) DIGIT_BIT
));
2739 *tmpx
++ = (mp_digit
)(r
& ((mp_word
) MP_MASK
));
2741 /* At this point the ix'th digit of x should be zero */
2744 /* propagate carries upwards as required*/
2747 u
= *tmpx
>> DIGIT_BIT
;
2753 /* at this point the n.used'th least
2754 * significant digits of x are all zero
2755 * which means we can shift x to the
2756 * right by n.used digits and the
2757 * residue is unchanged.
2760 /* x = x/b**n.used */
2762 mp_rshd (x
, n
->used
);
2764 /* if x >= n then x = x - n */
2765 if (mp_cmp_mag (x
, n
) != MP_LT
) {
2766 return s_mp_sub (x
, n
, x
);
2772 /* setups the montgomery reduction stuff */
2774 mp_montgomery_setup (const mp_int
* n
, mp_digit
* rho
)
2778 /* fast inversion mod 2**k
2780 * Based on the fact that
2782 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
2783 * => 2*X*A - X*X*A*A = 1
2784 * => 2*(1) - (1) = 1
2792 x
= (((b
+ 2) & 4) << 1) + b
; /* here x*a==1 mod 2**4 */
2793 x
*= 2 - b
* x
; /* here x*a==1 mod 2**8 */
2794 x
*= 2 - b
* x
; /* here x*a==1 mod 2**16 */
2795 x
*= 2 - b
* x
; /* here x*a==1 mod 2**32 */
2797 /* rho = -1/m mod b */
2798 *rho
= (((mp_word
)1 << ((mp_word
) DIGIT_BIT
)) - x
) & MP_MASK
;
2803 /* high level multiplication (handles sign) */
2804 int mp_mul (const mp_int
* a
, const mp_int
* b
, mp_int
* c
)
2807 neg
= (a
->sign
== b
->sign
) ? MP_ZPOS
: MP_NEG
;
2809 /* use Karatsuba? */
2810 if (MIN (a
->used
, b
->used
) >= KARATSUBA_MUL_CUTOFF
) {
2811 res
= mp_karatsuba_mul (a
, b
, c
);
2814 /* can we use the fast multiplier?
2816 * The fast multiplier can be used if the output will
2817 * have less than MP_WARRAY digits and the number of
2818 * digits won't affect carry propagation
2820 int digs
= a
->used
+ b
->used
+ 1;
2822 if ((digs
< MP_WARRAY
) &&
2823 MIN(a
->used
, b
->used
) <=
2824 (1 << ((CHAR_BIT
* sizeof (mp_word
)) - (2 * DIGIT_BIT
)))) {
2825 res
= fast_s_mp_mul_digs (a
, b
, c
, digs
);
2827 res
= s_mp_mul (a
, b
, c
); /* uses s_mp_mul_digs */
2829 c
->sign
= (c
->used
> 0) ? neg
: MP_ZPOS
;
2834 int mp_mul_2(const mp_int
* a
, mp_int
* b
)
2836 int x
, res
, oldused
;
2838 /* grow to accommodate result */
2839 if (b
->alloc
< a
->used
+ 1) {
2840 if ((res
= mp_grow (b
, a
->used
+ 1)) != MP_OKAY
) {
2849 register mp_digit r
, rr
, *tmpa
, *tmpb
;
2851 /* alias for source */
2854 /* alias for dest */
2859 for (x
= 0; x
< a
->used
; x
++) {
2861 /* get what will be the *next* carry bit from the
2862 * MSB of the current digit
2864 rr
= *tmpa
>> ((mp_digit
)(DIGIT_BIT
- 1));
2866 /* now shift up this digit, add in the carry [from the previous] */
2867 *tmpb
++ = ((*tmpa
++ << ((mp_digit
)1)) | r
) & MP_MASK
;
2869 /* copy the carry that would be from the source
2870 * digit into the next iteration
2875 /* new leading digit? */
2877 /* add a MSB which is always 1 at this point */
2882 /* now zero any excess digits on the destination
2883 * that we didn't write to
2885 tmpb
= b
->dp
+ b
->used
;
2886 for (x
= b
->used
; x
< oldused
; x
++) {
2894 /* shift left by a certain bit count */
2895 int mp_mul_2d (const mp_int
* a
, int b
, mp_int
* c
)
2902 if ((res
= mp_copy (a
, c
)) != MP_OKAY
) {
2907 if (c
->alloc
< (int)(c
->used
+ b
/DIGIT_BIT
+ 1)) {
2908 if ((res
= mp_grow (c
, c
->used
+ b
/ DIGIT_BIT
+ 1)) != MP_OKAY
) {
2913 /* shift by as many digits in the bit count */
2914 if (b
>= (int)DIGIT_BIT
) {
2915 if ((res
= mp_lshd (c
, b
/ DIGIT_BIT
)) != MP_OKAY
) {
2920 /* shift any bit count < DIGIT_BIT */
2921 d
= (mp_digit
) (b
% DIGIT_BIT
);
2923 register mp_digit
*tmpc
, shift
, mask
, r
, rr
;
2926 /* bitmask for carries */
2927 mask
= (((mp_digit
)1) << d
) - 1;
2929 /* shift for msbs */
2930 shift
= DIGIT_BIT
- d
;
2937 for (x
= 0; x
< c
->used
; x
++) {
2938 /* get the higher bits of the current word */
2939 rr
= (*tmpc
>> shift
) & mask
;
2941 /* shift the current word and OR in the carry */
2942 *tmpc
= ((*tmpc
<< d
) | r
) & MP_MASK
;
2945 /* set the carry to the carry bits of the current word */
2949 /* set final carry */
2951 c
->dp
[(c
->used
)++] = r
;
2958 /* multiply by a digit */
2960 mp_mul_d (const mp_int
* a
, mp_digit b
, mp_int
* c
)
2962 mp_digit u
, *tmpa
, *tmpc
;
2964 int ix
, res
, olduse
;
2966 /* make sure c is big enough to hold a*b */
2967 if (c
->alloc
< a
->used
+ 1) {
2968 if ((res
= mp_grow (c
, a
->used
+ 1)) != MP_OKAY
) {
2973 /* get the original destinations used count */
2979 /* alias for a->dp [source] */
2982 /* alias for c->dp [dest] */
2988 /* compute columns */
2989 for (ix
= 0; ix
< a
->used
; ix
++) {
2990 /* compute product and carry sum for this term */
2991 r
= ((mp_word
) u
) + ((mp_word
)*tmpa
++) * ((mp_word
)b
);
2993 /* mask off higher bits to get a single digit */
2994 *tmpc
++ = (mp_digit
) (r
& ((mp_word
) MP_MASK
));
2996 /* send carry into next iteration */
2997 u
= (mp_digit
) (r
>> ((mp_word
) DIGIT_BIT
));
3000 /* store final carry [if any] */
3003 /* now zero digits above the top */
3004 while (ix
++ < olduse
) {
3008 /* set used count */
3009 c
->used
= a
->used
+ 1;
3015 /* d = a * b (mod c) */
3017 mp_mulmod (const mp_int
* a
, const mp_int
* b
, mp_int
* c
, mp_int
* d
)
3022 if ((res
= mp_init (&t
)) != MP_OKAY
) {
3026 if ((res
= mp_mul (a
, b
, &t
)) != MP_OKAY
) {
3030 res
= mp_mod (&t
, c
, d
);
3035 /* determines if an integers is divisible by one
3036 * of the first PRIME_SIZE primes or not
3038 * sets result to 0 if not, 1 if yes
3040 int mp_prime_is_divisible (const mp_int
* a
, int *result
)
3045 /* default to not */
3048 for (ix
= 0; ix
< PRIME_SIZE
; ix
++) {
3049 /* what is a mod __prime_tab[ix] */
3050 if ((err
= mp_mod_d (a
, __prime_tab
[ix
], &res
)) != MP_OKAY
) {
3054 /* is the residue zero? */
3064 /* performs a variable number of rounds of Miller-Rabin
3066 * Probability of error after t rounds is no more than
3069 * Sets result to 1 if probably prime, 0 otherwise
3071 int mp_prime_is_prime (mp_int
* a
, int t
, int *result
)
3079 /* valid value of t? */
3080 if (t
<= 0 || t
> PRIME_SIZE
) {
3084 /* is the input equal to one of the primes in the table? */
3085 for (ix
= 0; ix
< PRIME_SIZE
; ix
++) {
3086 if (mp_cmp_d(a
, __prime_tab
[ix
]) == MP_EQ
) {
3092 /* first perform trial division */
3093 if ((err
= mp_prime_is_divisible (a
, &res
)) != MP_OKAY
) {
3097 /* return if it was trivially divisible */
3098 if (res
== MP_YES
) {
3102 /* now perform the miller-rabin rounds */
3103 if ((err
= mp_init (&b
)) != MP_OKAY
) {
3107 for (ix
= 0; ix
< t
; ix
++) {
3109 mp_set (&b
, __prime_tab
[ix
]);
3111 if ((err
= mp_prime_miller_rabin (a
, &b
, &res
)) != MP_OKAY
) {
3120 /* passed the test */
3126 /* Miller-Rabin test of "a" to the base of "b" as described in
3127 * HAC pp. 139 Algorithm 4.24
3129 * Sets result to 0 if definitely composite or 1 if probably prime.
3130 * Randomly the chance of error is no more than 1/4 and often
3133 int mp_prime_miller_rabin (mp_int
* a
, const mp_int
* b
, int *result
)
3142 if (mp_cmp_d(b
, 1) != MP_GT
) {
3146 /* get n1 = a - 1 */
3147 if ((err
= mp_init_copy (&n1
, a
)) != MP_OKAY
) {
3150 if ((err
= mp_sub_d (&n1
, 1, &n1
)) != MP_OKAY
) {
3154 /* set 2**s * r = n1 */
3155 if ((err
= mp_init_copy (&r
, &n1
)) != MP_OKAY
) {
3159 /* count the number of least significant bits
3164 /* now divide n - 1 by 2**s */
3165 if ((err
= mp_div_2d (&r
, s
, &r
, NULL
)) != MP_OKAY
) {
3169 /* compute y = b**r mod a */
3170 if ((err
= mp_init (&y
)) != MP_OKAY
) {
3173 if ((err
= mp_exptmod (b
, &r
, a
, &y
)) != MP_OKAY
) {
3177 /* if y != 1 and y != n1 do */
3178 if (mp_cmp_d (&y
, 1) != MP_EQ
&& mp_cmp (&y
, &n1
) != MP_EQ
) {
3180 /* while j <= s-1 and y != n1 */
3181 while ((j
<= (s
- 1)) && mp_cmp (&y
, &n1
) != MP_EQ
) {
3182 if ((err
= mp_sqrmod (&y
, a
, &y
)) != MP_OKAY
) {
3186 /* if y == 1 then composite */
3187 if (mp_cmp_d (&y
, 1) == MP_EQ
) {
3194 /* if y != n1 then composite */
3195 if (mp_cmp (&y
, &n1
) != MP_EQ
) {
3200 /* probably prime now */
3204 __N1
:mp_clear (&n1
);
3208 static const struct {
3221 /* returns # of RM trials required for a given bit size */
3222 int mp_prime_rabin_miller_trials(int size
)
3226 for (x
= 0; x
< (int)(sizeof(sizes
)/(sizeof(sizes
[0]))); x
++) {
3227 if (sizes
[x
].k
== size
) {
3229 } else if (sizes
[x
].k
> size
) {
3230 return (x
== 0) ? sizes
[0].t
: sizes
[x
- 1].t
;
3233 return sizes
[x
-1].t
+ 1;
3236 /* makes a truly random prime of a given size (bits),
3238 * Flags are as follows:
3240 * LTM_PRIME_BBS - make prime congruent to 3 mod 4
3241 * LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS)
3242 * LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero
3243 * LTM_PRIME_2MSB_ON - make the 2nd highest bit one
3245 * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can
3246 * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself
3251 /* This is possibly the mother of all prime generation functions, muahahahahaha! */
3252 int mp_prime_random_ex(mp_int
*a
, int t
, int size
, int flags
, ltm_prime_callback cb
, void *dat
)
3254 unsigned char *tmp
, maskAND
, maskOR_msb
, maskOR_lsb
;
3255 int res
, err
, bsize
, maskOR_msb_offset
;
3257 /* sanity check the input */
3258 if (size
<= 1 || t
<= 0) {
3262 /* LTM_PRIME_SAFE implies LTM_PRIME_BBS */
3263 if (flags
& LTM_PRIME_SAFE
) {
3264 flags
|= LTM_PRIME_BBS
;
3267 /* calc the byte size */
3268 bsize
= (size
>>3)+((size
&7)?1:0);
3270 /* we need a buffer of bsize bytes */
3271 tmp
= malloc(bsize
);
3276 /* calc the maskAND value for the MSbyte*/
3277 maskAND
= ((size
&7) == 0) ? 0xFF : (0xFF >> (8 - (size
& 7)));
3279 /* calc the maskOR_msb */
3281 maskOR_msb_offset
= ((size
& 7) == 1) ? 1 : 0;
3282 if (flags
& LTM_PRIME_2MSB_ON
) {
3283 maskOR_msb
|= 1 << ((size
- 2) & 7);
3284 } else if (flags
& LTM_PRIME_2MSB_OFF
) {
3285 maskAND
&= ~(1 << ((size
- 2) & 7));
3288 /* get the maskOR_lsb */
3290 if (flags
& LTM_PRIME_BBS
) {
3295 /* read the bytes */
3296 if (cb(tmp
, bsize
, dat
) != bsize
) {
3301 /* work over the MSbyte */
3303 tmp
[0] |= 1 << ((size
- 1) & 7);
3305 /* mix in the maskORs */
3306 tmp
[maskOR_msb_offset
] |= maskOR_msb
;
3307 tmp
[bsize
-1] |= maskOR_lsb
;
3310 if ((err
= mp_read_unsigned_bin(a
, tmp
, bsize
)) != MP_OKAY
) { goto error
; }
3313 if ((err
= mp_prime_is_prime(a
, t
, &res
)) != MP_OKAY
) { goto error
; }
3318 if (flags
& LTM_PRIME_SAFE
) {
3319 /* see if (a-1)/2 is prime */
3320 if ((err
= mp_sub_d(a
, 1, a
)) != MP_OKAY
) { goto error
; }
3321 if ((err
= mp_div_2(a
, a
)) != MP_OKAY
) { goto error
; }
3324 if ((err
= mp_prime_is_prime(a
, t
, &res
)) != MP_OKAY
) { goto error
; }
3326 } while (res
== MP_NO
);
3328 if (flags
& LTM_PRIME_SAFE
) {
3329 /* restore a to the original value */
3330 if ((err
= mp_mul_2(a
, a
)) != MP_OKAY
) { goto error
; }
3331 if ((err
= mp_add_d(a
, 1, a
)) != MP_OKAY
) { goto error
; }
3340 /* reads an unsigned char array, assumes the msb is stored first [big endian] */
3342 mp_read_unsigned_bin (mp_int
* a
, const unsigned char *b
, int c
)
3346 /* make sure there are at least two digits */
3348 if ((res
= mp_grow(a
, 2)) != MP_OKAY
) {
3356 /* read the bytes in */
3358 if ((res
= mp_mul_2d (a
, 8, a
)) != MP_OKAY
) {
3369 /* reduces x mod m, assumes 0 < x < m**2, mu is
3370 * precomputed via mp_reduce_setup.
3371 * From HAC pp.604 Algorithm 14.42
3374 mp_reduce (mp_int
* x
, const mp_int
* m
, const mp_int
* mu
)
3377 int res
, um
= m
->used
;
3380 if ((res
= mp_init_copy (&q
, x
)) != MP_OKAY
) {
3384 /* q1 = x / b**(k-1) */
3385 mp_rshd (&q
, um
- 1);
3387 /* according to HAC this optimization is ok */
3388 if (((unsigned long) um
) > (((mp_digit
)1) << (DIGIT_BIT
- 1))) {
3389 if ((res
= mp_mul (&q
, mu
, &q
)) != MP_OKAY
) {
3393 if ((res
= s_mp_mul_high_digs (&q
, mu
, &q
, um
- 1)) != MP_OKAY
) {
3398 /* q3 = q2 / b**(k+1) */
3399 mp_rshd (&q
, um
+ 1);
3401 /* x = x mod b**(k+1), quick (no division) */
3402 if ((res
= mp_mod_2d (x
, DIGIT_BIT
* (um
+ 1), x
)) != MP_OKAY
) {
3406 /* q = q * m mod b**(k+1), quick (no division) */
3407 if ((res
= s_mp_mul_digs (&q
, m
, &q
, um
+ 1)) != MP_OKAY
) {
3412 if ((res
= mp_sub (x
, &q
, x
)) != MP_OKAY
) {
3416 /* If x < 0, add b**(k+1) to it */
3417 if (mp_cmp_d (x
, 0) == MP_LT
) {
3419 if ((res
= mp_lshd (&q
, um
+ 1)) != MP_OKAY
)
3421 if ((res
= mp_add (x
, &q
, x
)) != MP_OKAY
)
3425 /* Back off if it's too big */
3426 while (mp_cmp (x
, m
) != MP_LT
) {
3427 if ((res
= s_mp_sub (x
, m
, x
)) != MP_OKAY
) {
3438 /* reduces a modulo n where n is of the form 2**p - d */
3440 mp_reduce_2k(mp_int
*a
, const mp_int
*n
, mp_digit d
)
3445 if ((res
= mp_init(&q
)) != MP_OKAY
) {
3449 p
= mp_count_bits(n
);
3451 /* q = a/2**p, a = a mod 2**p */
3452 if ((res
= mp_div_2d(a
, p
, &q
, a
)) != MP_OKAY
) {
3458 if ((res
= mp_mul_d(&q
, d
, &q
)) != MP_OKAY
) {
3464 if ((res
= s_mp_add(a
, &q
, a
)) != MP_OKAY
) {
3468 if (mp_cmp_mag(a
, n
) != MP_LT
) {
3478 /* determines the setup value */
3480 mp_reduce_2k_setup(const mp_int
*a
, mp_digit
*d
)
3485 if ((res
= mp_init(&tmp
)) != MP_OKAY
) {
3489 p
= mp_count_bits(a
);
3490 if ((res
= mp_2expt(&tmp
, p
)) != MP_OKAY
) {
3495 if ((res
= s_mp_sub(&tmp
, a
, &tmp
)) != MP_OKAY
) {
3505 /* pre-calculate the value required for Barrett reduction
3506 * For a given modulus "b" it calulates the value required in "a"
3508 int mp_reduce_setup (mp_int
* a
, const mp_int
* b
)
3512 if ((res
= mp_2expt (a
, b
->used
* 2 * DIGIT_BIT
)) != MP_OKAY
) {
3515 return mp_div (a
, b
, a
, NULL
);
3518 /* shift right a certain amount of digits */
3519 void mp_rshd (mp_int
* a
, int b
)
3523 /* if b <= 0 then ignore it */
3528 /* if b > used then simply zero it and return */
3535 register mp_digit
*bottom
, *top
;
3537 /* shift the digits down */
3542 /* top [offset into digits] */
3545 /* this is implemented as a sliding window where
3546 * the window is b-digits long and digits from
3547 * the top of the window are copied to the bottom
3551 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
3553 \-------------------/ ---->
3555 for (x
= 0; x
< (a
->used
- b
); x
++) {
3559 /* zero the top digits */
3560 for (; x
< a
->used
; x
++) {
3565 /* remove excess digits */
3569 /* set to a digit */
3570 void mp_set (mp_int
* a
, mp_digit b
)
3573 a
->dp
[0] = b
& MP_MASK
;
3574 a
->used
= (a
->dp
[0] != 0) ? 1 : 0;
3577 /* set a 32-bit const */
3578 int mp_set_int (mp_int
* a
, unsigned long b
)
3584 /* set four bits at a time */
3585 for (x
= 0; x
< 8; x
++) {
3586 /* shift the number up four bits */
3587 if ((res
= mp_mul_2d (a
, 4, a
)) != MP_OKAY
) {
3591 /* OR in the top four bits of the source */
3592 a
->dp
[0] |= (b
>> 28) & 15;
3594 /* shift the source up to the next four bits */
3597 /* ensure that digits are not clamped off */
3604 /* shrink a bignum */
3605 int mp_shrink (mp_int
* a
)
3608 if (a
->alloc
!= a
->used
&& a
->used
> 0) {
3609 if ((tmp
= realloc (a
->dp
, sizeof (mp_digit
) * a
->used
)) == NULL
) {
3618 /* get the size for an signed equivalent */
3619 int mp_signed_bin_size (const mp_int
* a
)
3621 return 1 + mp_unsigned_bin_size (a
);
3624 /* computes b = a*a */
3626 mp_sqr (const mp_int
* a
, mp_int
* b
)
3630 if (a
->used
>= KARATSUBA_SQR_CUTOFF
) {
3631 res
= mp_karatsuba_sqr (a
, b
);
3634 /* can we use the fast comba multiplier? */
3635 if ((a
->used
* 2 + 1) < MP_WARRAY
&&
3637 (1 << (sizeof(mp_word
) * CHAR_BIT
- 2*DIGIT_BIT
- 1))) {
3638 res
= fast_s_mp_sqr (a
, b
);
3640 res
= s_mp_sqr (a
, b
);
3646 /* c = a * a (mod b) */
3648 mp_sqrmod (const mp_int
* a
, mp_int
* b
, mp_int
* c
)
3653 if ((res
= mp_init (&t
)) != MP_OKAY
) {
3657 if ((res
= mp_sqr (a
, &t
)) != MP_OKAY
) {
3661 res
= mp_mod (&t
, b
, c
);
3666 /* high level subtraction (handles signs) */
3668 mp_sub (mp_int
* a
, mp_int
* b
, mp_int
* c
)
3676 /* subtract a negative from a positive, OR */
3677 /* subtract a positive from a negative. */
3678 /* In either case, ADD their magnitudes, */
3679 /* and use the sign of the first number. */
3681 res
= s_mp_add (a
, b
, c
);
3683 /* subtract a positive from a positive, OR */
3684 /* subtract a negative from a negative. */
3685 /* First, take the difference between their */
3686 /* magnitudes, then... */
3687 if (mp_cmp_mag (a
, b
) != MP_LT
) {
3688 /* Copy the sign from the first */
3690 /* The first has a larger or equal magnitude */
3691 res
= s_mp_sub (a
, b
, c
);
3693 /* The result has the *opposite* sign from */
3694 /* the first number. */
3695 c
->sign
= (sa
== MP_ZPOS
) ? MP_NEG
: MP_ZPOS
;
3696 /* The second has a larger magnitude */
3697 res
= s_mp_sub (b
, a
, c
);
3703 /* single digit subtraction */
3705 mp_sub_d (mp_int
* a
, mp_digit b
, mp_int
* c
)
3707 mp_digit
*tmpa
, *tmpc
, mu
;
3708 int res
, ix
, oldused
;
3710 /* grow c as required */
3711 if (c
->alloc
< a
->used
+ 1) {
3712 if ((res
= mp_grow(c
, a
->used
+ 1)) != MP_OKAY
) {
3717 /* if a is negative just do an unsigned
3718 * addition [with fudged signs]
3720 if (a
->sign
== MP_NEG
) {
3722 res
= mp_add_d(a
, b
, c
);
3723 a
->sign
= c
->sign
= MP_NEG
;
3732 /* if a <= b simply fix the single digit */
3733 if ((a
->used
== 1 && a
->dp
[0] <= b
) || a
->used
== 0) {
3735 *tmpc
++ = b
- *tmpa
;
3741 /* negative/1digit */
3749 /* subtract first digit */
3750 *tmpc
= *tmpa
++ - b
;
3751 mu
= *tmpc
>> (sizeof(mp_digit
) * CHAR_BIT
- 1);
3754 /* handle rest of the digits */
3755 for (ix
= 1; ix
< a
->used
; ix
++) {
3756 *tmpc
= *tmpa
++ - mu
;
3757 mu
= *tmpc
>> (sizeof(mp_digit
) * CHAR_BIT
- 1);
3762 /* zero excess digits */
3763 while (ix
++ < oldused
) {
3770 /* store in unsigned [big endian] format */
3772 mp_to_unsigned_bin (const mp_int
* a
, unsigned char *b
)
3777 if ((res
= mp_init_copy (&t
, a
)) != MP_OKAY
) {
3782 while (mp_iszero (&t
) == 0) {
3783 b
[x
++] = (unsigned char) (t
.dp
[0] & 255);
3784 if ((res
= mp_div_2d (&t
, 8, &t
, NULL
)) != MP_OKAY
) {
3794 /* get the size for an unsigned equivalent */
3796 mp_unsigned_bin_size (const mp_int
* a
)
3798 int size
= mp_count_bits (a
);
3799 return (size
/ 8 + ((size
& 7) != 0 ? 1 : 0));
3804 mp_zero (mp_int
* a
)
3808 memset (a
->dp
, 0, sizeof (mp_digit
) * a
->alloc
);
3811 const mp_digit __prime_tab
[] = {
3812 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
3813 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
3814 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
3815 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
3816 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
3817 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
3818 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
3819 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
3821 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
3822 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
3823 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
3824 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
3825 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
3826 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
3827 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
3828 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
3830 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
3831 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
3832 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
3833 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
3834 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
3835 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
3836 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
3837 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
3839 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
3840 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
3841 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
3842 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
3843 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
3844 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
3845 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
3846 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
3849 /* reverse an array, used for radix code */
3851 bn_reverse (unsigned char *s
, int len
)
3867 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
3869 s_mp_add (mp_int
* a
, mp_int
* b
, mp_int
* c
)
3872 int olduse
, res
, min
, max
;
3874 /* find sizes, we let |a| <= |b| which means we have to sort
3875 * them. "x" will point to the input with the most digits
3877 if (a
->used
> b
->used
) {
3888 if (c
->alloc
< max
+ 1) {
3889 if ((res
= mp_grow (c
, max
+ 1)) != MP_OKAY
) {
3894 /* get old used digit count and set new one */
3899 register mp_digit u
, *tmpa
, *tmpb
, *tmpc
;
3902 /* alias for digit pointers */
3913 /* zero the carry */
3915 for (i
= 0; i
< min
; i
++) {
3916 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
3917 *tmpc
= *tmpa
++ + *tmpb
++ + u
;
3919 /* U = carry bit of T[i] */
3920 u
= *tmpc
>> ((mp_digit
)DIGIT_BIT
);
3922 /* take away carry bit from T[i] */
3926 /* now copy higher words if any, that is in A+B
3927 * if A or B has more digits add those in
3930 for (; i
< max
; i
++) {
3931 /* T[i] = X[i] + U */
3932 *tmpc
= x
->dp
[i
] + u
;
3934 /* U = carry bit of T[i] */
3935 u
= *tmpc
>> ((mp_digit
)DIGIT_BIT
);
3937 /* take away carry bit from T[i] */
3945 /* clear digits above oldused */
3946 for (i
= c
->used
; i
< olduse
; i
++) {
3955 int s_mp_exptmod (const mp_int
* G
, const mp_int
* X
, mp_int
* P
, mp_int
* Y
)
3957 mp_int M
[256], res
, mu
;
3959 int err
, bitbuf
, bitcpy
, bitcnt
, mode
, digidx
, x
, y
, winsize
;
3961 /* find window size */
3962 x
= mp_count_bits (X
);
3965 } else if (x
<= 36) {
3967 } else if (x
<= 140) {
3969 } else if (x
<= 450) {
3971 } else if (x
<= 1303) {
3973 } else if (x
<= 3529) {
3980 /* init first cell */
3981 if ((err
= mp_init(&M
[1])) != MP_OKAY
) {
3985 /* now init the second half of the array */
3986 for (x
= 1<<(winsize
-1); x
< (1 << winsize
); x
++) {
3987 if ((err
= mp_init(&M
[x
])) != MP_OKAY
) {
3988 for (y
= 1<<(winsize
-1); y
< x
; y
++) {
3996 /* create mu, used for Barrett reduction */
3997 if ((err
= mp_init (&mu
)) != MP_OKAY
) {
4000 if ((err
= mp_reduce_setup (&mu
, P
)) != MP_OKAY
) {
4006 * The M table contains powers of the base,
4007 * e.g. M[x] = G**x mod P
4009 * The first half of the table is not
4010 * computed though accept for M[0] and M[1]
4012 if ((err
= mp_mod (G
, P
, &M
[1])) != MP_OKAY
) {
4016 /* compute the value at M[1<<(winsize-1)] by squaring
4017 * M[1] (winsize-1) times
4019 if ((err
= mp_copy (&M
[1], &M
[1 << (winsize
- 1)])) != MP_OKAY
) {
4023 for (x
= 0; x
< (winsize
- 1); x
++) {
4024 if ((err
= mp_sqr (&M
[1 << (winsize
- 1)],
4025 &M
[1 << (winsize
- 1)])) != MP_OKAY
) {
4028 if ((err
= mp_reduce (&M
[1 << (winsize
- 1)], P
, &mu
)) != MP_OKAY
) {
4033 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
4034 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
4036 for (x
= (1 << (winsize
- 1)) + 1; x
< (1 << winsize
); x
++) {
4037 if ((err
= mp_mul (&M
[x
- 1], &M
[1], &M
[x
])) != MP_OKAY
) {
4040 if ((err
= mp_reduce (&M
[x
], P
, &mu
)) != MP_OKAY
) {
4046 if ((err
= mp_init (&res
)) != MP_OKAY
) {
4051 /* set initial mode and bit cnt */
4055 digidx
= X
->used
- 1;
4060 /* grab next digit as required */
4061 if (--bitcnt
== 0) {
4062 /* if digidx == -1 we are out of digits */
4066 /* read next digit and reset the bitcnt */
4067 buf
= X
->dp
[digidx
--];
4068 bitcnt
= (int) DIGIT_BIT
;
4071 /* grab the next msb from the exponent */
4072 y
= (buf
>> (mp_digit
)(DIGIT_BIT
- 1)) & 1;
4073 buf
<<= (mp_digit
)1;
4075 /* if the bit is zero and mode == 0 then we ignore it
4076 * These represent the leading zero bits before the first 1 bit
4077 * in the exponent. Technically this opt is not required but it
4078 * does lower the # of trivial squaring/reductions used
4080 if (mode
== 0 && y
== 0) {
4084 /* if the bit is zero and mode == 1 then we square */
4085 if (mode
== 1 && y
== 0) {
4086 if ((err
= mp_sqr (&res
, &res
)) != MP_OKAY
) {
4089 if ((err
= mp_reduce (&res
, P
, &mu
)) != MP_OKAY
) {
4095 /* else we add it to the window */
4096 bitbuf
|= (y
<< (winsize
- ++bitcpy
));
4099 if (bitcpy
== winsize
) {
4100 /* ok window is filled so square as required and multiply */
4102 for (x
= 0; x
< winsize
; x
++) {
4103 if ((err
= mp_sqr (&res
, &res
)) != MP_OKAY
) {
4106 if ((err
= mp_reduce (&res
, P
, &mu
)) != MP_OKAY
) {
4112 if ((err
= mp_mul (&res
, &M
[bitbuf
], &res
)) != MP_OKAY
) {
4115 if ((err
= mp_reduce (&res
, P
, &mu
)) != MP_OKAY
) {
4119 /* empty window and reset */
4126 /* if bits remain then square/multiply */
4127 if (mode
== 2 && bitcpy
> 0) {
4128 /* square then multiply if the bit is set */
4129 for (x
= 0; x
< bitcpy
; x
++) {
4130 if ((err
= mp_sqr (&res
, &res
)) != MP_OKAY
) {
4133 if ((err
= mp_reduce (&res
, P
, &mu
)) != MP_OKAY
) {
4138 if ((bitbuf
& (1 << winsize
)) != 0) {
4140 if ((err
= mp_mul (&res
, &M
[1], &res
)) != MP_OKAY
) {
4143 if ((err
= mp_reduce (&res
, P
, &mu
)) != MP_OKAY
) {
4152 __RES
:mp_clear (&res
);
4153 __MU
:mp_clear (&mu
);
4156 for (x
= 1<<(winsize
-1); x
< (1 << winsize
); x
++) {
4162 /* multiplies |a| * |b| and only computes up to digs digits of result
4163 * HAC pp. 595, Algorithm 14.12 Modified so you can control how
4164 * many digits of output are created.
4167 s_mp_mul_digs (const mp_int
* a
, const mp_int
* b
, mp_int
* c
, int digs
)
4170 int res
, pa
, pb
, ix
, iy
;
4173 mp_digit tmpx
, *tmpt
, *tmpy
;
4175 /* can we use the fast multiplier? */
4176 if (((digs
) < MP_WARRAY
) &&
4177 MIN (a
->used
, b
->used
) <
4178 (1 << ((CHAR_BIT
* sizeof (mp_word
)) - (2 * DIGIT_BIT
)))) {
4179 return fast_s_mp_mul_digs (a
, b
, c
, digs
);
4182 if ((res
= mp_init_size (&t
, digs
)) != MP_OKAY
) {
4187 /* compute the digits of the product directly */
4189 for (ix
= 0; ix
< pa
; ix
++) {
4190 /* set the carry to zero */
4193 /* limit ourselves to making digs digits of output */
4194 pb
= MIN (b
->used
, digs
- ix
);
4196 /* setup some aliases */
4197 /* copy of the digit from a used within the nested loop */
4200 /* an alias for the destination shifted ix places */
4203 /* an alias for the digits of b */
4206 /* compute the columns of the output and propagate the carry */
4207 for (iy
= 0; iy
< pb
; iy
++) {
4208 /* compute the column as a mp_word */
4209 r
= ((mp_word
)*tmpt
) +
4210 ((mp_word
)tmpx
) * ((mp_word
)*tmpy
++) +
4213 /* the new column is the lower part of the result */
4214 *tmpt
++ = (mp_digit
) (r
& ((mp_word
) MP_MASK
));
4216 /* get the carry word from the result */
4217 u
= (mp_digit
) (r
>> ((mp_word
) DIGIT_BIT
));
4219 /* set carry if it is placed below digs */
4220 if (ix
+ iy
< digs
) {
4232 /* multiplies |a| * |b| and does not compute the lower digs digits
4233 * [meant to get the higher part of the product]
4236 s_mp_mul_high_digs (const mp_int
* a
, const mp_int
* b
, mp_int
* c
, int digs
)
4239 int res
, pa
, pb
, ix
, iy
;
4242 mp_digit tmpx
, *tmpt
, *tmpy
;
4244 /* can we use the fast multiplier? */
4245 if (((a
->used
+ b
->used
+ 1) < MP_WARRAY
)
4246 && MIN (a
->used
, b
->used
) < (1 << ((CHAR_BIT
* sizeof (mp_word
)) - (2 * DIGIT_BIT
)))) {
4247 return fast_s_mp_mul_high_digs (a
, b
, c
, digs
);
4250 if ((res
= mp_init_size (&t
, a
->used
+ b
->used
+ 1)) != MP_OKAY
) {
4253 t
.used
= a
->used
+ b
->used
+ 1;
4257 for (ix
= 0; ix
< pa
; ix
++) {
4258 /* clear the carry */
4261 /* left hand side of A[ix] * B[iy] */
4264 /* alias to the address of where the digits will be stored */
4265 tmpt
= &(t
.dp
[digs
]);
4267 /* alias for where to read the right hand side from */
4268 tmpy
= b
->dp
+ (digs
- ix
);
4270 for (iy
= digs
- ix
; iy
< pb
; iy
++) {
4271 /* calculate the double precision result */
4272 r
= ((mp_word
)*tmpt
) +
4273 ((mp_word
)tmpx
) * ((mp_word
)*tmpy
++) +
4276 /* get the lower part */
4277 *tmpt
++ = (mp_digit
) (r
& ((mp_word
) MP_MASK
));
4279 /* carry the carry */
4280 u
= (mp_digit
) (r
>> ((mp_word
) DIGIT_BIT
));
4290 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
4292 s_mp_sqr (const mp_int
* a
, mp_int
* b
)
4295 int res
, ix
, iy
, pa
;
4297 mp_digit u
, tmpx
, *tmpt
;
4300 if ((res
= mp_init_size (&t
, 2*pa
+ 1)) != MP_OKAY
) {
4304 /* default used is maximum possible size */
4307 for (ix
= 0; ix
< pa
; ix
++) {
4308 /* first calculate the digit at 2*ix */
4309 /* calculate double precision result */
4310 r
= ((mp_word
) t
.dp
[2*ix
]) +
4311 ((mp_word
)a
->dp
[ix
])*((mp_word
)a
->dp
[ix
]);
4313 /* store lower part in result */
4314 t
.dp
[ix
+ix
] = (mp_digit
) (r
& ((mp_word
) MP_MASK
));
4317 u
= (mp_digit
)(r
>> ((mp_word
) DIGIT_BIT
));
4319 /* left hand side of A[ix] * A[iy] */
4322 /* alias for where to store the results */
4323 tmpt
= t
.dp
+ (2*ix
+ 1);
4325 for (iy
= ix
+ 1; iy
< pa
; iy
++) {
4326 /* first calculate the product */
4327 r
= ((mp_word
)tmpx
) * ((mp_word
)a
->dp
[iy
]);
4329 /* now calculate the double precision result, note we use
4330 * addition instead of *2 since it's easier to optimize
4332 r
= ((mp_word
) *tmpt
) + r
+ r
+ ((mp_word
) u
);
4334 /* store lower part */
4335 *tmpt
++ = (mp_digit
) (r
& ((mp_word
) MP_MASK
));
4338 u
= (mp_digit
)(r
>> ((mp_word
) DIGIT_BIT
));
4340 /* propagate upwards */
4341 while (u
!= ((mp_digit
) 0)) {
4342 r
= ((mp_word
) *tmpt
) + ((mp_word
) u
);
4343 *tmpt
++ = (mp_digit
) (r
& ((mp_word
) MP_MASK
));
4344 u
= (mp_digit
)(r
>> ((mp_word
) DIGIT_BIT
));
4354 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
4356 s_mp_sub (const mp_int
* a
, const mp_int
* b
, mp_int
* c
)
4358 int olduse
, res
, min
, max
;
4365 if (c
->alloc
< max
) {
4366 if ((res
= mp_grow (c
, max
)) != MP_OKAY
) {
4374 register mp_digit u
, *tmpa
, *tmpb
, *tmpc
;
4377 /* alias for digit pointers */
4382 /* set carry to zero */
4384 for (i
= 0; i
< min
; i
++) {
4385 /* T[i] = A[i] - B[i] - U */
4386 *tmpc
= *tmpa
++ - *tmpb
++ - u
;
4388 /* U = carry bit of T[i]
4389 * Note this saves performing an AND operation since
4390 * if a carry does occur it will propagate all the way to the
4391 * MSB. As a result a single shift is enough to get the carry
4393 u
= *tmpc
>> ((mp_digit
)(CHAR_BIT
* sizeof (mp_digit
) - 1));
4395 /* Clear carry from T[i] */
4399 /* now copy higher words if any, e.g. if A has more digits than B */
4400 for (; i
< max
; i
++) {
4401 /* T[i] = A[i] - U */
4402 *tmpc
= *tmpa
++ - u
;
4404 /* U = carry bit of T[i] */
4405 u
= *tmpc
>> ((mp_digit
)(CHAR_BIT
* sizeof (mp_digit
) - 1));
4407 /* Clear carry from T[i] */
4411 /* clear digits above used (since we may not have grown result above) */
4412 for (i
= c
->used
; i
< olduse
; i
++) {