Tomato 1.26
[tomato.git] / release / src / router / matrixssl / src / crypto / peersec / mpi.c
blobc37353d3b7a92565112a4c2fd03cce6879c1eff4
1 /*
2 * mpi.c
3 * Release $Name: MATRIXSSL_1_8_8_OPEN $
5 * multiple-precision integer library
6 */
7 /*
8 * Copyright (c) PeerSec Networks, 2002-2009. All Rights Reserved.
9 * The latest version of this code is available at http://www.matrixssl.org
11 * This software is open source; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This General Public License does NOT permit incorporating this software
17 * into proprietary programs. If you are unable to comply with the GPL, a
18 * commercial license for this software may be purchased from PeerSec Networks
19 * at http://www.peersec.com
21 * This program is distributed in WITHOUT ANY WARRANTY; without even the
22 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
23 * See the GNU General Public License for more details.
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 * http://www.gnu.org/copyleft/gpl.html
30 /******************************************************************************/
32 #include "../cryptoLayer.h"
33 #include <stdarg.h>
35 #ifndef USE_MPI2
37 static int32 mp_exptmod_fast (psPool_t *pool, mp_int * G, mp_int * X,
38 mp_int * P, mp_int * Y, int32 redmode);
40 /******************************************************************************/
42 FUTURE
43 1. Convert the mp_init and mp_clear functions to not use malloc + free,
44 but to use static storage within the bignum variable instead - but
45 how to handle grow()? Maybe use a simple memory allocator
46 2. verify stack usage of all functions and use of MP_LOW_MEM:
47 fast_mp_montgomery_reduce
48 fast_s_mp_mul_digs
49 fast_s_mp_sqr
50 fast_s_mp_mul_high_digs
51 3. HAC stands for Handbook of Applied Cryptography
52 http://www.cacr.math.uwaterloo.ca/hac/
54 /******************************************************************************/
56 Utility functions
58 void psZeromem(void *dst, size_t len)
60 unsigned char *mem = (unsigned char *)dst;
62 if (dst == NULL) {
63 return;
65 while (len-- > 0) {
66 *mem++ = 0;
70 void psBurnStack(unsigned long len)
72 unsigned char buf[32];
74 psZeromem(buf, sizeof(buf));
75 if (len > (unsigned long)sizeof(buf)) {
76 psBurnStack(len - sizeof(buf));
80 /******************************************************************************/
82 Multiple precision integer functions
83 Note: we don't use va_args here to prevent portability issues.
85 int32 _mp_init_multi(psPool_t *pool, mp_int *mp0, mp_int *mp1, mp_int *mp2,
86 mp_int *mp3, mp_int *mp4, mp_int *mp5,
87 mp_int *mp6, mp_int *mp7)
89 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */
90 int32 n = 0; /* Number of ok inits */
91 mp_int *tempArray[9];
93 tempArray[0] = mp0;
94 tempArray[1] = mp1;
95 tempArray[2] = mp2;
96 tempArray[3] = mp3;
97 tempArray[4] = mp4;
98 tempArray[5] = mp5;
99 tempArray[6] = mp6;
100 tempArray[7] = mp7;
101 tempArray[8] = NULL;
103 while (tempArray[n] != NULL) {
104 if (mp_init(pool, tempArray[n]) != MP_OKAY) {
105 res = MP_MEM;
106 break;
108 n++;
111 if (res == MP_MEM) {
112 n = 0;
113 while (tempArray[n] != NULL) {
114 mp_clear(tempArray[n]);
115 n++;
118 return res; /* Assumed ok, if error flagged above. */
120 /******************************************************************************/
122 Reads a unsigned char array, assumes the msb is stored first [big endian]
124 int32 mp_read_unsigned_bin (mp_int * a, unsigned char *b, int32 c)
126 int32 res;
129 Make sure there are at least two digits.
131 if (a->alloc < 2) {
132 if ((res = mp_grow(a, 2)) != MP_OKAY) {
133 return res;
138 Zero the int32.
140 mp_zero (a);
143 read the bytes in
145 while (c-- > 0) {
146 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
147 return res;
150 #ifndef MP_8BIT
151 a->dp[0] |= *b++;
152 a->used += 1;
153 #else
154 a->dp[0] = (*b & MP_MASK);
155 a->dp[1] |= ((*b++ >> 7U) & 1);
156 a->used += 2;
157 #endif /* MP_8BIT */
159 mp_clamp (a);
160 return MP_OKAY;
163 /******************************************************************************/
165 Compare two ints (signed)
167 int32 mp_cmp (mp_int * a, mp_int * b)
170 compare based on sign
172 if (a->sign != b->sign) {
173 if (a->sign == MP_NEG) {
174 return MP_LT;
175 } else {
176 return MP_GT;
181 compare digits
183 if (a->sign == MP_NEG) {
184 /* if negative compare opposite direction */
185 return mp_cmp_mag(b, a);
186 } else {
187 return mp_cmp_mag(a, b);
191 /******************************************************************************/
193 Store in unsigned [big endian] format.
195 int32 mp_to_unsigned_bin(psPool_t *pool, mp_int * a, unsigned char *b)
197 int32 x, res;
198 mp_int t;
200 if ((res = mp_init_copy(pool, &t, a)) != MP_OKAY) {
201 return res;
204 x = 0;
205 while (mp_iszero (&t) == 0) {
206 #ifndef MP_8BIT
207 b[x++] = (unsigned char) (t.dp[0] & 255);
208 #else
209 b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
210 #endif /* MP_8BIT */
211 if ((res = mp_div_2d (pool, &t, 8, &t, NULL)) != MP_OKAY) {
212 mp_clear (&t);
213 return res;
216 bn_reverse (b, x);
217 mp_clear (&t);
218 return MP_OKAY;
221 void _mp_clear_multi(mp_int *mp0, mp_int *mp1, mp_int *mp2, mp_int *mp3,
222 mp_int *mp4, mp_int *mp5, mp_int *mp6, mp_int *mp7)
224 int32 n = 0; /* Number of ok inits */
226 mp_int *tempArray[9];
228 tempArray[0] = mp0;
229 tempArray[1] = mp1;
230 tempArray[2] = mp2;
231 tempArray[3] = mp3;
232 tempArray[4] = mp4;
233 tempArray[5] = mp5;
234 tempArray[6] = mp6;
235 tempArray[7] = mp7;
236 tempArray[8] = NULL;
238 for (n = 0; tempArray[n] != NULL; n++) {
239 mp_clear(tempArray[n]);
243 /******************************************************************************/
245 Init a new mp_int.
247 int32 mp_init (psPool_t *pool, mp_int * a)
249 int32 i;
251 allocate memory required and clear it
253 a->dp = OPT_CAST(mp_digit) psMalloc(pool, sizeof (mp_digit) * MP_PREC);
254 if (a->dp == NULL) {
255 return MP_MEM;
259 set the digits to zero
261 for (i = 0; i < MP_PREC; i++) {
262 a->dp[i] = 0;
265 set the used to zero, allocated digits to the default precision and sign
266 to positive
268 a->used = 0;
269 a->alloc = MP_PREC;
270 a->sign = MP_ZPOS;
272 return MP_OKAY;
275 /******************************************************************************/
277 clear one (frees).
279 void mp_clear (mp_int * a)
281 int32 i;
283 only do anything if a hasn't been freed previously
285 if (a->dp != NULL) {
287 first zero the digits
289 for (i = 0; i < a->used; i++) {
290 a->dp[i] = 0;
293 /* free ram */
294 psFree (a->dp);
297 reset members to make debugging easier
299 a->dp = NULL;
300 a->alloc = a->used = 0;
301 a->sign = MP_ZPOS;
305 /******************************************************************************/
307 Get the size for an unsigned equivalent.
309 int32 mp_unsigned_bin_size (mp_int * a)
311 int32 size = mp_count_bits (a);
313 return (size / 8 + ((size & 7) != 0 ? 1 : 0));
316 /******************************************************************************/
318 Trim unused digits
320 This is used to ensure that leading zero digits are trimed and the
321 leading "used" digit will be non-zero. Typically very fast. Also fixes
322 the sign if there are no more leading digits
324 void mp_clamp (mp_int * a)
327 decrease used while the most significant digit is zero.
329 while (a->used > 0 && a->dp[a->used - 1] == 0) {
330 --(a->used);
334 reset the sign flag if used == 0
336 if (a->used == 0) {
337 a->sign = MP_ZPOS;
341 /******************************************************************************/
343 Shift left by a certain bit count.
345 int32 mp_mul_2d (mp_int * a, int32 b, mp_int * c)
347 mp_digit d;
348 int32 res;
351 Copy
353 if (a != c) {
354 if ((res = mp_copy (a, c)) != MP_OKAY) {
355 return res;
359 if (c->alloc < (int32)(c->used + b/DIGIT_BIT + 1)) {
360 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
361 return res;
366 Shift by as many digits in the bit count
368 if (b >= (int32)DIGIT_BIT) {
369 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
370 return res;
375 shift any bit count < DIGIT_BIT
377 d = (mp_digit) (b % DIGIT_BIT);
378 if (d != 0) {
379 register mp_digit *tmpc, shift, mask, r, rr;
380 register int32 x;
383 bitmask for carries
385 mask = (((mp_digit)1) << d) - 1;
388 shift for msbs
390 shift = DIGIT_BIT - d;
392 /* alias */
393 tmpc = c->dp;
395 /* carry */
396 r = 0;
397 for (x = 0; x < c->used; x++) {
399 get the higher bits of the current word
401 rr = (*tmpc >> shift) & mask;
404 shift the current word and OR in the carry
406 *tmpc = ((*tmpc << d) | r) & MP_MASK;
407 ++tmpc;
410 set the carry to the carry bits of the current word
412 r = rr;
416 set final carry
418 if (r != 0) {
419 c->dp[(c->used)++] = r;
422 mp_clamp (c);
423 return MP_OKAY;
426 /******************************************************************************/
428 Set to zero.
430 void mp_zero (mp_int * a)
432 int n;
433 mp_digit *tmp;
435 a->sign = MP_ZPOS;
436 a->used = 0;
438 tmp = a->dp;
439 for (n = 0; n < a->alloc; n++) {
440 *tmp++ = 0;
444 #ifdef MP_LOW_MEM
445 #define TAB_SIZE 32
446 #else
447 #define TAB_SIZE 256
448 #endif /* MP_LOW_MEM */
450 /******************************************************************************/
452 Compare maginitude of two ints (unsigned).
454 int32 mp_cmp_mag (mp_int * a, mp_int * b)
456 int32 n;
457 mp_digit *tmpa, *tmpb;
460 compare based on # of non-zero digits
462 if (a->used > b->used) {
463 return MP_GT;
466 if (a->used < b->used) {
467 return MP_LT;
470 /* alias for a */
471 tmpa = a->dp + (a->used - 1);
473 /* alias for b */
474 tmpb = b->dp + (a->used - 1);
477 compare based on digits
479 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
480 if (*tmpa > *tmpb) {
481 return MP_GT;
484 if (*tmpa < *tmpb) {
485 return MP_LT;
488 return MP_EQ;
491 /******************************************************************************/
493 computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
495 Uses a left-to-right k-ary sliding window to compute the modular
496 exponentiation. The value of k changes based on the size of the exponent.
498 Uses Montgomery or Diminished Radix reduction [whichever appropriate]
500 int32 mp_exptmod(psPool_t *pool, mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
504 modulus P must be positive
506 if (P->sign == MP_NEG) {
507 return MP_VAL;
511 if exponent X is negative we have to recurse
513 if (X->sign == MP_NEG) {
514 mp_int tmpG, tmpX;
515 int32 err;
518 first compute 1/G mod P
520 if ((err = mp_init(pool, &tmpG)) != MP_OKAY) {
521 return err;
523 if ((err = mp_invmod(pool, G, P, &tmpG)) != MP_OKAY) {
524 mp_clear(&tmpG);
525 return err;
529 now get |X|
531 if ((err = mp_init(pool, &tmpX)) != MP_OKAY) {
532 mp_clear(&tmpG);
533 return err;
535 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
536 mp_clear(&tmpG);
537 mp_clear(&tmpX);
538 return err;
542 and now compute (1/G)**|X| instead of G**X [X < 0]
544 err = mp_exptmod(pool, &tmpG, &tmpX, P, Y);
545 mp_clear(&tmpG);
546 mp_clear(&tmpX);
547 return err;
551 if the modulus is odd or dr != 0 use the fast method
553 if (mp_isodd (P) == 1) {
554 return mp_exptmod_fast (pool, G, X, P, Y, 0);
555 } else {
557 no exptmod for evens
559 return MP_VAL;
563 /******************************************************************************/
565 Call only from mp_exptmod to make sure this fast version qualifies
567 static int32 mp_exptmod_fast(psPool_t *pool, mp_int * G, mp_int * X,
568 mp_int * P, mp_int * Y, int32 redmode)
570 mp_int M[TAB_SIZE], res;
571 mp_digit buf, mp;
572 int32 err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
576 use a pointer to the reduction algorithm. This allows us to use
577 one of many reduction algorithms without modding the guts of
578 the code with if statements everywhere.
580 int32 (*redux)(mp_int*,mp_int*,mp_digit);
583 find window size
585 x = mp_count_bits (X);
586 if (x <= 7) {
587 winsize = 2;
588 } else if (x <= 36) {
589 winsize = 3;
590 } else if (x <= 140) {
591 winsize = 4;
592 } else if (x <= 450) {
593 winsize = 5;
594 } else if (x <= 1303) {
595 winsize = 6;
596 } else if (x <= 3529) {
597 winsize = 7;
598 } else {
599 winsize = 8;
602 #ifdef MP_LOW_MEM
603 if (winsize > 5) {
604 winsize = 5;
606 #endif
609 init M array
610 init first cell
612 if ((err = mp_init(pool, &M[1])) != MP_OKAY) {
613 return err;
617 now init the second half of the array
619 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
620 if ((err = mp_init(pool, &M[x])) != MP_OKAY) {
621 for (y = 1<<(winsize-1); y < x; y++) {
622 mp_clear(&M[y]);
624 mp_clear(&M[1]);
625 return err;
631 now setup montgomery
633 if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) {
634 goto LBL_M;
638 automatically pick the comba one if available
640 if (((P->used * 2 + 1) < MP_WARRAY) &&
641 P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
642 redux = fast_mp_montgomery_reduce;
643 } else {
645 use slower baseline Montgomery method
647 redux = mp_montgomery_reduce;
651 setup result
653 if ((err = mp_init(pool, &res)) != MP_OKAY) {
654 goto LBL_M;
658 create M table. The first half of the table is not computed
659 though accept for M[0] and M[1]
663 now we need R mod m
665 if ((err = mp_montgomery_calc_normalization(&res, P)) != MP_OKAY) {
666 goto LBL_RES;
670 now set M[1] to G * R mod m
672 if ((err = mp_mulmod(pool, G, &res, P, &M[1])) != MP_OKAY) {
673 goto LBL_RES;
677 compute the value at M[1<<(winsize-1)] by squaring
678 M[1] (winsize-1) times
680 if ((err = mp_copy(&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
681 goto LBL_RES;
684 for (x = 0; x < (winsize - 1); x++) {
685 if ((err = mp_sqr(pool, &M[1 << (winsize - 1)],
686 &M[1 << (winsize - 1)])) != MP_OKAY) {
687 goto LBL_RES;
689 if ((err = redux(&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
690 goto LBL_RES;
695 create upper table
697 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
698 if ((err = mp_mul(pool, &M[x - 1], &M[1], &M[x])) != MP_OKAY) {
699 goto LBL_RES;
701 if ((err = redux(&M[x], P, mp)) != MP_OKAY) {
702 goto LBL_RES;
707 set initial mode and bit cnt
709 mode = 0;
710 bitcnt = 1;
711 buf = 0;
712 digidx = X->used - 1;
713 bitcpy = 0;
714 bitbuf = 0;
716 for (;;) {
718 grab next digit as required
720 if (--bitcnt == 0) {
721 /* if digidx == -1 we are out of digits so break */
722 if (digidx == -1) {
723 break;
725 /* read next digit and reset bitcnt */
726 buf = X->dp[digidx--];
727 bitcnt = (int)DIGIT_BIT;
730 /* grab the next msb from the exponent */
731 y = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
732 buf <<= (mp_digit)1;
735 if the bit is zero and mode == 0 then we ignore it
736 These represent the leading zero bits before the first 1 bit
737 in the exponent. Technically this opt is not required but it
738 does lower the # of trivial squaring/reductions used
740 if (mode == 0 && y == 0) {
741 continue;
745 if the bit is zero and mode == 1 then we square
747 if (mode == 1 && y == 0) {
748 if ((err = mp_sqr (pool, &res, &res)) != MP_OKAY) {
749 goto LBL_RES;
751 if ((err = redux (&res, P, mp)) != MP_OKAY) {
752 goto LBL_RES;
754 continue;
758 else we add it to the window
760 bitbuf |= (y << (winsize - ++bitcpy));
761 mode = 2;
763 if (bitcpy == winsize) {
765 ok window is filled so square as required and multiply
766 square first
768 for (x = 0; x < winsize; x++) {
769 if ((err = mp_sqr(pool, &res, &res)) != MP_OKAY) {
770 goto LBL_RES;
772 if ((err = redux(&res, P, mp)) != MP_OKAY) {
773 goto LBL_RES;
777 /* then multiply */
778 if ((err = mp_mul(pool, &res, &M[bitbuf], &res)) != MP_OKAY) {
779 goto LBL_RES;
781 if ((err = redux(&res, P, mp)) != MP_OKAY) {
782 goto LBL_RES;
786 empty window and reset
788 bitcpy = 0;
789 bitbuf = 0;
790 mode = 1;
795 if bits remain then square/multiply
797 if (mode == 2 && bitcpy > 0) {
798 /* square then multiply if the bit is set */
799 for (x = 0; x < bitcpy; x++) {
800 if ((err = mp_sqr(pool, &res, &res)) != MP_OKAY) {
801 goto LBL_RES;
803 if ((err = redux(&res, P, mp)) != MP_OKAY) {
804 goto LBL_RES;
808 get next bit of the window
810 bitbuf <<= 1;
811 if ((bitbuf & (1 << winsize)) != 0) {
813 then multiply
815 if ((err = mp_mul(pool, &res, &M[1], &res)) != MP_OKAY) {
816 goto LBL_RES;
818 if ((err = redux(&res, P, mp)) != MP_OKAY) {
819 goto LBL_RES;
826 fixup result if Montgomery reduction is used
827 recall that any value in a Montgomery system is
828 actually multiplied by R mod n. So we have
829 to reduce one more time to cancel out the factor of R.
831 if ((err = redux(&res, P, mp)) != MP_OKAY) {
832 goto LBL_RES;
836 swap res with Y
838 mp_exch(&res, Y);
839 err = MP_OKAY;
840 LBL_RES:mp_clear(&res);
841 LBL_M:
842 mp_clear(&M[1]);
843 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
844 mp_clear(&M[x]);
846 return err;
849 /******************************************************************************/
851 Grow as required
853 int32 mp_grow (mp_int * a, int32 size)
855 int32 i;
856 mp_digit *tmp;
859 If the alloc size is smaller alloc more ram.
861 if (a->alloc < size) {
863 ensure there are always at least MP_PREC digits extra on top
865 size += (MP_PREC * 2) - (size % MP_PREC);
868 Reallocate the array a->dp
870 We store the return in a temporary variable in case the operation
871 failed we don't want to overwrite the dp member of a.
873 tmp = OPT_CAST(mp_digit) psRealloc(a->dp, sizeof (mp_digit) * size);
874 if (tmp == NULL) {
876 reallocation failed but "a" is still valid [can be freed]
878 return MP_MEM;
882 reallocation succeeded so set a->dp
884 a->dp = tmp;
887 zero excess digits
889 i = a->alloc;
890 a->alloc = size;
891 for (; i < a->alloc; i++) {
892 a->dp[i] = 0;
895 return MP_OKAY;
898 /******************************************************************************/
900 b = |a|
902 Simple function copies the input and fixes the sign to positive
904 int32 mp_abs (mp_int * a, mp_int * b)
906 int32 res;
909 copy a to b
911 if (a != b) {
912 if ((res = mp_copy (a, b)) != MP_OKAY) {
913 return res;
918 Force the sign of b to positive
920 b->sign = MP_ZPOS;
922 return MP_OKAY;
925 /******************************************************************************/
927 Creates "a" then copies b into it
929 int32 mp_init_copy(psPool_t *pool, mp_int * a, mp_int * b)
931 int32 res;
933 if ((res = mp_init(pool, a)) != MP_OKAY) {
934 return res;
936 return mp_copy (b, a);
939 /******************************************************************************/
941 Reverse an array, used for radix code
943 void bn_reverse (unsigned char *s, int32 len)
945 int32 ix, iy;
946 unsigned char t;
948 ix = 0;
949 iy = len - 1;
950 while (ix < iy) {
951 t = s[ix];
952 s[ix] = s[iy];
953 s[iy] = t;
954 ++ix;
955 --iy;
959 /******************************************************************************/
961 Shift right by a certain bit count (store quotient in c, optional
962 remainder in d)
964 int32 mp_div_2d(psPool_t *pool, mp_int * a, int32 b, mp_int * c, mp_int * d)
966 mp_digit D, r, rr;
967 int32 x, res;
968 mp_int t;
971 If the shift count is <= 0 then we do no work
973 if (b <= 0) {
974 res = mp_copy (a, c);
975 if (d != NULL) {
976 mp_zero (d);
978 return res;
981 if ((res = mp_init(pool, &t)) != MP_OKAY) {
982 return res;
986 Get the remainder
988 if (d != NULL) {
989 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
990 mp_clear (&t);
991 return res;
995 /* copy */
996 if ((res = mp_copy (a, c)) != MP_OKAY) {
997 mp_clear (&t);
998 return res;
1002 Shift by as many digits in the bit count
1004 if (b >= (int32)DIGIT_BIT) {
1005 mp_rshd (c, b / DIGIT_BIT);
1008 /* shift any bit count < DIGIT_BIT */
1009 D = (mp_digit) (b % DIGIT_BIT);
1010 if (D != 0) {
1011 register mp_digit *tmpc, mask, shift;
1013 /* mask */
1014 mask = (((mp_digit)1) << D) - 1;
1016 /* shift for lsb */
1017 shift = DIGIT_BIT - D;
1019 /* alias */
1020 tmpc = c->dp + (c->used - 1);
1022 /* carry */
1023 r = 0;
1024 for (x = c->used - 1; x >= 0; x--) {
1026 Get the lower bits of this word in a temp.
1028 rr = *tmpc & mask;
1031 shift the current word and mix in the carry bits from the previous word
1033 *tmpc = (*tmpc >> D) | (r << shift);
1034 --tmpc;
1037 set the carry to the carry bits of the current word found above
1039 r = rr;
1042 mp_clamp (c);
1043 if (d != NULL) {
1044 mp_exch (&t, d);
1046 mp_clear (&t);
1047 return MP_OKAY;
1050 /******************************************************************************/
1052 copy, b = a
1054 int32 mp_copy (mp_int * a, mp_int * b)
1056 int32 res, n;
1059 If dst == src do nothing
1061 if (a == b) {
1062 return MP_OKAY;
1066 Grow dest
1068 if (b->alloc < a->used) {
1069 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1070 return res;
1075 Zero b and copy the parameters over
1078 register mp_digit *tmpa, *tmpb;
1080 /* pointer aliases */
1081 /* source */
1082 tmpa = a->dp;
1084 /* destination */
1085 tmpb = b->dp;
1087 /* copy all the digits */
1088 for (n = 0; n < a->used; n++) {
1089 *tmpb++ = *tmpa++;
1092 /* clear high digits */
1093 for (; n < b->used; n++) {
1094 *tmpb++ = 0;
1099 copy used count and sign
1101 b->used = a->used;
1102 b->sign = a->sign;
1103 return MP_OKAY;
1106 /******************************************************************************/
1108 Returns the number of bits in an int32
1110 int32 mp_count_bits (mp_int * a)
1112 int32 r;
1113 mp_digit q;
1116 Shortcut
1118 if (a->used == 0) {
1119 return 0;
1123 Get number of digits and add that.
1125 r = (a->used - 1) * DIGIT_BIT;
1128 Take the last digit and count the bits in it.
1130 q = a->dp[a->used - 1];
1131 while (q > ((mp_digit) 0)) {
1132 ++r;
1133 q >>= ((mp_digit) 1);
1135 return r;
1138 /******************************************************************************/
1140 Shift left a certain amount of digits.
1142 int32 mp_lshd (mp_int * a, int32 b)
1144 int32 x, res;
1147 If its less than zero return.
1149 if (b <= 0) {
1150 return MP_OKAY;
1154 Grow to fit the new digits.
1156 if (a->alloc < a->used + b) {
1157 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
1158 return res;
1163 register mp_digit *top, *bottom;
1166 Increment the used by the shift amount then copy upwards.
1168 a->used += b;
1170 /* top */
1171 top = a->dp + a->used - 1;
1173 /* base */
1174 bottom = a->dp + a->used - 1 - b;
1177 Much like mp_rshd this is implemented using a sliding window
1178 except the window goes the otherway around. Copying from
1179 the bottom to the top. see bn_mp_rshd.c for more info.
1181 for (x = a->used - 1; x >= b; x--) {
1182 *top-- = *bottom--;
1185 /* zero the lower digits */
1186 top = a->dp;
1187 for (x = 0; x < b; x++) {
1188 *top++ = 0;
1191 return MP_OKAY;
1194 /******************************************************************************/
1196 Set to a digit.
1198 void mp_set (mp_int * a, mp_digit b)
1200 mp_zero (a);
1201 a->dp[0] = b & MP_MASK;
1202 a->used = (a->dp[0] != 0) ? 1 : 0;
1205 /******************************************************************************/
1207 Swap the elements of two integers, for cases where you can't simply swap
1208 the mp_int pointers around
1210 void mp_exch (mp_int * a, mp_int * b)
1212 mp_int t;
1214 t = *a;
1215 *a = *b;
1216 *b = t;
1219 /******************************************************************************/
1221 High level multiplication (handles sign)
1223 int32 mp_mul(psPool_t *pool, mp_int * a, mp_int * b, mp_int * c)
1225 int32 res, neg;
1226 int32 digs = a->used + b->used + 1;
1228 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
1230 /* Can we use the fast multiplier?
1232 The fast multiplier can be used if the output will have less than
1233 MP_WARRAY digits and the number of digits won't affect carry propagation
1235 if ((digs < MP_WARRAY) && MIN(a->used, b->used) <=
1236 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
1237 res = fast_s_mp_mul_digs(pool, a, b, c, digs);
1238 } else {
1239 res = s_mp_mul(pool, a, b, c);
1241 c->sign = (c->used > 0) ? neg : MP_ZPOS;
1242 return res;
1245 /******************************************************************************/
1247 c = a mod b, 0 <= c < b
1249 int32 mp_mod(psPool_t *pool, mp_int * a, mp_int * b, mp_int * c)
1251 mp_int t;
1252 int32 res;
1254 if ((res = mp_init(pool, &t)) != MP_OKAY) {
1255 return res;
1258 if ((res = mp_div (pool, a, b, NULL, &t)) != MP_OKAY) {
1259 mp_clear (&t);
1260 return res;
1263 if (t.sign != b->sign) {
1264 res = mp_add (b, &t, c);
1265 } else {
1266 res = MP_OKAY;
1267 mp_exch (&t, c);
1270 mp_clear (&t);
1271 return res;
1274 /******************************************************************************/
1276 shifts with subtractions when the result is greater than b.
1278 The method is slightly modified to shift B unconditionally upto just under
1279 the leading bit of b. This saves alot of multiple precision shifting.
1281 int32 mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
1283 int32 x, bits, res;
1286 How many bits of last digit does b use
1288 bits = mp_count_bits (b) % DIGIT_BIT;
1290 if (b->used > 1) {
1291 if ((res = mp_2expt(a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
1292 return res;
1294 } else {
1295 mp_set(a, 1);
1296 bits = 1;
1300 Now compute C = A * B mod b
1302 for (x = bits - 1; x < (int32)DIGIT_BIT; x++) {
1303 if ((res = mp_mul_2(a, a)) != MP_OKAY) {
1304 return res;
1306 if (mp_cmp_mag(a, b) != MP_LT) {
1307 if ((res = s_mp_sub(a, b, a)) != MP_OKAY) {
1308 return res;
1313 return MP_OKAY;
1316 /******************************************************************************/
1318 d = a * b (mod c)
1320 int32 mp_mulmod(psPool_t *pool, mp_int * a, mp_int * b, mp_int * c, mp_int * d)
1322 int32 res;
1323 mp_int t;
1325 if ((res = mp_init(pool, &t)) != MP_OKAY) {
1326 return res;
1329 if ((res = mp_mul (pool, a, b, &t)) != MP_OKAY) {
1330 mp_clear (&t);
1331 return res;
1333 res = mp_mod (pool, &t, c, d);
1334 mp_clear (&t);
1335 return res;
1338 /******************************************************************************/
1340 Computes b = a*a
1342 #ifdef USE_SMALL_WORD
1343 int32 mp_sqr (psPool_t *pool, mp_int * a, mp_int * b)
1345 int32 res;
1348 Can we use the fast comba multiplier?
1350 if ((a->used * 2 + 1) < MP_WARRAY && a->used <
1351 (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
1352 res = fast_s_mp_sqr (pool, a, b);
1353 } else {
1354 res = s_mp_sqr (pool, a, b);
1356 b->sign = MP_ZPOS;
1357 return res;
1359 #endif /* USE_SMALL_WORD */
1361 /******************************************************************************/
1363 Computes xR**-1 == x (mod N) via Montgomery Reduction.
1365 This is an optimized implementation of montgomery_reduce
1366 which uses the comba method to quickly calculate the columns of the
1367 reduction.
1369 Based on Algorithm 14.32 on pp.601 of HAC.
1372 int32 fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
1374 int32 ix, res, olduse;
1375 mp_word W[MP_WARRAY];
1378 Get old used count
1380 olduse = x->used;
1383 Grow a as required
1385 if (x->alloc < n->used + 1) {
1386 if ((res = mp_grow(x, n->used + 1)) != MP_OKAY) {
1387 return res;
1392 First we have to get the digits of the input into
1393 an array of double precision words W[...]
1396 register mp_word *_W;
1397 register mp_digit *tmpx;
1400 Alias for the W[] array
1402 _W = W;
1405 Alias for the digits of x
1407 tmpx = x->dp;
1410 Copy the digits of a into W[0..a->used-1]
1412 for (ix = 0; ix < x->used; ix++) {
1413 *_W++ = *tmpx++;
1417 Zero the high words of W[a->used..m->used*2]
1419 for (; ix < n->used * 2 + 1; ix++) {
1420 *_W++ = 0;
1425 Now we proceed to zero successive digits from the least
1426 significant upwards.
1428 for (ix = 0; ix < n->used; ix++) {
1430 mu = ai * m' mod b
1432 We avoid a double precision multiplication (which isn't required) by
1433 casting the value down to a mp_digit. Note this requires that
1434 W[ix-1] have the carry cleared (see after the inner loop)
1436 register mp_digit mu;
1437 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
1440 a = a + mu * m * b**i
1442 This is computed in place and on the fly. The multiplication by b**i
1443 is handled by offseting which columns the results are added to.
1445 Note the comba method normally doesn't handle carries in the inner loop
1446 In this case we fix the carry from the previous column since the
1447 Montgomery reduction requires digits of the result (so far) [see above]
1448 to work. This is handled by fixing up one carry after the inner loop.
1449 The carry fixups are done in order so after these loops the first
1450 m->used words of W[] have the carries fixed
1453 register int32 iy;
1454 register mp_digit *tmpn;
1455 register mp_word *_W;
1458 Alias for the digits of the modulus
1460 tmpn = n->dp;
1463 Alias for the columns set by an offset of ix
1465 _W = W + ix;
1468 inner loop
1470 for (iy = 0; iy < n->used; iy++) {
1471 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
1476 Now fix carry for next digit, W[ix+1]
1478 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
1482 Now we have to propagate the carries and shift the words downward [all those
1483 least significant digits we zeroed].
1486 register mp_digit *tmpx;
1487 register mp_word *_W, *_W1;
1490 Now fix rest of carries
1494 alias for current word
1496 _W1 = W + ix;
1499 alias for next word, where the carry goes
1501 _W = W + ++ix;
1503 for (; ix <= n->used * 2 + 1; ix++) {
1504 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
1508 copy out, A = A/b**n
1510 The result is A/b**n but instead of converting from an
1511 array of mp_word to mp_digit than calling mp_rshd
1512 we just copy them in the right order
1516 alias for destination word
1518 tmpx = x->dp;
1521 alias for shifted double precision result
1523 _W = W + n->used;
1525 for (ix = 0; ix < n->used + 1; ix++) {
1526 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
1530 zero oldused digits, if the input a was larger than
1531 m->used+1 we'll have to clear the digits
1533 for (; ix < olduse; ix++) {
1534 *tmpx++ = 0;
1539 Set the max used and clamp
1541 x->used = n->used + 1;
1542 mp_clamp(x);
1545 if A >= m then A = A - m
1547 if (mp_cmp_mag(x, n) != MP_LT) {
1548 return s_mp_sub(x, n, x);
1550 return MP_OKAY;
1553 /******************************************************************************/
1555 High level addition (handles signs)
1557 int32 mp_add (mp_int * a, mp_int * b, mp_int * c)
1559 int32 sa, sb, res;
1562 Get sign of both inputs
1564 sa = a->sign;
1565 sb = b->sign;
1568 Handle two cases, not four.
1570 if (sa == sb) {
1572 Both positive or both negative. Add their magnitudes, copy the sign.
1574 c->sign = sa;
1575 res = s_mp_add (a, b, c);
1576 } else {
1578 One positive, the other negative. Subtract the one with the greater
1579 magnitude from the one of the lesser magnitude. The result gets the sign of
1580 the one with the greater magnitude.
1582 if (mp_cmp_mag (a, b) == MP_LT) {
1583 c->sign = sb;
1584 res = s_mp_sub (b, a, c);
1585 } else {
1586 c->sign = sa;
1587 res = s_mp_sub (a, b, c);
1590 return res;
1593 /******************************************************************************/
1595 Compare a digit.
1597 int32 mp_cmp_d (mp_int * a, mp_digit b)
1600 Compare based on sign
1602 if (a->sign == MP_NEG) {
1603 return MP_LT;
1607 Compare based on magnitude
1609 if (a->used > 1) {
1610 return MP_GT;
1614 Compare the only digit of a to b
1616 if (a->dp[0] > b) {
1617 return MP_GT;
1618 } else if (a->dp[0] < b) {
1619 return MP_LT;
1620 } else {
1621 return MP_EQ;
1625 /******************************************************************************/
1627 b = a/2
1629 int32 mp_div_2 (mp_int * a, mp_int * b)
1631 int32 x, res, oldused;
1634 Copy
1636 if (b->alloc < a->used) {
1637 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1638 return res;
1642 oldused = b->used;
1643 b->used = a->used;
1645 register mp_digit r, rr, *tmpa, *tmpb;
1648 Source alias
1650 tmpa = a->dp + b->used - 1;
1653 dest alias
1655 tmpb = b->dp + b->used - 1;
1658 carry
1660 r = 0;
1661 for (x = b->used - 1; x >= 0; x--) {
1663 Get the carry for the next iteration
1665 rr = *tmpa & 1;
1668 Shift the current digit, add in carry and store
1670 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1672 Forward carry to next iteration
1674 r = rr;
1678 Zero excess digits
1680 tmpb = b->dp + b->used;
1681 for (x = b->used; x < oldused; x++) {
1682 *tmpb++ = 0;
1685 b->sign = a->sign;
1686 mp_clamp (b);
1687 return MP_OKAY;
1690 /******************************************************************************/
1692 Computes xR**-1 == x (mod N) via Montgomery Reduction
1694 #ifdef USE_SMALL_WORD
1695 int32 mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
1697 int32 ix, res, digs;
1698 mp_digit mu;
1700 /* Can the fast reduction [comba] method be used?
1702 Note that unlike in mul you're safely allowed *less* than the available
1703 columns [255 per default] since carries are fixed up in the inner loop.
1705 digs = n->used * 2 + 1;
1706 if ((digs < MP_WARRAY) &&
1707 n->used <
1708 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
1709 return fast_mp_montgomery_reduce (x, n, rho);
1713 Grow the input as required.
1715 if (x->alloc < digs) {
1716 if ((res = mp_grow (x, digs)) != MP_OKAY) {
1717 return res;
1720 x->used = digs;
1722 for (ix = 0; ix < n->used; ix++) {
1724 mu = ai * rho mod b
1726 The value of rho must be precalculated via mp_montgomery_setup()
1727 such that it equals -1/n0 mod b this allows the following inner
1728 loop to reduce the input one digit at a time
1730 mu = (mp_digit)(((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
1732 /* a = a + mu * m * b**i */
1734 register int32 iy;
1735 register mp_digit *tmpn, *tmpx, u;
1736 register mp_word r;
1739 alias for digits of the modulus
1741 tmpn = n->dp;
1744 alias for the digits of x [the input]
1746 tmpx = x->dp + ix;
1749 set the carry to zero
1751 u = 0;
1754 Multiply and add in place
1756 for (iy = 0; iy < n->used; iy++) {
1757 /* compute product and sum */
1758 r = ((mp_word)mu) * ((mp_word)*tmpn++) +
1759 ((mp_word) u) + ((mp_word) * tmpx);
1761 /* get carry */
1762 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
1764 /* fix digit */
1765 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
1767 /* At this point the ix'th digit of x should be zero */
1771 propagate carries upwards as required
1773 while (u) {
1774 *tmpx += u;
1775 u = *tmpx >> DIGIT_BIT;
1776 *tmpx++ &= MP_MASK;
1782 At this point the n.used'th least significant digits of x are all zero
1783 which means we can shift x to the right by n.used digits and the
1784 residue is unchanged.
1786 /* x = x/b**n.used */
1787 mp_clamp(x);
1788 mp_rshd (x, n->used);
1790 /* if x >= n then x = x - n */
1791 if (mp_cmp_mag (x, n) != MP_LT) {
1792 return s_mp_sub (x, n, x);
1795 return MP_OKAY;
1797 #endif /* USE_SMALL_WORD */
1799 /******************************************************************************/
1801 Setups the montgomery reduction stuff.
1803 int32 mp_montgomery_setup (mp_int * n, mp_digit * rho)
1805 mp_digit x, b;
1808 fast inversion mod 2**k
1810 Based on the fact that
1812 XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
1813 => 2*X*A - X*X*A*A = 1
1814 => 2*(1) - (1) = 1
1816 b = n->dp[0];
1818 if ((b & 1) == 0) {
1819 return MP_VAL;
1822 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
1823 x = (x * (2 - b * x)) & MP_MASK; /* here x*a==1 mod 2**8 */
1824 #if !defined(MP_8BIT)
1825 x = (x * (2 - b * x)) & MP_MASK; /* here x*a==1 mod 2**8 */
1826 #endif /* MP_8BIT */
1827 #if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
1828 x *= 2 - b * x; /* here x*a==1 mod 2**32 */
1829 #endif
1830 #ifdef MP_64BIT
1831 x *= 2 - b * x; /* here x*a==1 mod 2**64 */
1832 #endif /* MP_64BIT */
1834 /* rho = -1/m mod b */
1835 *rho = (((mp_word) 1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
1837 return MP_OKAY;
1840 /******************************************************************************/
1842 High level subtraction (handles signs)
1844 int32 mp_sub (mp_int * a, mp_int * b, mp_int * c)
1846 int32 sa, sb, res;
1848 sa = a->sign;
1849 sb = b->sign;
1851 if (sa != sb) {
1853 Subtract a negative from a positive, OR subtract a positive from a
1854 negative. In either case, ADD their magnitudes, and use the sign of
1855 the first number.
1857 c->sign = sa;
1858 res = s_mp_add (a, b, c);
1859 } else {
1861 Subtract a positive from a positive, OR subtract a negative
1862 from a negative. First, take the difference between their
1863 magnitudes, then...
1865 if (mp_cmp_mag (a, b) != MP_LT) {
1867 Copy the sign from the first
1869 c->sign = sa;
1870 /* The first has a larger or equal magnitude */
1871 res = s_mp_sub (a, b, c);
1872 } else {
1874 The result has the *opposite* sign from the first number.
1876 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
1878 The second has a larger magnitude
1880 res = s_mp_sub (b, a, c);
1883 return res;
1886 /******************************************************************************/
1888 calc a value mod 2**b
1890 int32 mp_mod_2d (mp_int * a, int32 b, mp_int * c)
1892 int32 x, res;
1895 if b is <= 0 then zero the int32
1897 if (b <= 0) {
1898 mp_zero (c);
1899 return MP_OKAY;
1903 If the modulus is larger than the value than return
1905 if (b >=(int32) (a->used * DIGIT_BIT)) {
1906 res = mp_copy (a, c);
1907 return res;
1910 /* copy */
1911 if ((res = mp_copy (a, c)) != MP_OKAY) {
1912 return res;
1916 Zero digits above the last digit of the modulus
1918 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
1919 c->dp[x] = 0;
1922 Clear the digit that is not completely outside/inside the modulus
1924 c->dp[b / DIGIT_BIT] &=
1925 (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
1926 mp_clamp (c);
1927 return MP_OKAY;
1930 /******************************************************************************/
1932 Shift right a certain amount of digits.
1934 void mp_rshd (mp_int * a, int32 b)
1936 int32 x;
1939 If b <= 0 then ignore it
1941 if (b <= 0) {
1942 return;
1946 If b > used then simply zero it and return.
1948 if (a->used <= b) {
1949 mp_zero (a);
1950 return;
1954 register mp_digit *bottom, *top;
1957 Shift the digits down
1959 /* bottom */
1960 bottom = a->dp;
1962 /* top [offset into digits] */
1963 top = a->dp + b;
1966 This is implemented as a sliding window where the window is b-digits long
1967 and digits from the top of the window are copied to the bottom.
1969 e.g.
1971 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
1972 /\ | ---->
1973 \-------------------/ ---->
1975 for (x = 0; x < (a->used - b); x++) {
1976 *bottom++ = *top++;
1980 Zero the top digits
1982 for (; x < a->used; x++) {
1983 *bottom++ = 0;
1988 Remove excess digits
1990 a->used -= b;
1993 /******************************************************************************/
1995 Low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9
1997 int32 s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
1999 int32 olduse, res, min, max;
2002 Find sizes
2004 min = b->used;
2005 max = a->used;
2008 init result
2010 if (c->alloc < max) {
2011 if ((res = mp_grow (c, max)) != MP_OKAY) {
2012 return res;
2015 olduse = c->used;
2016 c->used = max;
2019 register mp_digit u, *tmpa, *tmpb, *tmpc;
2020 register int32 i;
2023 alias for digit pointers
2025 tmpa = a->dp;
2026 tmpb = b->dp;
2027 tmpc = c->dp;
2030 set carry to zero
2032 u = 0;
2033 for (i = 0; i < min; i++) {
2034 /* T[i] = A[i] - B[i] - U */
2035 *tmpc = *tmpa++ - *tmpb++ - u;
2038 U = carry bit of T[i]
2039 Note this saves performing an AND operation since if a carry does occur it
2040 will propagate all the way to the MSB. As a result a single shift
2041 is enough to get the carry
2043 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
2045 /* Clear carry from T[i] */
2046 *tmpc++ &= MP_MASK;
2050 Now copy higher words if any, e.g. if A has more digits than B
2052 for (; i < max; i++) {
2053 /* T[i] = A[i] - U */
2054 *tmpc = *tmpa++ - u;
2056 /* U = carry bit of T[i] */
2057 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
2059 /* Clear carry from T[i] */
2060 *tmpc++ &= MP_MASK;
2064 Clear digits above used (since we may not have grown result above)
2066 for (i = c->used; i < olduse; i++) {
2067 *tmpc++ = 0;
2071 mp_clamp (c);
2072 return MP_OKAY;
2074 /******************************************************************************/
2076 integer signed division.
2078 c*b + d == a [e.g. a/b, c=quotient, d=remainder]
2079 HAC pp.598 Algorithm 14.20
2081 Note that the description in HAC is horribly incomplete. For example,
2082 it doesn't consider the case where digits are removed from 'x' in the inner
2083 loop. It also doesn't consider the case that y has fewer than three
2084 digits, etc..
2086 The overall algorithm is as described as 14.20 from HAC but fixed to
2087 treat these cases.
2089 #ifdef MP_DIV_SMALL
2090 int32 mp_div(psPool_t *pool, mp_int * a, mp_int * b, mp_int * c, mp_int * d)
2092 mp_int ta, tb, tq, q;
2093 int32 res, n, n2;
2096 is divisor zero ?
2098 if (mp_iszero (b) == 1) {
2099 return MP_VAL;
2103 if a < b then q=0, r = a
2105 if (mp_cmp_mag (a, b) == MP_LT) {
2106 if (d != NULL) {
2107 res = mp_copy (a, d);
2108 } else {
2109 res = MP_OKAY;
2111 if (c != NULL) {
2112 mp_zero (c);
2114 return res;
2118 init our temps
2120 if ((res = _mp_init_multi(pool, &ta, &tb, &tq, &q, NULL, NULL, NULL, NULL) != MP_OKAY)) {
2121 return res;
2125 tq = 2^n, tb == b*2^n
2127 mp_set(&tq, 1);
2128 n = mp_count_bits(a) - mp_count_bits(b);
2129 if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
2130 ((res = mp_abs(b, &tb)) != MP_OKAY) ||
2131 ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
2132 ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
2133 goto __ERR;
2135 /* old
2136 if (((res = mp_copy(a, &ta)) != MP_OKAY) ||
2137 ((res = mp_copy(b, &tb)) != MP_OKAY) ||
2138 ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
2139 ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
2140 goto LBL_ERR;
2143 while (n-- >= 0) {
2144 if (mp_cmp(&tb, &ta) != MP_GT) {
2145 if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
2146 ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
2147 goto LBL_ERR;
2150 if (((res = mp_div_2d(pool, &tb, 1, &tb, NULL)) != MP_OKAY) ||
2151 ((res = mp_div_2d(pool, &tq, 1, &tq, NULL)) != MP_OKAY)) {
2152 goto LBL_ERR;
2157 now q == quotient and ta == remainder
2159 n = a->sign;
2160 n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
2161 if (c != NULL) {
2162 mp_exch(c, &q);
2163 c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
2165 if (d != NULL) {
2166 mp_exch(d, &ta);
2167 d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
2169 LBL_ERR:
2170 _mp_clear_multi(&ta, &tb, &tq, &q, NULL, NULL, NULL, NULL);
2171 return res;
2173 #else /* MP_DIV_SMALL */
2175 int32 mp_div(psPool_t *pool, mp_int * a, mp_int * b, mp_int * c, mp_int * d)
2177 mp_int q, x, y, t1, t2;
2178 int32 res, n, t, i, norm, neg;
2181 is divisor zero ?
2183 if (mp_iszero(b) == 1) {
2184 return MP_VAL;
2188 if a < b then q=0, r = a
2190 if (mp_cmp_mag(a, b) == MP_LT) {
2191 if (d != NULL) {
2192 res = mp_copy(a, d);
2193 } else {
2194 res = MP_OKAY;
2196 if (c != NULL) {
2197 mp_zero(c);
2199 return res;
2202 if ((res = mp_init_size(pool, &q, a->used + 2)) != MP_OKAY) {
2203 return res;
2205 q.used = a->used + 2;
2207 if ((res = mp_init(pool, &t1)) != MP_OKAY) {
2208 goto LBL_Q;
2211 if ((res = mp_init(pool, &t2)) != MP_OKAY) {
2212 goto LBL_T1;
2215 if ((res = mp_init_copy(pool, &x, a)) != MP_OKAY) {
2216 goto LBL_T2;
2219 if ((res = mp_init_copy(pool, &y, b)) != MP_OKAY) {
2220 goto LBL_X;
2224 fix the sign
2226 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
2227 x.sign = y.sign = MP_ZPOS;
2230 normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT]
2232 norm = mp_count_bits(&y) % DIGIT_BIT;
2233 if (norm < (int32)(DIGIT_BIT-1)) {
2234 norm = (DIGIT_BIT-1) - norm;
2235 if ((res = mp_mul_2d(&x, norm, &x)) != MP_OKAY) {
2236 goto LBL_Y;
2238 if ((res = mp_mul_2d(&y, norm, &y)) != MP_OKAY) {
2239 goto LBL_Y;
2241 } else {
2242 norm = 0;
2246 note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4
2248 n = x.used - 1;
2249 t = y.used - 1;
2252 while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} }
2254 if ((res = mp_lshd(&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
2255 goto LBL_Y;
2258 while (mp_cmp(&x, &y) != MP_LT) {
2259 ++(q.dp[n - t]);
2260 if ((res = mp_sub(&x, &y, &x)) != MP_OKAY) {
2261 goto LBL_Y;
2266 reset y by shifting it back down
2268 mp_rshd(&y, n - t);
2271 step 3. for i from n down to (t + 1)
2273 for (i = n; i >= (t + 1); i--) {
2274 if (i > x.used) {
2275 continue;
2279 step 3.1 if xi == yt then set q{i-t-1} to b-1,
2280 otherwise set q{i-t-1} to (xi*b + x{i-1})/yt
2282 if (x.dp[i] == y.dp[t]) {
2283 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
2284 } else {
2285 mp_word tmp;
2286 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
2287 tmp |= ((mp_word) x.dp[i - 1]);
2288 tmp /= ((mp_word) y.dp[t]);
2289 if (tmp > (mp_word) MP_MASK) {
2290 tmp = MP_MASK;
2292 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
2296 while (q{i-t-1} * (yt * b + y{t-1})) >
2297 xi * b**2 + xi-1 * b + xi-2
2299 do q{i-t-1} -= 1;
2301 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
2302 do {
2303 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
2306 find left hand
2308 mp_zero (&t1);
2309 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
2310 t1.dp[1] = y.dp[t];
2311 t1.used = 2;
2312 if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
2313 goto LBL_Y;
2317 find right hand
2319 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
2320 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
2321 t2.dp[2] = x.dp[i];
2322 t2.used = 3;
2323 } while (mp_cmp_mag(&t1, &t2) == MP_GT);
2326 step 3.3 x = x - q{i-t-1} * y * b**{i-t-1}
2328 if ((res = mp_mul_d(&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
2329 goto LBL_Y;
2332 if ((res = mp_lshd(&t1, i - t - 1)) != MP_OKAY) {
2333 goto LBL_Y;
2336 if ((res = mp_sub(&x, &t1, &x)) != MP_OKAY) {
2337 goto LBL_Y;
2341 if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; }
2343 if (x.sign == MP_NEG) {
2344 if ((res = mp_copy(&y, &t1)) != MP_OKAY) {
2345 goto LBL_Y;
2347 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
2348 goto LBL_Y;
2350 if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
2351 goto LBL_Y;
2354 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
2359 now q is the quotient and x is the remainder
2360 [which we have to normalize]
2364 get sign before writing to c
2366 x.sign = x.used == 0 ? MP_ZPOS : a->sign;
2368 if (c != NULL) {
2369 mp_clamp(&q);
2370 mp_exch(&q, c);
2371 c->sign = neg;
2374 if (d != NULL) {
2375 mp_div_2d(pool, &x, norm, &x, NULL);
2376 mp_exch(&x, d);
2379 res = MP_OKAY;
2381 LBL_Y:mp_clear (&y);
2382 LBL_X:mp_clear (&x);
2383 LBL_T2:mp_clear (&t2);
2384 LBL_T1:mp_clear (&t1);
2385 LBL_Q:mp_clear (&q);
2386 return res;
2388 #endif /* MP_DIV_SMALL */
2390 /******************************************************************************/
2392 multiplies |a| * |b| and only computes upto digs digits of result
2393 HAC pp. 595, Algorithm 14.12 Modified so you can control how many digits
2394 of output are created.
2396 #ifdef USE_SMALL_WORD
2397 int32 s_mp_mul_digs(psPool_t *pool, mp_int * a, mp_int * b, mp_int * c, int32 digs)
2399 mp_int t;
2400 int32 res, pa, pb, ix, iy;
2401 mp_digit u;
2402 mp_word r;
2403 mp_digit tmpx, *tmpt, *tmpy;
2406 Can we use the fast multiplier?
2408 if (((digs) < MP_WARRAY) &&
2409 MIN (a->used, b->used) <
2410 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2411 return fast_s_mp_mul_digs (pool, a, b, c, digs);
2414 if ((res = mp_init_size(pool, &t, digs)) != MP_OKAY) {
2415 return res;
2417 t.used = digs;
2420 Compute the digits of the product directly
2422 pa = a->used;
2423 for (ix = 0; ix < pa; ix++) {
2424 /* set the carry to zero */
2425 u = 0;
2428 Limit ourselves to making digs digits of output.
2430 pb = MIN (b->used, digs - ix);
2433 Setup some aliases. Copy of the digit from a used
2434 within the nested loop
2436 tmpx = a->dp[ix];
2439 An alias for the destination shifted ix places
2441 tmpt = t.dp + ix;
2444 An alias for the digits of b
2446 tmpy = b->dp;
2449 Compute the columns of the output and propagate the carry
2451 for (iy = 0; iy < pb; iy++) {
2452 /* compute the column as a mp_word */
2453 r = ((mp_word)*tmpt) +
2454 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
2455 ((mp_word) u);
2457 /* the new column is the lower part of the result */
2458 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2460 /* get the carry word from the result */
2461 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2464 Set carry if it is placed below digs
2466 if (ix + iy < digs) {
2467 *tmpt = u;
2471 mp_clamp (&t);
2472 mp_exch (&t, c);
2474 mp_clear (&t);
2475 return MP_OKAY;
2477 #endif /* USE_SMALL_WORD */
2479 /******************************************************************************/
2481 Fast (comba) multiplier
2483 This is the fast column-array [comba] multiplier. It is designed to
2484 compute the columns of the product first then handle the carries afterwards.
2485 This has the effect of making the nested loops that compute the columns
2486 very simple and schedulable on super-scalar processors.
2488 This has been modified to produce a variable number of digits of output so
2489 if say only a half-product is required you don't have to compute the upper
2490 half (a feature required for fast Barrett reduction).
2492 Based on Algorithm 14.12 on pp.595 of HAC.
2495 int32 fast_s_mp_mul_digs(psPool_t *pool, mp_int * a, mp_int * b, mp_int * c,
2496 int32 digs)
2498 int32 olduse, res, pa, ix, iz, neg;
2499 mp_digit W[MP_WARRAY];
2500 register mp_word _W;
2502 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
2505 grow the destination as required
2507 if (c->alloc < digs) {
2508 if ((res = mp_grow(c, digs)) != MP_OKAY) {
2509 return res;
2514 number of output digits to produce
2516 pa = MIN(digs, a->used + b->used);
2519 clear the carry
2521 _W = 0;
2522 for (ix = 0; ix < pa; ix++) {
2523 int32 tx, ty;
2524 int32 iy;
2525 mp_digit *tmpx, *tmpy;
2528 get offsets into the two bignums
2530 ty = MIN(b->used-1, ix);
2531 tx = ix - ty;
2534 setup temp aliases
2536 tmpx = a->dp + tx;
2537 tmpy = b->dp + ty;
2540 this is the number of times the loop will iterrate, essentially its
2541 while (tx++ < a->used && ty-- >= 0) { ... }
2543 iy = MIN(a->used-tx, ty+1);
2546 execute loop
2548 for (iz = 0; iz < iy; ++iz) {
2549 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
2553 store term
2555 W[ix] = (mp_digit)(_W & MP_MASK);
2558 make next carry
2560 _W = _W >> ((mp_word)DIGIT_BIT);
2564 store final carry
2566 W[ix] = (mp_digit)(_W & MP_MASK);
2569 setup dest
2571 olduse = c->used;
2572 c->used = pa;
2575 register mp_digit *tmpc;
2576 tmpc = c->dp;
2577 for (ix = 0; ix < pa+1; ix++) {
2579 now extract the previous digit [below the carry]
2581 *tmpc++ = W[ix];
2585 clear unused digits [that existed in the old copy of c]
2587 for (; ix < olduse; ix++) {
2588 *tmpc++ = 0;
2591 mp_clamp (c);
2592 c->sign = (c->used > 0) ? neg : MP_ZPOS;
2593 return MP_OKAY;
2596 /******************************************************************************/
2598 b = a*2
2600 int32 mp_mul_2 (mp_int * a, mp_int * b)
2602 int32 x, res, oldused;
2605 grow to accomodate result
2607 if (b->alloc < a->used + 1) {
2608 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
2609 return res;
2613 oldused = b->used;
2614 b->used = a->used;
2617 register mp_digit r, rr, *tmpa, *tmpb;
2619 /* alias for source */
2620 tmpa = a->dp;
2622 /* alias for dest */
2623 tmpb = b->dp;
2625 /* carry */
2626 r = 0;
2627 for (x = 0; x < a->used; x++) {
2630 get what will be the *next* carry bit from the MSB of the
2631 current digit
2633 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
2636 now shift up this digit, add in the carry [from the previous]
2638 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
2640 /* copy the carry that would be from the source digit into the next
2641 iteration
2643 r = rr;
2647 new leading digit?
2649 if (r != 0) {
2651 add a MSB which is always 1 at this point
2653 *tmpb = 1;
2654 ++(b->used);
2658 now zero any excess digits on the destination that we didn't write to
2660 tmpb = b->dp + b->used;
2661 for (x = b->used; x < oldused; x++) {
2662 *tmpb++ = 0;
2665 b->sign = a->sign;
2666 return MP_OKAY;
2669 /******************************************************************************/
2671 multiply by a digit
2673 int32 mp_mul_d(mp_int * a, mp_digit b, mp_int * c)
2675 mp_digit u, *tmpa, *tmpc;
2676 mp_word r;
2677 int32 ix, res, olduse;
2680 make sure c is big enough to hold a*b
2682 if (c->alloc < a->used + 1) {
2683 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
2684 return res;
2689 get the original destinations used count
2691 olduse = c->used;
2694 set the sign
2696 c->sign = a->sign;
2699 alias for a->dp [source]
2701 tmpa = a->dp;
2704 alias for c->dp [dest]
2706 tmpc = c->dp;
2708 /* zero carry */
2709 u = 0;
2711 /* compute columns */
2712 for (ix = 0; ix < a->used; ix++) {
2714 compute product and carry sum for this term
2716 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
2719 mask off higher bits to get a single digit
2721 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
2724 send carry into next iteration
2726 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2730 store final carry [if any] and increment ix offset
2732 *tmpc++ = u;
2733 ++ix;
2736 now zero digits above the top
2738 while (ix++ < olduse) {
2739 *tmpc++ = 0;
2742 /* set used count */
2743 c->used = a->used + 1;
2744 mp_clamp(c);
2746 return MP_OKAY;
2749 /******************************************************************************/
2751 low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16
2753 #ifdef USE_SMALL_WORD
2754 int32 s_mp_sqr (psPool_t *pool, mp_int * a, mp_int * b)
2756 mp_int t;
2757 int32 res, ix, iy, pa;
2758 mp_word r;
2759 mp_digit u, tmpx, *tmpt;
2761 pa = a->used;
2762 if ((res = mp_init_size(pool, &t, 2*pa + 1)) != MP_OKAY) {
2763 return res;
2767 default used is maximum possible size
2769 t.used = 2*pa + 1;
2771 for (ix = 0; ix < pa; ix++) {
2773 first calculate the digit at 2*ix
2774 calculate double precision result
2776 r = ((mp_word) t.dp[2*ix]) +
2777 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
2780 store lower part in result
2782 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
2785 get the carry
2787 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2790 left hand side of A[ix] * A[iy]
2792 tmpx = a->dp[ix];
2795 alias for where to store the results
2797 tmpt = t.dp + (2*ix + 1);
2799 for (iy = ix + 1; iy < pa; iy++) {
2801 first calculate the product
2803 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
2806 now calculate the double precision result, note we use addition
2807 instead of *2 since it's easier to optimize
2809 r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
2812 store lower part
2814 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2816 /* get carry */
2817 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2819 /* propagate upwards */
2820 while (u != ((mp_digit) 0)) {
2821 r = ((mp_word) *tmpt) + ((mp_word) u);
2822 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2823 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2827 mp_clamp (&t);
2828 mp_exch (&t, b);
2829 mp_clear (&t);
2830 return MP_OKAY;
2832 #endif /* USE_SMALL_WORD */
2834 /******************************************************************************/
2836 fast squaring
2838 This is the comba method where the columns of the product are computed
2839 first then the carries are computed. This has the effect of making a very
2840 simple inner loop that is executed the most
2842 W2 represents the outer products and W the inner.
2844 A further optimizations is made because the inner products are of the
2845 form "A * B * 2". The *2 part does not need to be computed until the end
2846 which is good because 64-bit shifts are slow!
2848 Based on Algorithm 14.16 on pp.597 of HAC.
2850 This is the 1.0 version, but no SSE stuff
2852 int32 fast_s_mp_sqr(psPool_t *pool, mp_int * a, mp_int * b)
2854 int32 olduse, res, pa, ix, iz;
2855 mp_digit W[MP_WARRAY], *tmpx;
2856 mp_word W1;
2859 grow the destination as required
2861 pa = a->used + a->used;
2862 if (b->alloc < pa) {
2863 if ((res = mp_grow(b, pa)) != MP_OKAY) {
2864 return res;
2869 number of output digits to produce
2871 W1 = 0;
2872 for (ix = 0; ix < pa; ix++) {
2873 int32 tx, ty, iy;
2874 mp_word _W;
2875 mp_digit *tmpy;
2878 clear counter
2880 _W = 0;
2883 get offsets into the two bignums
2885 ty = MIN(a->used-1, ix);
2886 tx = ix - ty;
2889 setup temp aliases
2891 tmpx = a->dp + tx;
2892 tmpy = a->dp + ty;
2895 this is the number of times the loop will iterrate, essentially
2896 while (tx++ < a->used && ty-- >= 0) { ... }
2898 iy = MIN(a->used-tx, ty+1);
2901 now for squaring tx can never equal ty
2902 we halve the distance since they approach at a rate of 2x
2903 and we have to round because odd cases need to be executed
2905 iy = MIN(iy, (ty-tx+1)>>1);
2908 execute loop
2910 for (iz = 0; iz < iy; iz++) {
2911 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
2915 double the inner product and add carry
2917 _W = _W + _W + W1;
2920 even columns have the square term in them
2922 if ((ix&1) == 0) {
2923 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
2927 store it
2929 W[ix] = (mp_digit)(_W & MP_MASK);
2932 make next carry
2934 W1 = _W >> ((mp_word)DIGIT_BIT);
2938 setup dest
2940 olduse = b->used;
2941 b->used = a->used+a->used;
2944 mp_digit *tmpb;
2945 tmpb = b->dp;
2946 for (ix = 0; ix < pa; ix++) {
2947 *tmpb++ = W[ix] & MP_MASK;
2951 clear unused digits [that existed in the old copy of c]
2953 for (; ix < olduse; ix++) {
2954 *tmpb++ = 0;
2957 mp_clamp(b);
2958 return MP_OKAY;
2961 /******************************************************************************/
2963 computes a = 2**b
2965 Simple algorithm which zeroes the int32, grows it then just sets one bit
2966 as required.
2968 int32 mp_2expt (mp_int * a, int32 b)
2970 int32 res;
2973 zero a as per default
2975 mp_zero (a);
2978 grow a to accomodate the single bit
2980 if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
2981 return res;
2985 set the used count of where the bit will go
2987 a->used = b / DIGIT_BIT + 1;
2990 put the single bit in its place
2992 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
2994 return MP_OKAY;
2997 /******************************************************************************/
2999 init an mp_init for a given size
3001 int32 mp_init_size(psPool_t *pool, mp_int * a, int32 size)
3003 int x;
3005 pad size so there are always extra digits
3007 size += (MP_PREC * 2) - (size % MP_PREC);
3010 alloc mem
3012 a->dp = OPT_CAST(mp_digit) psMalloc(pool, sizeof (mp_digit) * size);
3013 if (a->dp == NULL) {
3014 return MP_MEM;
3016 a->used = 0;
3017 a->alloc = size;
3018 a->sign = MP_ZPOS;
3021 zero the digits
3023 for (x = 0; x < size; x++) {
3024 a->dp[x] = 0;
3026 return MP_OKAY;
3029 /******************************************************************************/
3031 low level addition, based on HAC pp.594, Algorithm 14.7
3033 int32 s_mp_add (mp_int * a, mp_int * b, mp_int * c)
3035 mp_int *x;
3036 int32 olduse, res, min, max;
3039 find sizes, we let |a| <= |b| which means we have to sort them. "x" will
3040 point to the input with the most digits
3042 if (a->used > b->used) {
3043 min = b->used;
3044 max = a->used;
3045 x = a;
3046 } else {
3047 min = a->used;
3048 max = b->used;
3049 x = b;
3052 /* init result */
3053 if (c->alloc < max + 1) {
3054 if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
3055 return res;
3060 get old used digit count and set new one
3062 olduse = c->used;
3063 c->used = max + 1;
3066 register mp_digit u, *tmpa, *tmpb, *tmpc;
3067 register int32 i;
3069 /* alias for digit pointers */
3071 /* first input */
3072 tmpa = a->dp;
3074 /* second input */
3075 tmpb = b->dp;
3077 /* destination */
3078 tmpc = c->dp;
3080 /* zero the carry */
3081 u = 0;
3082 for (i = 0; i < min; i++) {
3084 Compute the sum at one digit, T[i] = A[i] + B[i] + U
3086 *tmpc = *tmpa++ + *tmpb++ + u;
3089 U = carry bit of T[i]
3091 u = *tmpc >> ((mp_digit)DIGIT_BIT);
3094 take away carry bit from T[i]
3096 *tmpc++ &= MP_MASK;
3100 now copy higher words if any, that is in A+B if A or B has more digits add
3101 those in
3103 if (min != max) {
3104 for (; i < max; i++) {
3105 /* T[i] = X[i] + U */
3106 *tmpc = x->dp[i] + u;
3108 /* U = carry bit of T[i] */
3109 u = *tmpc >> ((mp_digit)DIGIT_BIT);
3111 /* take away carry bit from T[i] */
3112 *tmpc++ &= MP_MASK;
3116 /* add carry */
3117 *tmpc++ = u;
3120 clear digits above oldused
3122 for (i = c->used; i < olduse; i++) {
3123 *tmpc++ = 0;
3127 mp_clamp (c);
3128 return MP_OKAY;
3131 /******************************************************************************/
3133 FUTURE - this is only needed by the SSH code, SLOW or not, because RSA
3134 exponents are always odd.
3136 int32 mp_invmodSSH(psPool_t *pool, mp_int * a, mp_int * b, mp_int * c)
3138 mp_int x, y, u, v, A, B, C, D;
3139 int32 res;
3142 b cannot be negative
3144 if (b->sign == MP_NEG || mp_iszero(b) == 1) {
3145 return MP_VAL;
3149 if the modulus is odd we can use a faster routine instead
3151 if (mp_isodd (b) == 1) {
3152 return fast_mp_invmod(pool, a, b, c);
3156 init temps
3158 if ((res = _mp_init_multi(pool, &x, &y, &u, &v,
3159 &A, &B, &C, &D)) != MP_OKAY) {
3160 return res;
3163 /* x = a, y = b */
3164 if ((res = mp_copy(a, &x)) != MP_OKAY) {
3165 goto LBL_ERR;
3167 if ((res = mp_copy(b, &y)) != MP_OKAY) {
3168 goto LBL_ERR;
3172 2. [modified] if x,y are both even then return an error!
3174 if (mp_iseven(&x) == 1 && mp_iseven (&y) == 1) {
3175 res = MP_VAL;
3176 goto LBL_ERR;
3180 3. u=x, v=y, A=1, B=0, C=0,D=1
3182 if ((res = mp_copy(&x, &u)) != MP_OKAY) {
3183 goto LBL_ERR;
3185 if ((res = mp_copy(&y, &v)) != MP_OKAY) {
3186 goto LBL_ERR;
3188 mp_set (&A, 1);
3189 mp_set (&D, 1);
3191 top:
3193 4. while u is even do
3195 while (mp_iseven(&u) == 1) {
3196 /* 4.1 u = u/2 */
3197 if ((res = mp_div_2(&u, &u)) != MP_OKAY) {
3198 goto LBL_ERR;
3200 /* 4.2 if A or B is odd then */
3201 if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
3202 /* A = (A+y)/2, B = (B-x)/2 */
3203 if ((res = mp_add(&A, &y, &A)) != MP_OKAY) {
3204 goto LBL_ERR;
3206 if ((res = mp_sub(&B, &x, &B)) != MP_OKAY) {
3207 goto LBL_ERR;
3210 /* A = A/2, B = B/2 */
3211 if ((res = mp_div_2(&A, &A)) != MP_OKAY) {
3212 goto LBL_ERR;
3214 if ((res = mp_div_2(&B, &B)) != MP_OKAY) {
3215 goto LBL_ERR;
3220 5. while v is even do
3222 while (mp_iseven(&v) == 1) {
3223 /* 5.1 v = v/2 */
3224 if ((res = mp_div_2(&v, &v)) != MP_OKAY) {
3225 goto LBL_ERR;
3227 /* 5.2 if C or D is odd then */
3228 if (mp_isodd(&C) == 1 || mp_isodd (&D) == 1) {
3229 /* C = (C+y)/2, D = (D-x)/2 */
3230 if ((res = mp_add(&C, &y, &C)) != MP_OKAY) {
3231 goto LBL_ERR;
3233 if ((res = mp_sub(&D, &x, &D)) != MP_OKAY) {
3234 goto LBL_ERR;
3237 /* C = C/2, D = D/2 */
3238 if ((res = mp_div_2(&C, &C)) != MP_OKAY) {
3239 goto LBL_ERR;
3241 if ((res = mp_div_2(&D, &D)) != MP_OKAY) {
3242 goto LBL_ERR;
3247 6. if u >= v then
3249 if (mp_cmp(&u, &v) != MP_LT) {
3250 /* u = u - v, A = A - C, B = B - D */
3251 if ((res = mp_sub(&u, &v, &u)) != MP_OKAY) {
3252 goto LBL_ERR;
3255 if ((res = mp_sub(&A, &C, &A)) != MP_OKAY) {
3256 goto LBL_ERR;
3259 if ((res = mp_sub(&B, &D, &B)) != MP_OKAY) {
3260 goto LBL_ERR;
3262 } else {
3263 /* v - v - u, C = C - A, D = D - B */
3264 if ((res = mp_sub(&v, &u, &v)) != MP_OKAY) {
3265 goto LBL_ERR;
3268 if ((res = mp_sub(&C, &A, &C)) != MP_OKAY) {
3269 goto LBL_ERR;
3272 if ((res = mp_sub(&D, &B, &D)) != MP_OKAY) {
3273 goto LBL_ERR;
3278 if not zero goto step 4
3280 if (mp_iszero(&u) == 0)
3281 goto top;
3284 now a = C, b = D, gcd == g*v
3288 if v != 1 then there is no inverse
3290 if (mp_cmp_d(&v, 1) != MP_EQ) {
3291 res = MP_VAL;
3292 goto LBL_ERR;
3296 if its too low
3298 while (mp_cmp_d(&C, 0) == MP_LT) {
3299 if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
3300 goto LBL_ERR;
3305 too big
3307 while (mp_cmp_mag(&C, b) != MP_LT) {
3308 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
3309 goto LBL_ERR;
3314 C is now the inverse
3316 mp_exch(&C, c);
3317 res = MP_OKAY;
3318 LBL_ERR:_mp_clear_multi(&x, &y, &u, &v, &A, &B, &C, &D);
3319 return res;
3322 /******************************************************************************/
3325 * Computes the modular inverse via binary extended euclidean algorithm,
3326 * that is c = 1/a mod b
3328 * Based on slow invmod except this is optimized for the case where b is
3329 * odd as per HAC Note 14.64 on pp. 610
3331 int32 fast_mp_invmod(psPool_t *pool, mp_int * a, mp_int * b, mp_int * c)
3333 mp_int x, y, u, v, B, D;
3334 int32 res, neg;
3337 2. [modified] b must be odd
3339 if (mp_iseven (b) == 1) {
3340 return MP_VAL;
3344 init all our temps
3346 if ((res = _mp_init_multi(pool, &x, &y, &u, &v, &B, &D, NULL, NULL)) != MP_OKAY) {
3347 return res;
3351 x == modulus, y == value to invert
3353 if ((res = mp_copy(b, &x)) != MP_OKAY) {
3354 goto LBL_ERR;
3358 we need y = |a|
3360 if ((res = mp_mod(pool, a, b, &y)) != MP_OKAY) {
3361 goto LBL_ERR;
3365 3. u=x, v=y, A=1, B=0, C=0,D=1
3367 if ((res = mp_copy(&x, &u)) != MP_OKAY) {
3368 goto LBL_ERR;
3370 if ((res = mp_copy(&y, &v)) != MP_OKAY) {
3371 goto LBL_ERR;
3373 mp_set(&D, 1);
3375 top:
3377 4. while u is even do
3379 while (mp_iseven(&u) == 1) {
3380 /* 4.1 u = u/2 */
3381 if ((res = mp_div_2(&u, &u)) != MP_OKAY) {
3382 goto LBL_ERR;
3384 /* 4.2 if B is odd then */
3385 if (mp_isodd(&B) == 1) {
3386 if ((res = mp_sub(&B, &x, &B)) != MP_OKAY) {
3387 goto LBL_ERR;
3390 /* B = B/2 */
3391 if ((res = mp_div_2(&B, &B)) != MP_OKAY) {
3392 goto LBL_ERR;
3397 5. while v is even do
3399 while (mp_iseven(&v) == 1) {
3400 /* 5.1 v = v/2 */
3401 if ((res = mp_div_2(&v, &v)) != MP_OKAY) {
3402 goto LBL_ERR;
3404 /* 5.2 if D is odd then */
3405 if (mp_isodd(&D) == 1) {
3406 /* D = (D-x)/2 */
3407 if ((res = mp_sub(&D, &x, &D)) != MP_OKAY) {
3408 goto LBL_ERR;
3411 /* D = D/2 */
3412 if ((res = mp_div_2(&D, &D)) != MP_OKAY) {
3413 goto LBL_ERR;
3418 6. if u >= v then
3420 if (mp_cmp(&u, &v) != MP_LT) {
3421 /* u = u - v, B = B - D */
3422 if ((res = mp_sub(&u, &v, &u)) != MP_OKAY) {
3423 goto LBL_ERR;
3426 if ((res = mp_sub(&B, &D, &B)) != MP_OKAY) {
3427 goto LBL_ERR;
3429 } else {
3430 /* v - v - u, D = D - B */
3431 if ((res = mp_sub(&v, &u, &v)) != MP_OKAY) {
3432 goto LBL_ERR;
3435 if ((res = mp_sub(&D, &B, &D)) != MP_OKAY) {
3436 goto LBL_ERR;
3441 if not zero goto step 4
3443 if (mp_iszero(&u) == 0) {
3444 goto top;
3448 now a = C, b = D, gcd == g*v
3452 if v != 1 then there is no inverse
3454 if (mp_cmp_d(&v, 1) != MP_EQ) {
3455 res = MP_VAL;
3456 goto LBL_ERR;
3460 b is now the inverse
3462 neg = a->sign;
3463 while (D.sign == MP_NEG) {
3464 if ((res = mp_add(&D, b, &D)) != MP_OKAY) {
3465 goto LBL_ERR;
3468 mp_exch(&D, c);
3469 c->sign = neg;
3470 res = MP_OKAY;
3472 LBL_ERR:_mp_clear_multi(&x, &y, &u, &v, &B, &D, NULL, NULL);
3473 return res;
3476 /******************************************************************************/
3478 d = a + b (mod c)
3480 int32 mp_addmod (psPool_t *pool, mp_int * a, mp_int * b, mp_int * c, mp_int * d)
3482 int32 res;
3483 mp_int t;
3485 if ((res = mp_init(pool, &t)) != MP_OKAY) {
3486 return res;
3489 if ((res = mp_add (a, b, &t)) != MP_OKAY) {
3490 mp_clear (&t);
3491 return res;
3493 res = mp_mod (pool, &t, c, d);
3494 mp_clear (&t);
3495 return res;
3498 /******************************************************************************/
3500 shrink a bignum
3502 int32 mp_shrink (mp_int * a)
3504 mp_digit *tmp;
3506 if (a->alloc != a->used && a->used > 0) {
3507 if ((tmp = psRealloc(a->dp, sizeof (mp_digit) * a->used)) == NULL) {
3508 return MP_MEM;
3510 a->dp = tmp;
3511 a->alloc = a->used;
3513 return MP_OKAY;
3516 /* single digit subtraction */
3517 int32 mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
3519 mp_digit *tmpa, *tmpc, mu;
3520 int32 res, ix, oldused;
3522 /* grow c as required */
3523 if (c->alloc < a->used + 1) {
3524 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
3525 return res;
3529 /* if a is negative just do an unsigned
3530 * addition [with fudged signs]
3532 if (a->sign == MP_NEG) {
3533 a->sign = MP_ZPOS;
3534 res = mp_add_d(a, b, c);
3535 a->sign = c->sign = MP_NEG;
3536 return res;
3539 /* setup regs */
3540 oldused = c->used;
3541 tmpa = a->dp;
3542 tmpc = c->dp;
3544 /* if a <= b simply fix the single digit */
3545 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
3546 if (a->used == 1) {
3547 *tmpc++ = b - *tmpa;
3548 } else {
3549 *tmpc++ = b;
3551 ix = 1;
3553 /* negative/1digit */
3554 c->sign = MP_NEG;
3555 c->used = 1;
3556 } else {
3557 /* positive/size */
3558 c->sign = MP_ZPOS;
3559 c->used = a->used;
3561 /* subtract first digit */
3562 *tmpc = *tmpa++ - b;
3563 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3564 *tmpc++ &= MP_MASK;
3566 /* handle rest of the digits */
3567 for (ix = 1; ix < a->used; ix++) {
3568 *tmpc = *tmpa++ - mu;
3569 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3570 *tmpc++ &= MP_MASK;
3574 /* zero excess digits */
3575 while (ix++ < oldused) {
3576 *tmpc++ = 0;
3578 mp_clamp(c);
3579 return MP_OKAY;
3582 /* single digit addition */
3583 int32 mp_add_d (mp_int * a, mp_digit b, mp_int * c)
3585 int32 res, ix, oldused;
3586 mp_digit *tmpa, *tmpc, mu;
3588 /* grow c as required */
3589 if (c->alloc < a->used + 1) {
3590 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
3591 return res;
3595 /* if a is negative and |a| >= b, call c = |a| - b */
3596 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
3597 /* temporarily fix sign of a */
3598 a->sign = MP_ZPOS;
3600 /* c = |a| - b */
3601 res = mp_sub_d(a, b, c);
3603 /* fix sign */
3604 a->sign = c->sign = MP_NEG;
3605 return res;
3608 /* old number of used digits in c */
3609 oldused = c->used;
3611 /* sign always positive */
3612 c->sign = MP_ZPOS;
3614 /* source alias */
3615 tmpa = a->dp;
3617 /* destination alias */
3618 tmpc = c->dp;
3620 /* if a is positive */
3621 if (a->sign == MP_ZPOS) {
3622 /* add digit, after this we're propagating the carry */
3623 *tmpc = *tmpa++ + b;
3624 mu = *tmpc >> DIGIT_BIT;
3625 *tmpc++ &= MP_MASK;
3627 /* now handle rest of the digits */
3628 for (ix = 1; ix < a->used; ix++) {
3629 *tmpc = *tmpa++ + mu;
3630 mu = *tmpc >> DIGIT_BIT;
3631 *tmpc++ &= MP_MASK;
3633 /* set final carry */
3634 ix++;
3635 *tmpc++ = mu;
3637 /* setup size */
3638 c->used = a->used + 1;
3639 } else {
3640 /* a was negative and |a| < b */
3641 c->used = 1;
3643 /* the result is a single digit */
3644 if (a->used == 1) {
3645 *tmpc++ = b - a->dp[0];
3646 } else {
3647 *tmpc++ = b;
3650 /* setup count so the clearing of oldused
3651 * can fall through correctly
3653 ix = 1;
3656 /* now zero to oldused */
3657 while (ix++ < oldused) {
3658 *tmpc++ = 0;
3660 mp_clamp(c);
3661 return MP_OKAY;
3665 /******************************************************************************/
3667 #endif /* USE_MPI2 */