src/TortoisePlink/SSHBN.C

   1 /*\r
   2  * Bignum routines for RSA and DH and stuff.\r
   3  */\r
   4 \r
   5 #include <stdio.h>\r
   6 #include <assert.h>\r
   7 #include <stdlib.h>\r
   8 #include <string.h>\r
   9 #include <limits.h>\r
  10 #include <ctype.h>\r
  11 \r
  12 #include "misc.h"\r
  13 \r
  14 #include "sshbn.h"\r
  15 \r
  16 #define BIGNUM_INTERNAL\r
  17 typedef BignumInt *Bignum;\r
  18 \r
  19 #include "ssh.h"\r
  20 \r
/*
 * Static storage for the constants 0, 1 and 10. In each array the
 * first element is the count of value words that follow it: bnZero
 * has no value words at all, while bnOne and bnTen have one each.
 */
BignumInt bnZero[1] = { 0 };
BignumInt bnOne[2] = { 1, 1 };
BignumInt bnTen[2] = { 1, 10 };
  24 \r
  25 /*\r
  26  * The Bignum format is an array of `BignumInt'. The first\r
  27  * element of the array counts the remaining elements. The\r
  28  * remaining elements express the actual number, base 2^BIGNUM_INT_BITS, _least_\r
  29  * significant digit first. (So it's trivial to extract the bit\r
  30  * with value 2^n for any n.)\r
  31  *\r
  32  * All Bignums in this module are positive. Negative numbers must\r
  33  * be dealt with outside it.\r
  34  *\r
  35  * INVARIANT: the most significant word of any Bignum must be\r
  36  * nonzero.\r
  37  */\r
  38 \r
/*
 * Bignum handles for the constant arrays bnZero, bnOne and bnTen.
 * These point at file-scope arrays, not at memory returned by newbn().
 */
Bignum Zero = bnZero, One = bnOne, Ten = bnTen;
  40 \r
  41 static Bignum newbn(int length)\r
  42 {\r
  43     Bignum b;\r
  44 \r
  45     assert(length >= 0 && length < INT_MAX / BIGNUM_INT_BITS);\r
  46 \r
  47     b = snewn(length + 1, BignumInt);\r
  48     memset(b, 0, (length + 1) * sizeof(*b));\r
  49     b[0] = length;\r
  50     return b;\r
  51 }\r
  52 \r
  53 void bn_restore_invariant(Bignum b)\r
  54 {\r
  55     while (b[0] > 1 && b[b[0]] == 0)\r
  56         b[0]--;\r
  57 }\r
  58 \r
  59 Bignum copybn(Bignum orig)\r
  60 {\r
  61     Bignum b = snewn(orig[0] + 1, BignumInt);\r
  62     if (!b)\r
  63         abort();                       /* FIXME */\r
  64     memcpy(b, orig, (orig[0] + 1) * sizeof(*b));\r
  65     return b;\r
  66 }\r
  67 \r
  68 void freebn(Bignum b)\r
  69 {\r
  70     /*\r
  71      * Burn the evidence, just in case.\r
  72      */\r
  73     smemclr(b, sizeof(b[0]) * (b[0] + 1));\r
  74     sfree(b);\r
  75 }\r
  76 \r
  77 Bignum bn_power_2(int n)\r
  78 {\r
  79     Bignum ret;\r
  80 \r
  81     assert(n >= 0);\r
  82 \r
  83     ret = newbn(n / BIGNUM_INT_BITS + 1);\r
  84     bignum_set_bit(ret, n, 1);\r
  85     return ret;\r
  86 }\r
  87 \r
  88 /*\r
  89  * Internal addition. Sets c = a - b, where 'a', 'b' and 'c' are all\r
  90  * big-endian arrays of 'len' BignumInts. Returns the carry off the\r
  91  * top.\r
  92  */\r
  93 static BignumCarry internal_add(const BignumInt *a, const BignumInt *b,\r
  94                                 BignumInt *c, int len)\r
  95 {\r
  96     int i;\r
  97     BignumCarry carry = 0;\r
  98 \r
  99     for (i = len-1; i >= 0; i--)\r
 100         BignumADC(c[i], carry, a[i], b[i], carry);\r
 101 \r
 102     return (BignumInt)carry;\r
 103 }\r
 104 \r
 105 /*\r
 106  * Internal subtraction. Sets c = a - b, where 'a', 'b' and 'c' are\r
 107  * all big-endian arrays of 'len' BignumInts. Any borrow from the top\r
 108  * is ignored.\r
 109  */\r
 110 static void internal_sub(const BignumInt *a, const BignumInt *b,\r
 111                          BignumInt *c, int len)\r
 112 {\r
 113     int i;\r
 114     BignumCarry carry = 1;\r
 115 \r
 116     for (i = len-1; i >= 0; i--)\r
 117         BignumADC(c[i], carry, a[i], ~b[i], carry);\r
 118 }\r
 119 \r
 120 /*\r
 121  * Compute c = a * b.\r
 122  * Input is in the first len words of a and b.\r
 123  * Result is returned in the first 2*len words of c.\r
 124  *\r
 125  * 'scratch' must point to an array of BignumInt of size at least\r
 126  * mul_compute_scratch(len). (This covers the needs of internal_mul\r
 127  * and all its recursive calls to itself.)\r
 128  */\r
 129 #define KARATSUBA_THRESHOLD 50\r
 130 static int mul_compute_scratch(int len)\r
 131 {\r
 132     int ret = 0;\r
 133     while (len > KARATSUBA_THRESHOLD) {\r
 134         int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */\r
 135         int midlen = botlen + 1;\r
 136         ret += 4*midlen;\r
 137         len = midlen;\r
 138     }\r
 139     return ret;\r
 140 }\r
 141 static void internal_mul(const BignumInt *a, const BignumInt *b,\r
 142                          BignumInt *c, int len, BignumInt *scratch)\r
 143 {\r
 144     if (len > KARATSUBA_THRESHOLD) {\r
 145         int i;\r
 146 \r
 147         /*\r
 148          * Karatsuba divide-and-conquer algorithm. Cut each input in\r
 149          * half, so that it's expressed as two big 'digits' in a giant\r
 150          * base D:\r
 151          *\r
 152          *   a = a_1 D + a_0\r
 153          *   b = b_1 D + b_0\r
 154          *\r
 155          * Then the product is of course\r
 156          *\r
 157          *  ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0\r
 158          *\r
 159          * and we compute the three coefficients by recursively\r
 160          * calling ourself to do half-length multiplications.\r
 161          *\r
 162          * The clever bit that makes this worth doing is that we only\r
 163          * need _one_ half-length multiplication for the central\r
 164          * coefficient rather than the two that it obviouly looks\r
 165          * like, because we can use a single multiplication to compute\r
 166          *\r
 167          *   (a_1 + a_0) (b_1 + b_0) = a_1 b_1 + a_1 b_0 + a_0 b_1 + a_0 b_0\r
 168          *\r
 169          * and then we subtract the other two coefficients (a_1 b_1\r
 170          * and a_0 b_0) which we were computing anyway.\r
 171          *\r
 172          * Hence we get to multiply two numbers of length N in about\r
 173          * three times as much work as it takes to multiply numbers of\r
 174          * length N/2, which is obviously better than the four times\r
 175          * as much work it would take if we just did a long\r
 176          * conventional multiply.\r
 177          */\r
 178 \r
 179         int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */\r
 180         int midlen = botlen + 1;\r
 181         BignumCarry carry;\r
 182 #ifdef KARA_DEBUG\r
 183         int i;\r
 184 #endif\r
 185 \r
 186         /*\r
 187          * The coefficients a_1 b_1 and a_0 b_0 just avoid overlapping\r
 188          * in the output array, so we can compute them immediately in\r
 189          * place.\r
 190          */\r
 191 \r
 192 #ifdef KARA_DEBUG\r
 193         printf("a1,a0 = 0x");\r
 194         for (i = 0; i < len; i++) {\r
 195             if (i == toplen) printf(", 0x");\r
 196             printf("%0*x", BIGNUM_INT_BITS/4, a[i]);\r
 197         }\r
 198         printf("\n");\r
 199         printf("b1,b0 = 0x");\r
 200         for (i = 0; i < len; i++) {\r
 201             if (i == toplen) printf(", 0x");\r
 202             printf("%0*x", BIGNUM_INT_BITS/4, b[i]);\r
 203         }\r
 204         printf("\n");\r
 205 #endif\r
 206 \r
 207         /* a_1 b_1 */\r
 208         internal_mul(a, b, c, toplen, scratch);\r
 209 #ifdef KARA_DEBUG\r
 210         printf("a1b1 = 0x");\r
 211         for (i = 0; i < 2*toplen; i++) {\r
 212             printf("%0*x", BIGNUM_INT_BITS/4, c[i]);\r
 213         }\r
 214         printf("\n");\r
 215 #endif\r
 216 \r
 217         /* a_0 b_0 */\r
 218         internal_mul(a + toplen, b + toplen, c + 2*toplen, botlen, scratch);\r
 219 #ifdef KARA_DEBUG\r
 220         printf("a0b0 = 0x");\r
 221         for (i = 0; i < 2*botlen; i++) {\r
 222             printf("%0*x", BIGNUM_INT_BITS/4, c[2*toplen+i]);\r
 223         }\r
 224         printf("\n");\r
 225 #endif\r
 226 \r
 227         /* Zero padding. midlen exceeds toplen by at most 2, so just\r
 228          * zero the first two words of each input and the rest will be\r
 229          * copied over. */\r
 230         scratch[0] = scratch[1] = scratch[midlen] = scratch[midlen+1] = 0;\r
 231 \r
 232         for (i = 0; i < toplen; i++) {\r
 233             scratch[midlen - toplen + i] = a[i]; /* a_1 */\r
 234             scratch[2*midlen - toplen + i] = b[i]; /* b_1 */\r
 235         }\r
 236 \r
 237         /* compute a_1 + a_0 */\r
 238         scratch[0] = internal_add(scratch+1, a+toplen, scratch+1, botlen);\r
 239 #ifdef KARA_DEBUG\r
 240         printf("a1plusa0 = 0x");\r
 241         for (i = 0; i < midlen; i++) {\r
 242             printf("%0*x", BIGNUM_INT_BITS/4, scratch[i]);\r
 243         }\r
 244         printf("\n");\r
 245 #endif\r
 246         /* compute b_1 + b_0 */\r
 247         scratch[midlen] = internal_add(scratch+midlen+1, b+toplen,\r
 248                                        scratch+midlen+1, botlen);\r
 249 #ifdef KARA_DEBUG\r
 250         printf("b1plusb0 = 0x");\r
 251         for (i = 0; i < midlen; i++) {\r
 252             printf("%0*x", BIGNUM_INT_BITS/4, scratch[midlen+i]);\r
 253         }\r
 254         printf("\n");\r
 255 #endif\r
 256 \r
 257         /*\r
 258          * Now we can do the third multiplication.\r
 259          */\r
 260         internal_mul(scratch, scratch + midlen, scratch + 2*midlen, midlen,\r
 261                      scratch + 4*midlen);\r
 262 #ifdef KARA_DEBUG\r
 263         printf("a1plusa0timesb1plusb0 = 0x");\r
 264         for (i = 0; i < 2*midlen; i++) {\r
 265             printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen+i]);\r
 266         }\r
 267         printf("\n");\r
 268 #endif\r
 269 \r
 270         /*\r
 271          * Now we can reuse the first half of 'scratch' to compute the\r
 272          * sum of the outer two coefficients, to subtract from that\r
 273          * product to obtain the middle one.\r
 274          */\r
 275         scratch[0] = scratch[1] = scratch[2] = scratch[3] = 0;\r
 276         for (i = 0; i < 2*toplen; i++)\r
 277             scratch[2*midlen - 2*toplen + i] = c[i];\r
 278         scratch[1] = internal_add(scratch+2, c + 2*toplen,\r
 279                                   scratch+2, 2*botlen);\r
 280 #ifdef KARA_DEBUG\r
 281         printf("a1b1plusa0b0 = 0x");\r
 282         for (i = 0; i < 2*midlen; i++) {\r
 283             printf("%0*x", BIGNUM_INT_BITS/4, scratch[i]);\r
 284         }\r
 285         printf("\n");\r
 286 #endif\r
 287 \r
 288         internal_sub(scratch + 2*midlen, scratch,\r
 289                      scratch + 2*midlen, 2*midlen);\r
 290 #ifdef KARA_DEBUG\r
 291         printf("a1b0plusa0b1 = 0x");\r
 292         for (i = 0; i < 2*midlen; i++) {\r
 293             printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen+i]);\r
 294         }\r
 295         printf("\n");\r
 296 #endif\r
 297 \r
 298         /*\r
 299          * And now all we need to do is to add that middle coefficient\r
 300          * back into the output. We may have to propagate a carry\r
 301          * further up the output, but we can be sure it won't\r
 302          * propagate right the way off the top.\r
 303          */\r
 304         carry = internal_add(c + 2*len - botlen - 2*midlen,\r
 305                              scratch + 2*midlen,\r
 306                              c + 2*len - botlen - 2*midlen, 2*midlen);\r
 307         i = 2*len - botlen - 2*midlen - 1;\r
 308         while (carry) {\r
 309             assert(i >= 0);\r
 310             BignumADC(c[i], carry, c[i], 0, carry);\r
 311             i--;\r
 312         }\r
 313 #ifdef KARA_DEBUG\r
 314         printf("ab = 0x");\r
 315         for (i = 0; i < 2*len; i++) {\r
 316             printf("%0*x", BIGNUM_INT_BITS/4, c[i]);\r
 317         }\r
 318         printf("\n");\r
 319 #endif\r
 320 \r
 321     } else {\r
 322         int i;\r
 323         BignumInt carry;\r
 324         const BignumInt *ap, *bp;\r
 325         BignumInt *cp, *cps;\r
 326 \r
 327         /*\r
 328          * Multiply in the ordinary O(N^2) way.\r
 329          */\r
 330 \r
 331         for (i = 0; i < 2 * len; i++)\r
 332             c[i] = 0;\r
 333 \r
 334         for (cps = c + 2*len, ap = a + len; ap-- > a; cps--) {\r
 335             carry = 0;\r
 336             for (cp = cps, bp = b + len; cp--, bp-- > b ;)\r
 337                 BignumMULADD2(carry, *cp, *ap, *bp, *cp, carry);\r
 338             *cp = carry;\r
 339         }\r
 340     }\r
 341 }\r
 342 \r
 343 /*\r
 344  * Variant form of internal_mul used for the initial step of\r
 345  * Montgomery reduction. Only bothers outputting 'len' words\r
 346  * (everything above that is thrown away).\r
 347  */\r
 348 static void internal_mul_low(const BignumInt *a, const BignumInt *b,\r
 349                              BignumInt *c, int len, BignumInt *scratch)\r
 350 {\r
 351     if (len > KARATSUBA_THRESHOLD) {\r
 352         int i;\r
 353 \r
 354         /*\r
 355          * Karatsuba-aware version of internal_mul_low. As before, we\r
 356          * express each input value as a shifted combination of two\r
 357          * halves:\r
 358          *\r
 359          *   a = a_1 D + a_0\r
 360          *   b = b_1 D + b_0\r
 361          *\r
 362          * Then the full product is, as before,\r
 363          *\r
 364          *  ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0\r
 365          *\r
 366          * Provided we choose D on the large side (so that a_0 and b_0\r
 367          * are _at least_ as long as a_1 and b_1), we don't need the\r
 368          * topmost term at all, and we only need half of the middle\r
 369          * term. So there's no point in doing the proper Karatsuba\r
 370          * optimisation which computes the middle term using the top\r
 371          * one, because we'd take as long computing the top one as\r
 372          * just computing the middle one directly.\r
 373          *\r
 374          * So instead, we do a much more obvious thing: we call the\r
 375          * fully optimised internal_mul to compute a_0 b_0, and we\r
 376          * recursively call ourself to compute the _bottom halves_ of\r
 377          * a_1 b_0 and a_0 b_1, each of which we add into the result\r
 378          * in the obvious way.\r
 379          *\r
 380          * In other words, there's no actual Karatsuba _optimisation_\r
 381          * in this function; the only benefit in doing it this way is\r
 382          * that we call internal_mul proper for a large part of the\r
 383          * work, and _that_ can optimise its operation.\r
 384          */\r
 385 \r
 386         int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */\r
 387 \r
 388         /*\r
 389          * Scratch space for the various bits and pieces we're going\r
 390          * to be adding together: we need botlen*2 words for a_0 b_0\r
 391          * (though we may end up throwing away its topmost word), and\r
 392          * toplen words for each of a_1 b_0 and a_0 b_1. That adds up\r
 393          * to exactly 2*len.\r
 394          */\r
 395 \r
 396         /* a_0 b_0 */\r
 397         internal_mul(a + toplen, b + toplen, scratch + 2*toplen, botlen,\r
 398                      scratch + 2*len);\r
 399 \r
 400         /* a_1 b_0 */\r
 401         internal_mul_low(a, b + len - toplen, scratch + toplen, toplen,\r
 402                          scratch + 2*len);\r
 403 \r
 404         /* a_0 b_1 */\r
 405         internal_mul_low(a + len - toplen, b, scratch, toplen,\r
 406                          scratch + 2*len);\r
 407 \r
 408         /* Copy the bottom half of the big coefficient into place */\r
 409         for (i = 0; i < botlen; i++)\r
 410             c[toplen + i] = scratch[2*toplen + botlen + i];\r
 411 \r
 412         /* Add the two small coefficients, throwing away the returned carry */\r
 413         internal_add(scratch, scratch + toplen, scratch, toplen);\r
 414 \r
 415         /* And add that to the large coefficient, leaving the result in c. */\r
 416         internal_add(scratch, scratch + 2*toplen + botlen - toplen,\r
 417                      c, toplen);\r
 418 \r
 419     } else {\r
 420         int i;\r
 421         BignumInt carry;\r
 422         const BignumInt *ap, *bp;\r
 423         BignumInt *cp, *cps;\r
 424 \r
 425         /*\r
 426          * Multiply in the ordinary O(N^2) way.\r
 427          */\r
 428 \r
 429         for (i = 0; i < len; i++)\r
 430             c[i] = 0;\r
 431 \r
 432         for (cps = c + len, ap = a + len; ap-- > a; cps--) {\r
 433             carry = 0;\r
 434             for (cp = cps, bp = b + len; bp--, cp-- > c ;)\r
 435                 BignumMULADD2(carry, *cp, *ap, *bp, *cp, carry);\r
 436         }\r
 437     }\r
 438 }\r
 439 \r
 440 /*\r
 441  * Montgomery reduction. Expects x to be a big-endian array of 2*len\r
 442  * BignumInts whose value satisfies 0 <= x < rn (where r = 2^(len *\r
 443  * BIGNUM_INT_BITS) is the Montgomery base). Returns in the same array\r
 444  * a value x' which is congruent to xr^{-1} mod n, and satisfies 0 <=\r
 445  * x' < n.\r
 446  *\r
 447  * 'n' and 'mninv' should be big-endian arrays of 'len' BignumInts\r
 448  * each, containing respectively n and the multiplicative inverse of\r
 449  * -n mod r.\r
 450  *\r
 451  * 'tmp' is an array of BignumInt used as scratch space, of length at\r
 452  * least 3*len + mul_compute_scratch(len).\r
 453  */\r
 454 static void monty_reduce(BignumInt *x, const BignumInt *n,\r
 455                          const BignumInt *mninv, BignumInt *tmp, int len)\r
 456 {\r
 457     int i;\r
 458     BignumInt carry;\r
 459 \r
 460     /*\r
 461      * Multiply x by (-n)^{-1} mod r. This gives us a value m such\r
 462      * that mn is congruent to -x mod r. Hence, mn+x is an exact\r
 463      * multiple of r, and is also (obviously) congruent to x mod n.\r
 464      */\r
 465     internal_mul_low(x + len, mninv, tmp, len, tmp + 3*len);\r
 466 \r
 467     /*\r
 468      * Compute t = (mn+x)/r in ordinary, non-modular, integer\r
 469      * arithmetic. By construction this is exact, and is congruent mod\r
 470      * n to x * r^{-1}, i.e. the answer we want.\r
 471      *\r
 472      * The following multiply leaves that answer in the _most_\r
 473      * significant half of the 'x' array, so then we must shift it\r
 474      * down.\r
 475      */\r
 476     internal_mul(tmp, n, tmp+len, len, tmp + 3*len);\r
 477     carry = internal_add(x, tmp+len, x, 2*len);\r
 478     for (i = 0; i < len; i++)\r
 479         x[len + i] = x[i], x[i] = 0;\r
 480 \r
 481     /*\r
 482      * Reduce t mod n. This doesn't require a full-on division by n,\r
 483      * but merely a test and single optional subtraction, since we can\r
 484      * show that 0 <= t < 2n.\r
 485      *\r
 486      * Proof:\r
 487      *  + we computed m mod r, so 0 <= m < r.\r
 488      *  + so 0 <= mn < rn, obviously\r
 489      *  + hence we only need 0 <= x < rn to guarantee that 0 <= mn+x < 2rn\r
 490      *  + yielding 0 <= (mn+x)/r < 2n as required.\r
 491      */\r
 492     if (!carry) {\r
 493         for (i = 0; i < len; i++)\r
 494             if (x[len + i] != n[i])\r
 495                 break;\r
 496     }\r
 497     if (carry || i >= len || x[len + i] > n[i])\r
 498         internal_sub(x+len, n, x+len, len);\r
 499 }\r
 500 \r
 501 static void internal_add_shifted(BignumInt *number,\r
 502                                  BignumInt n, int shift)\r
 503 {\r
 504     int word = 1 + (shift / BIGNUM_INT_BITS);\r
 505     int bshift = shift % BIGNUM_INT_BITS;\r
 506     BignumInt addendh, addendl;\r
 507     BignumCarry carry;\r
 508 \r
 509     addendl = n << bshift;\r
 510     addendh = (bshift == 0 ? 0 : n >> (BIGNUM_INT_BITS - bshift));\r
 511 \r
 512     assert(word <= number[0]);\r
 513     BignumADC(number[word], carry, number[word], addendl, 0);\r
 514     word++;\r
 515     if (!addendh && !carry)\r
 516         return;\r
 517     assert(word <= number[0]);\r
 518     BignumADC(number[word], carry, number[word], addendh, carry);\r
 519     word++;\r
 520     while (carry) {\r
 521         assert(word <= number[0]);\r
 522         BignumADC(number[word], carry, number[word], 0, carry);\r
 523         word++;\r
 524     }\r
 525 }\r
 526 \r
 527 static int bn_clz(BignumInt x)\r
 528 {\r
 529     /*\r
 530      * Count the leading zero bits in x. Equivalently, how far left\r
 531      * would we need to shift x to make its top bit set?\r
 532      *\r
 533      * Precondition: x != 0.\r
 534      */\r
 535 \r
 536     /* FIXME: would be nice to put in some compiler intrinsics under\r
 537      * ifdef here */\r
 538     int i, ret = 0;\r
 539     for (i = BIGNUM_INT_BITS / 2; i != 0; i >>= 1) {\r
 540         if ((x >> (BIGNUM_INT_BITS-i)) == 0) {\r
 541             x <<= i;\r
 542             ret += i;\r
 543         }\r
 544     }\r
 545     return ret;\r
 546 }\r
 547 \r
 548 static BignumInt reciprocal_word(BignumInt d)\r
 549 {\r
 550     BignumInt dshort, recip, prodh, prodl;\r
 551     int corrections;\r
 552 \r
 553     /*\r
 554      * Input: a BignumInt value d, with its top bit set.\r
 555      */\r
 556     assert(d >> (BIGNUM_INT_BITS-1) == 1);\r
 557 \r
 558     /*\r
 559      * Output: a value, shifted to fill a BignumInt, which is strictly\r
 560      * less than 1/(d+1), i.e. is an *under*-estimate (but by as\r
 561      * little as possible within the constraints) of the reciprocal of\r
 562      * any number whose first BIGNUM_INT_BITS bits match d.\r
 563      *\r
 564      * Ideally we'd like to _totally_ fill BignumInt, i.e. always\r
 565      * return a value with the top bit set. Unfortunately we can't\r
 566      * quite guarantee that for all inputs and also return a fixed\r
 567      * exponent. So instead we take our reciprocal to be\r
 568      * 2^(BIGNUM_INT_BITS*2-1) / d, so that it has the top bit clear\r
 569      * only in the exceptional case where d takes exactly the maximum\r
 570      * value BIGNUM_INT_MASK; in that case, the top bit is clear and\r
 571      * the next bit down is set.\r
 572      */\r
 573 \r
 574     /*\r
 575      * Start by computing a half-length version of the answer, by\r
 576      * straightforward division within a BignumInt.\r
 577      */\r
 578     dshort = (d >> (BIGNUM_INT_BITS/2)) + 1;\r
 579     recip = (BIGNUM_TOP_BIT + dshort - 1) / dshort;\r
 580     recip <<= BIGNUM_INT_BITS - BIGNUM_INT_BITS/2;\r
 581 \r
 582     /*\r
 583      * Newton-Raphson iteration to improve that starting reciprocal\r
 584      * estimate: take f(x) = d - 1/x, and then the N-R formula gives\r
 585      * x_new = x - f(x)/f'(x) = x - (d-1/x)/(1/x^2) = x(2-d*x). Or,\r
 586      * taking our fixed-point representation into account, take f(x)\r
 587      * to be d - K/x (where K = 2^(BIGNUM_INT_BITS*2-1) as discussed\r
 588      * above) and then we get (2K - d*x) * x/K.\r
 589      *\r
 590      * Newton-Raphson doubles the number of correct bits at every\r
 591      * iteration, and the initial division above already gave us half\r
 592      * the output word, so it's only worth doing one iteration.\r
 593      */\r
 594     BignumMULADD(prodh, prodl, recip, d, recip);\r
 595     prodl = ~prodl;\r
 596     prodh = ~prodh;\r
 597     {\r
 598         BignumCarry c;\r
 599         BignumADC(prodl, c, prodl, 1, 0);\r
 600         prodh += c;\r
 601     }\r
 602     BignumMUL(prodh, prodl, prodh, recip);\r
 603     recip = (prodh << 1) | (prodl >> (BIGNUM_INT_BITS-1));\r
 604 \r
 605     /*\r
 606      * Now make sure we have the best possible reciprocal estimate,\r
 607      * before we return it. We might have been off by a handful either\r
 608      * way - not enough to bother with any better-thought-out kind of\r
 609      * correction loop.\r
 610      */\r
 611     BignumMULADD(prodh, prodl, recip, d, recip);\r
 612     corrections = 0;\r
 613     if (prodh >= BIGNUM_TOP_BIT) {\r
 614         do {\r
 615             BignumCarry c = 1;\r
 616             BignumADC(prodl, c, prodl, ~d, c); prodh += BIGNUM_INT_MASK + c;\r
 617             recip--;\r
 618             corrections++;\r
 619         } while (prodh >= ((BignumInt)1 << (BIGNUM_INT_BITS-1)));\r
 620     } else {\r
 621         while (1) {\r
 622             BignumInt newprodh, newprodl;\r
 623             BignumCarry c = 0;\r
 624             BignumADC(newprodl, c, prodl, d, c); newprodh = prodh + c;\r
 625             if (newprodh >= BIGNUM_TOP_BIT)\r
 626                 break;\r
 627             prodh = newprodh;\r
 628             prodl = newprodl;\r
 629             recip++;\r
 630             corrections++;\r
 631         }\r
 632     }\r
 633 \r
 634     return recip;\r
 635 }\r
 636 \r
 637 /*\r
 638  * Compute a = a % m.\r
 639  * Input in first alen words of a and first mlen words of m.\r
 640  * Output in first alen words of a\r
 641  * (of which first alen-mlen words will be zero).\r
 * If `quot' is non-NULL, the quotient is accumulated into it; `quot'
 * is a Bignum rather than the internal bigendian format.
 644  *\r
 645  * 'recip' must be the result of calling reciprocal_word() on the top\r
 646  * BIGNUM_INT_BITS of the modulus (denoted m0 in comments below), with\r
 647  * the topmost set bit normalised to the MSB of the input to\r
 648  * reciprocal_word. 'rshift' is how far left the top nonzero word of\r
 649  * the modulus had to be shifted to set that top bit.\r
 650  */\r
 651 static void internal_mod(BignumInt *a, int alen,\r
 652                          BignumInt *m, int mlen,\r
 653                          BignumInt *quot, BignumInt recip, int rshift)\r
 654 {\r
 655     int i, k;\r
 656 \r
 657 #ifdef DIVISION_DEBUG\r
 658     {\r
 659         int d;\r
 660         printf("start division, m=0x");\r
 661         for (d = 0; d < mlen; d++)\r
 662             printf("%0*llx", BIGNUM_INT_BITS/4, (unsigned long long)m[d]);\r
 663         printf(", recip=%#0*llx, rshift=%d\n",\r
 664                BIGNUM_INT_BITS/4, (unsigned long long)recip, rshift);\r
 665     }\r
 666 #endif\r
 667 \r
 668     /*\r
 669      * Repeatedly use that reciprocal estimate to get a decent number\r
 670      * of quotient bits, and subtract off the resulting multiple of m.\r
 671      *\r
 672      * Normally we expect to terminate this loop by means of finding\r
 673      * out q=0 part way through, but one way in which we might not get\r
 674      * that far in the first place is if the input a is actually zero,\r
 675      * in which case we'll discard zero words from the front of a\r
 676      * until we reach the termination condition in the for statement\r
 677      * here.\r
 678      */\r
 679     for (i = 0; i <= alen - mlen ;) {\r
 680         BignumInt product;\r
 681         BignumInt aword, q;\r
 682         int shift, full_bitoffset, bitoffset, wordoffset;\r
 683 \r
 684 #ifdef DIVISION_DEBUG\r
 685         {\r
 686             int d;\r
 687             printf("main loop, a=0x");\r
 688             for (d = 0; d < alen; d++)\r
 689                 printf("%0*llx", BIGNUM_INT_BITS/4, (unsigned long long)a[d]);\r
 690             printf("\n");\r
 691         }\r
 692 #endif\r
 693 \r
 694         if (a[i] == 0) {\r
 695 #ifdef DIVISION_DEBUG\r
 696             printf("zero word at i=%d\n", i);\r
 697 #endif\r
 698             i++;\r
 699             continue;\r
 700         }\r
 701 \r
 702         aword = a[i];\r
 703         shift = bn_clz(aword);\r
 704         aword <<= shift;\r
 705         if (shift > 0 && i+1 < alen)\r
 706             aword |= a[i+1] >> (BIGNUM_INT_BITS - shift);\r
 707 \r
 708         {\r
 709             BignumInt unused;\r
 710             BignumMUL(q, unused, recip, aword);\r
 711             (void)unused;\r
 712         }\r
 713 \r
 714 #ifdef DIVISION_DEBUG\r
 715         printf("i=%d, aword=%#0*llx, shift=%d, q=%#0*llx\n",\r
 716                i, BIGNUM_INT_BITS/4, (unsigned long long)aword,\r
 717                shift, BIGNUM_INT_BITS/4, (unsigned long long)q);\r
 718 #endif\r
 719 \r
 720         /*\r
 721          * Work out the right bit and word offsets to use when\r
 722          * subtracting q*m from a.\r
 723          *\r
 724          * aword was taken from a[i], which means its LSB was at bit\r
 725          * position (alen-1-i) * BIGNUM_INT_BITS. But then we shifted\r
 726          * it left by 'shift', so now the low bit of aword corresponds\r
 727          * to bit position (alen-1-i) * BIGNUM_INT_BITS - shift, i.e.\r
 728          * aword is approximately equal to a / 2^(that).\r
 729          *\r
 730          * m0 comes from the top word of mod, so its LSB is at bit\r
 731          * position (mlen-1) * BIGNUM_INT_BITS - rshift, i.e. it can\r
 732          * be considered to be m / 2^(that power). 'recip' is the\r
 733          * reciprocal of m0, times 2^(BIGNUM_INT_BITS*2-1), i.e. it's\r
 734          * about 2^((mlen+1) * BIGNUM_INT_BITS - rshift - 1) / m.\r
 735          *\r
 736          * Hence, recip * aword is approximately equal to the product\r
 737          * of those, which simplifies to\r
 738          *\r
 739          * a/m * 2^((mlen+2+i-alen)*BIGNUM_INT_BITS + shift - rshift - 1)\r
 740          *\r
 741          * But we've also shifted recip*aword down by BIGNUM_INT_BITS\r
 742          * to form q, so we have\r
 743          *\r
 744          * q ~= a/m * 2^((mlen+1+i-alen)*BIGNUM_INT_BITS + shift - rshift - 1)\r
 745          *\r
 746          * and hence, when we now compute q*m, it will be about\r
 747          * a*2^(all that lot), i.e. the negation of that expression is\r
 748          * how far left we have to shift the product q*m to make it\r
 749          * approximately equal to a.\r
 750          */\r
 751         full_bitoffset = -((mlen+1+i-alen)*BIGNUM_INT_BITS + shift-rshift-1);\r
 752 #ifdef DIVISION_DEBUG\r
 753         printf("full_bitoffset=%d\n", full_bitoffset);\r
 754 #endif\r
 755 \r
 756         if (full_bitoffset < 0) {\r
 757             /*\r
 758              * If we find ourselves needing to shift q*m _right_, that\r
 759              * means we've reached the bottom of the quotient. Clip q\r
 760              * so that its right shift becomes zero, and if that means\r
 761              * q becomes _actually_ zero, this loop is done.\r
 762              */\r
 763             if (full_bitoffset <= -BIGNUM_INT_BITS)\r
 764                 break;\r
 765             q >>= -full_bitoffset;\r
 766             full_bitoffset = 0;\r
 767             if (!q)\r
 768                 break;\r
 769 #ifdef DIVISION_DEBUG\r
 770             printf("now full_bitoffset=%d, q=%#0*llx\n",\r
 771                    full_bitoffset, BIGNUM_INT_BITS/4, (unsigned long long)q);\r
 772 #endif\r
 773         }\r
 774 \r
 775         wordoffset = full_bitoffset / BIGNUM_INT_BITS;\r
 776         bitoffset = full_bitoffset % BIGNUM_INT_BITS;\r
 777 #ifdef DIVISION_DEBUG\r
 778         printf("wordoffset=%d, bitoffset=%d\n", wordoffset, bitoffset);\r
 779 #endif\r
 780 \r
 781         /* wordoffset as computed above is the offset between the LSWs\r
 782          * of m and a. But in fact m and a are stored MSW-first, so we\r
 783          * need to adjust it to be the offset between the actual array\r
 784          * indices, and flip the sign too. */\r
 785         wordoffset = alen - mlen - wordoffset;\r
 786 \r
 787         if (bitoffset == 0) {\r
 788             BignumCarry c = 1;\r
 789             BignumInt prev_hi_word = 0;\r
 790             for (k = mlen - 1; wordoffset+k >= i; k--) {\r
 791                 BignumInt mword = k<0 ? 0 : m[k];\r
 792                 BignumMULADD(prev_hi_word, product, q, mword, prev_hi_word);\r
 793 #ifdef DIVISION_DEBUG\r
 794                 printf("  aligned sub: product word for m[%d] = %#0*llx\n",\r
 795                        k, BIGNUM_INT_BITS/4,\r
 796                        (unsigned long long)product);\r
 797 #endif\r
 798 #ifdef DIVISION_DEBUG\r
 799                 printf("  aligned sub: subtrahend for a[%d] = %#0*llx\n",\r
 800                        wordoffset+k, BIGNUM_INT_BITS/4,\r
 801                        (unsigned long long)product);\r
 802 #endif\r
 803                 BignumADC(a[wordoffset+k], c, a[wordoffset+k], ~product, c);\r
 804             }\r
 805         } else {\r
 806             BignumInt add_word = 0;\r
 807             BignumInt c = 1;\r
 808             BignumInt prev_hi_word = 0;\r
 809             for (k = mlen - 1; wordoffset+k >= i; k--) {\r
 810                 BignumInt mword = k<0 ? 0 : m[k];\r
 811                 BignumMULADD(prev_hi_word, product, q, mword, prev_hi_word);\r
 812 #ifdef DIVISION_DEBUG\r
 813                 printf("  unaligned sub: product word for m[%d] = %#0*llx\n",\r
 814                        k, BIGNUM_INT_BITS/4,\r
 815                        (unsigned long long)product);\r
 816 #endif\r
 817 \r
 818                 add_word |= product << bitoffset;\r
 819 \r
 820 #ifdef DIVISION_DEBUG\r
 821                 printf("  unaligned sub: subtrahend for a[%d] = %#0*llx\n",\r
 822                        wordoffset+k,\r
 823                        BIGNUM_INT_BITS/4, (unsigned long long)add_word);\r
 824 #endif\r
 825                 BignumADC(a[wordoffset+k], c, a[wordoffset+k], ~add_word, c);\r
 826 \r
 827                 add_word = product >> (BIGNUM_INT_BITS - bitoffset);\r
 828             }\r
 829         }\r
 830 \r
 831         if (quot) {\r
 832 #ifdef DIVISION_DEBUG\r
 833             printf("adding quotient word %#0*llx << %d\n",\r
 834                    BIGNUM_INT_BITS/4, (unsigned long long)q, full_bitoffset);\r
 835 #endif\r
 836             internal_add_shifted(quot, q, full_bitoffset);\r
 837 #ifdef DIVISION_DEBUG\r
 838             {\r
 839                 int d;\r
 840                 printf("now quot=0x");\r
 841                 for (d = quot[0]; d > 0; d--)\r
 842                     printf("%0*llx", BIGNUM_INT_BITS/4,\r
 843                            (unsigned long long)quot[d]);\r
 844                 printf("\n");\r
 845             }\r
 846 #endif\r
 847         }\r
 848     }\r
 849 \r
 850 #ifdef DIVISION_DEBUG\r
 851     {\r
 852         int d;\r
 853         printf("end main loop, a=0x");\r
 854         for (d = 0; d < alen; d++)\r
 855             printf("%0*llx", BIGNUM_INT_BITS/4, (unsigned long long)a[d]);\r
 856         if (quot) {\r
 857             printf(", quot=0x");\r
 858             for (d = quot[0]; d > 0; d--)\r
 859                 printf("%0*llx", BIGNUM_INT_BITS/4,\r
 860                        (unsigned long long)quot[d]);\r
 861         }\r
 862         printf("\n");\r
 863     }\r
 864 #endif\r
 865 \r
 866     /*\r
 867      * The above loop should terminate with the remaining value in a\r
 868      * being strictly less than 2*m (if a >= 2*m then we should always\r
 869      * have managed to get a nonzero q word), but we can't guarantee\r
 870      * that it will be strictly less than m: consider a case where the\r
 871      * remainder is 1, and another where the remainder is m-1. By the\r
 872      * time a contains a value that's _about m_, you clearly can't\r
 873      * distinguish those cases by looking at only the top word of a -\r
 874      * you have to go all the way down to the bottom before you find\r
 875      * out whether it's just less or just more than m.\r
 876      *\r
 877      * Hence, we now do a final fixup in which we subtract one last\r
 878      * copy of m, or don't, accordingly. We should never have to\r
 879      * subtract more than one copy of m here.\r
 880      */\r
 881     for (i = 0; i < alen; i++) {\r
 882         /* Compare a with m, word by word, from the MSW down. As soon\r
 883          * as we encounter a difference, we know whether we need the\r
 884          * fixup. */\r
 885         int mindex = mlen-alen+i;\r
 886         BignumInt mword = mindex < 0 ? 0 : m[mindex];\r
 887         if (a[i] < mword) {\r
 888 #ifdef DIVISION_DEBUG\r
 889             printf("final fixup not needed, a < m\n");\r
 890 #endif\r
 891             return;\r
 892         } else if (a[i] > mword) {\r
 893 #ifdef DIVISION_DEBUG\r
 894             printf("final fixup is needed, a > m\n");\r
 895 #endif\r
 896             break;\r
 897         }\r
 898         /* If neither of those cases happened, the words are the same,\r
 899          * so keep going and look at the next one. */\r
 900     }\r
 901 #ifdef DIVISION_DEBUG\r
 902     if (i == mlen) /* if we printed neither of the above diagnostics */\r
 903         printf("final fixup is needed, a == m\n");\r
 904 #endif\r
 905 \r
 906     /*\r
 907      * If we got here without returning, then a >= m, so we must\r
 908      * subtract m, and increment the quotient.\r
 909      */\r
 910     {\r
 911         BignumCarry c = 1;\r
 912         for (i = alen - 1; i >= 0; i--) {\r
 913             int mindex = mlen-alen+i;\r
 914             BignumInt mword = mindex < 0 ? 0 : m[mindex];\r
 915             BignumADC(a[i], c, a[i], ~mword, c);\r
 916         }\r
 917     }\r
 918     if (quot)\r
 919         internal_add_shifted(quot, 1, 0);\r
 920 \r
 921 #ifdef DIVISION_DEBUG\r
 922     {\r
 923         int d;\r
 924         printf("after final fixup, a=0x");\r
 925         for (d = 0; d < alen; d++)\r
 926             printf("%0*llx", BIGNUM_INT_BITS/4, (unsigned long long)a[d]);\r
 927         if (quot) {\r
 928             printf(", quot=0x");\r
 929             for (d = quot[0]; d > 0; d--)\r
 930                 printf("%0*llx", BIGNUM_INT_BITS/4,\r
 931                        (unsigned long long)quot[d]);\r
 932         }\r
 933         printf("\n");\r
 934     }\r
 935 #endif\r
 936 }\r
 937 \r
 938 /*\r
 939  * Compute (base ^ exp) % mod, the pedestrian way.\r
 940  */\r
 941 Bignum modpow_simple(Bignum base_in, Bignum exp, Bignum mod)\r
 942 {\r
 943     BignumInt *a, *b, *n, *m, *scratch;\r
 944     BignumInt recip;\r
 945     int rshift;\r
 946     int mlen, scratchlen, i, j;\r
 947     Bignum base, result;\r
 948 \r
 949     /*\r
 950      * The most significant word of mod needs to be non-zero. It\r
 951      * should already be, but let's make sure.\r
 952      */\r
 953     assert(mod[mod[0]] != 0);\r
 954 \r
 955     /*\r
 956      * Make sure the base is smaller than the modulus, by reducing\r
 957      * it modulo the modulus if not.\r
 958      */\r
 959     base = bigmod(base_in, mod);\r
 960 \r
 961     /* Allocate m of size mlen, copy mod to m */\r
 962     /* We use big endian internally */\r
 963     mlen = mod[0];\r
 964     m = snewn(mlen, BignumInt);\r
 965     for (j = 0; j < mlen; j++)\r
 966         m[j] = mod[mod[0] - j];\r
 967 \r
 968     /* Allocate n of size mlen, copy base to n */\r
 969     n = snewn(mlen, BignumInt);\r
 970     i = mlen - base[0];\r
 971     for (j = 0; j < i; j++)\r
 972         n[j] = 0;\r
 973     for (j = 0; j < (int)base[0]; j++)\r
 974         n[i + j] = base[base[0] - j];\r
 975 \r
 976     /* Allocate a and b of size 2*mlen. Set a = 1 */\r
 977     a = snewn(2 * mlen, BignumInt);\r
 978     b = snewn(2 * mlen, BignumInt);\r
 979     for (i = 0; i < 2 * mlen; i++)\r
 980         a[i] = 0;\r
 981     a[2 * mlen - 1] = 1;\r
 982 \r
 983     /* Scratch space for multiplies */\r
 984     scratchlen = mul_compute_scratch(mlen);\r
 985     scratch = snewn(scratchlen, BignumInt);\r
 986 \r
 987     /* Skip leading zero bits of exp. */\r
 988     i = 0;\r
 989     j = BIGNUM_INT_BITS-1;\r
 990     while (i < (int)exp[0] && (exp[exp[0] - i] & ((BignumInt)1 << j)) == 0) {\r
 991         j--;\r
 992         if (j < 0) {\r
 993             i++;\r
 994             j = BIGNUM_INT_BITS-1;\r
 995         }\r
 996     }\r
 997 \r
 998     /* Compute reciprocal of the top full word of the modulus */\r
 999     {\r
1000         BignumInt m0 = m[0];\r
1001         rshift = bn_clz(m0);\r
1002         if (rshift) {\r
1003             m0 <<= rshift;\r
1004             if (mlen > 1)\r
1005                 m0 |= m[1] >> (BIGNUM_INT_BITS - rshift);\r
1006         }\r
1007         recip = reciprocal_word(m0);\r
1008     }\r
1009 \r
1010     /* Main computation */\r
1011     while (i < (int)exp[0]) {\r
1012         while (j >= 0) {\r
1013             internal_mul(a + mlen, a + mlen, b, mlen, scratch);\r
1014             internal_mod(b, mlen * 2, m, mlen, NULL, recip, rshift);\r
1015             if ((exp[exp[0] - i] & ((BignumInt)1 << j)) != 0) {\r
1016                 internal_mul(b + mlen, n, a, mlen, scratch);\r
1017                 internal_mod(a, mlen * 2, m, mlen, NULL, recip, rshift);\r
1018             } else {\r
1019                 BignumInt *t;\r
1020                 t = a;\r
1021                 a = b;\r
1022                 b = t;\r
1023             }\r
1024             j--;\r
1025         }\r
1026         i++;\r
1027         j = BIGNUM_INT_BITS-1;\r
1028     }\r
1029 \r
1030     /* Copy result to buffer */\r
1031     result = newbn(mod[0]);\r
1032     for (i = 0; i < mlen; i++)\r
1033         result[result[0] - i] = a[i + mlen];\r
1034     while (result[0] > 1 && result[result[0]] == 0)\r
1035         result[0]--;\r
1036 \r
1037     /* Free temporary arrays */\r
1038     smemclr(a, 2 * mlen * sizeof(*a));\r
1039     sfree(a);\r
1040     smemclr(scratch, scratchlen * sizeof(*scratch));\r
1041     sfree(scratch);\r
1042     smemclr(b, 2 * mlen * sizeof(*b));\r
1043     sfree(b);\r
1044     smemclr(m, mlen * sizeof(*m));\r
1045     sfree(m);\r
1046     smemclr(n, mlen * sizeof(*n));\r
1047     sfree(n);\r
1048 \r
1049     freebn(base);\r
1050 \r
1051     return result;\r
1052 }\r
1053 \r
/*
 * Compute (base ^ exp) % mod. Uses the Montgomery multiplication
 * technique where possible, falling back to modpow_simple otherwise.
 *
 * base_in: the base; it is reduced modulo `mod' before use.
 * exp:     the exponent, scanned from its top set bit downwards.
 * mod:     the modulus; its most significant word must be nonzero.
 *          An even modulus is delegated to modpow_simple().
 *
 * Returns a Bignum allocated with newbn(), with leading zero words
 * trimmed (down to a minimum length of one word). All temporaries
 * allocated here are wiped with smemclr() and freed before returning.
 */
Bignum modpow(Bignum base_in, Bignum exp, Bignum mod)
{
    BignumInt *a, *b, *x, *n, *mninv, *scratch;
    int len, scratchlen, i, j;
    Bignum base, base2, r, rn, inv, result;

    /*
     * The most significant word of mod needs to be non-zero. It
     * should already be, but let's make sure.
     */
    assert(mod[mod[0]] != 0);

    /*
     * mod had better be odd, or we can't do Montgomery multiplication
     * using a power of two at all.
     */
    if (!(mod[1] & 1))
        return modpow_simple(base_in, exp, mod);

    /*
     * Make sure the base is smaller than the modulus, by reducing
     * it modulo the modulus if not.
     */
    base = bigmod(base_in, mod);

    /*
     * Compute the inverse of n mod r, for monty_reduce. (In fact we
     * want the inverse of _minus_ n mod r, but we'll sort that out
     * below.)
     *
     * Here r = 2^(BIGNUM_INT_BITS * len), i.e. one full word more
     * than the modulus occupies.
     */
    len = mod[0];
    r = bn_power_2(BIGNUM_INT_BITS * len);
    inv = modinv(mod, r);
    assert(inv); /* cannot fail, since mod is odd and r is a power of 2 */

    /*
     * Multiply the base by r mod n, to get it into Montgomery
     * representation.
     */
    base2 = modmul(base, r, mod);
    freebn(base);
    base = base2;

    rn = bigmod(r, mod);               /* r mod n, i.e. Montgomerified 1 */

    freebn(r);                         /* won't need this any more */

    /*
     * Set up internal arrays of the right lengths, in big-endian
     * format, containing the base, the modulus, and the modulus's
     * inverse.
     */
    n = snewn(len, BignumInt);
    for (j = 0; j < len; j++)
        n[len - 1 - j] = mod[j + 1];

    /* Words beyond the end of inv are filled in as zero. */
    mninv = snewn(len, BignumInt);
    for (j = 0; j < len; j++)
        mninv[len - 1 - j] = (j < (int)inv[0] ? inv[j + 1] : 0);
    freebn(inv);         /* we don't need this copy of it any more */
    /* Now negate mninv mod r, so it's the inverse of -n rather than +n. */
    /* x is used as an all-zero minuend here; presumably internal_sub
     * computes (first - second) into its third argument, truncated to
     * len words -- confirm against its definition. */
    x = snewn(len, BignumInt);
    for (j = 0; j < len; j++)
        x[j] = 0;
    internal_sub(x, mninv, mninv, len);

    /* Reuse the x buffer allocated above to hold the big-endian,
     * zero-padded Montgomery form of the base. */
    for (j = 0; j < len; j++)
        x[len - 1 - j] = (j < (int)base[0] ? base[j + 1] : 0);
    freebn(base);        /* we don't need this copy of it any more */

    /*
     * a and b are double-width working buffers. Only the second half
     * of a (words len..2*len-1) is initialised here, to r mod n; the
     * first half of a and all of b are written before being read only
     * if internal_mul/monty_reduce fill them -- NOTE(review): confirm
     * against those helpers.
     */
    a = snewn(2*len, BignumInt);
    b = snewn(2*len, BignumInt);
    for (j = 0; j < len; j++)
        a[2*len - 1 - j] = (j < (int)rn[0] ? rn[j + 1] : 0);
    freebn(rn);

    /* Scratch space for multiplies */
    scratchlen = 3*len + mul_compute_scratch(len);
    scratch = snewn(scratchlen, BignumInt);

    /* Skip leading zero bits of exp. */
    /* i counts words down from the top of exp; j is the bit index
     * within the current word. */
    i = 0;
    j = BIGNUM_INT_BITS-1;
    while (i < (int)exp[0] && (exp[exp[0] - i] & ((BignumInt)1 << j)) == 0) {
        j--;
        if (j < 0) {
            i++;
            j = BIGNUM_INT_BITS-1;
        }
    }

    /* Main computation */
    /* For each remaining exponent bit: square the running value from
     * a into b, then either multiply by the base back into a (bit
     * set) or swap the two buffers so that a again holds the running
     * value (bit clear). */
    while (i < (int)exp[0]) {
        while (j >= 0) {
            internal_mul(a + len, a + len, b, len, scratch);
            monty_reduce(b, n, mninv, scratch, len);
            if ((exp[exp[0] - i] & ((BignumInt)1 << j)) != 0) {
                internal_mul(b + len, x, a, len,  scratch);
                monty_reduce(a, n, mninv, scratch, len);
            } else {
                BignumInt *t;
                t = a;
                a = b;
                b = t;
            }
            j--;
        }
        i++;
        j = BIGNUM_INT_BITS-1;
    }

    /*
     * Final monty_reduce to get back from the adjusted Montgomery
     * representation.
     */
    monty_reduce(a, n, mninv, scratch, len);

    /* Copy result to buffer */
    /* The answer is taken from the second half of a (big-endian) and
     * stored little-endian in result; then leading zero words are
     * trimmed, keeping at least one word. */
    result = newbn(mod[0]);
    for (i = 0; i < len; i++)
        result[result[0] - i] = a[i + len];
    while (result[0] > 1 && result[result[0]] == 0)
        result[0]--;

    /* Free temporary arrays */
    /* Each buffer is wiped before release so intermediate values do
     * not linger in freed memory. */
    smemclr(scratch, scratchlen * sizeof(*scratch));
    sfree(scratch);
    smemclr(a, 2 * len * sizeof(*a));
    sfree(a);
    smemclr(b, 2 * len * sizeof(*b));
    sfree(b);
    smemclr(mninv, len * sizeof(*mninv));
    sfree(mninv);
    smemclr(n, len * sizeof(*n));
    sfree(n);
    smemclr(x, len * sizeof(*x));
    sfree(x);

    return result;
}
1199 \r
1200 /*\r
1201  * Compute (p * q) % mod.\r
1202  * The most significant word of mod MUST be non-zero.\r
1203  * We assume that the result array is the same size as the mod array.\r
1204  */\r
1205 Bignum modmul(Bignum p, Bignum q, Bignum mod)\r
1206 {\r
1207     BignumInt *a, *n, *m, *o, *scratch;\r
1208     BignumInt recip;\r
1209     int rshift, scratchlen;\r
1210     int pqlen, mlen, rlen, i, j;\r
1211     Bignum result;\r
1212 \r
1213     /*\r
1214      * The most significant word of mod needs to be non-zero. It\r
1215      * should already be, but let's make sure.\r
1216      */\r
1217     assert(mod[mod[0]] != 0);\r
1218 \r
1219     /* Allocate m of size mlen, copy mod to m */\r
1220     /* We use big endian internally */\r
1221     mlen = mod[0];\r
1222     m = snewn(mlen, BignumInt);\r
1223     for (j = 0; j < mlen; j++)\r
1224         m[j] = mod[mod[0] - j];\r
1225 \r
1226     pqlen = (p[0] > q[0] ? p[0] : q[0]);\r
1227 \r
1228     /*\r
1229      * Make sure that we're allowing enough space. The shifting below\r
1230      * will underflow the vectors we allocate if pqlen is too small.\r
1231      */\r
1232     if (2*pqlen <= mlen)\r
1233         pqlen = mlen/2 + 1;\r
1234 \r
1235     /* Allocate n of size pqlen, copy p to n */\r
1236     n = snewn(pqlen, BignumInt);\r
1237     i = pqlen - p[0];\r
1238     for (j = 0; j < i; j++)\r
1239         n[j] = 0;\r
1240     for (j = 0; j < (int)p[0]; j++)\r
1241         n[i + j] = p[p[0] - j];\r
1242 \r
1243     /* Allocate o of size pqlen, copy q to o */\r
1244     o = snewn(pqlen, BignumInt);\r
1245     i = pqlen - q[0];\r
1246     for (j = 0; j < i; j++)\r
1247         o[j] = 0;\r
1248     for (j = 0; j < (int)q[0]; j++)\r
1249         o[i + j] = q[q[0] - j];\r
1250 \r
1251     /* Allocate a of size 2*pqlen for result */\r
1252     a = snewn(2 * pqlen, BignumInt);\r
1253 \r
1254     /* Scratch space for multiplies */\r
1255     scratchlen = mul_compute_scratch(pqlen);\r
1256     scratch = snewn(scratchlen, BignumInt);\r
1257 \r
1258     /* Compute reciprocal of the top full word of the modulus */\r
1259     {\r
1260         BignumInt m0 = m[0];\r
1261         rshift = bn_clz(m0);\r
1262         if (rshift) {\r
1263             m0 <<= rshift;\r
1264             if (mlen > 1)\r
1265                 m0 |= m[1] >> (BIGNUM_INT_BITS - rshift);\r
1266         }\r
1267         recip = reciprocal_word(m0);\r
1268     }\r
1269 \r
1270     /* Main computation */\r
1271     internal_mul(n, o, a, pqlen, scratch);\r
1272     internal_mod(a, pqlen * 2, m, mlen, NULL, recip, rshift);\r
1273 \r
1274     /* Copy result to buffer */\r
1275     rlen = (mlen < pqlen * 2 ? mlen : pqlen * 2);\r
1276     result = newbn(rlen);\r
1277     for (i = 0; i < rlen; i++)\r
1278         result[result[0] - i] = a[i + 2 * pqlen - rlen];\r
1279     while (result[0] > 1 && result[result[0]] == 0)\r
1280         result[0]--;\r
1281 \r
1282     /* Free temporary arrays */\r
1283     smemclr(scratch, scratchlen * sizeof(*scratch));\r
1284     sfree(scratch);\r
1285     smemclr(a, 2 * pqlen * sizeof(*a));\r
1286     sfree(a);\r
1287     smemclr(m, mlen * sizeof(*m));\r
1288     sfree(m);\r
1289     smemclr(n, pqlen * sizeof(*n));\r
1290     sfree(n);\r
1291     smemclr(o, pqlen * sizeof(*o));\r
1292     sfree(o);\r
1293 \r
1294     return result;\r
1295 }\r
1296 \r
1297 Bignum modsub(const Bignum a, const Bignum b, const Bignum n)\r
1298 {\r
1299     Bignum a1, b1, ret;\r
1300 \r
1301     if (bignum_cmp(a, n) >= 0) a1 = bigmod(a, n);\r
1302     else a1 = a;\r
1303     if (bignum_cmp(b, n) >= 0) b1 = bigmod(b, n);\r
1304     else b1 = b;\r
1305 \r
1306     if (bignum_cmp(a1, b1) >= 0) /* a >= b */\r
1307     {\r
1308         ret = bigsub(a1, b1);\r
1309     }\r
1310     else\r
1311     {\r
1312         /* Handle going round the corner of the modulus without having\r
1313          * negative support in Bignum */\r
1314         Bignum tmp = bigsub(n, b1);\r
1315         assert(tmp);\r
1316         ret = bigadd(tmp, a1);\r
1317         freebn(tmp);\r
1318     }\r
1319 \r
1320     if (a != a1) freebn(a1);\r
1321     if (b != b1) freebn(b1);\r
1322 \r
1323     return ret;\r
1324 }\r
1325 \r
1326 /*\r
1327  * Compute p % mod.\r
1328  * The most significant word of mod MUST be non-zero.\r
1329  * We assume that the result array is the same size as the mod array.\r
1330  * We optionally write out a quotient if `quotient' is non-NULL.\r
1331  * We can avoid writing out the result if `result' is NULL.\r
1332  */\r
1333 static void bigdivmod(Bignum p, Bignum mod, Bignum result, Bignum quotient)\r
1334 {\r
1335     BignumInt *n, *m;\r
1336     BignumInt recip;\r
1337     int rshift;\r
1338     int plen, mlen, i, j;\r
1339 \r
1340     /*\r
1341      * The most significant word of mod needs to be non-zero. It\r
1342      * should already be, but let's make sure.\r
1343      */\r
1344     assert(mod[mod[0]] != 0);\r
1345 \r
1346     /* Allocate m of size mlen, copy mod to m */\r
1347     /* We use big endian internally */\r
1348     mlen = mod[0];\r
1349     m = snewn(mlen, BignumInt);\r
1350     for (j = 0; j < mlen; j++)\r
1351         m[j] = mod[mod[0] - j];\r
1352 \r
1353     plen = p[0];\r
1354     /* Ensure plen > mlen */\r
1355     if (plen <= mlen)\r
1356         plen = mlen + 1;\r
1357 \r
1358     /* Allocate n of size plen, copy p to n */\r
1359     n = snewn(plen, BignumInt);\r
1360     for (j = 0; j < plen; j++)\r
1361         n[j] = 0;\r
1362     for (j = 1; j <= (int)p[0]; j++)\r
1363         n[plen - j] = p[j];\r
1364 \r
1365     /* Compute reciprocal of the top full word of the modulus */\r
1366     {\r
1367         BignumInt m0 = m[0];\r
1368         rshift = bn_clz(m0);\r
1369         if (rshift) {\r
1370             m0 <<= rshift;\r
1371             if (mlen > 1)\r
1372                 m0 |= m[1] >> (BIGNUM_INT_BITS - rshift);\r
1373         }\r
1374         recip = reciprocal_word(m0);\r
1375     }\r
1376 \r
1377     /* Main computation */\r
1378     internal_mod(n, plen, m, mlen, quotient, recip, rshift);\r
1379 \r
1380     /* Copy result to buffer */\r
1381     if (result) {\r
1382         for (i = 1; i <= (int)result[0]; i++) {\r
1383             int j = plen - i;\r
1384             result[i] = j >= 0 ? n[j] : 0;\r
1385         }\r
1386     }\r
1387 \r
1388     /* Free temporary arrays */\r
1389     smemclr(m, mlen * sizeof(*m));\r
1390     sfree(m);\r
1391     smemclr(n, plen * sizeof(*n));\r
1392     sfree(n);\r
1393 }\r
1394 \r
1395 /*\r
1396  * Decrement a number.\r
1397  */\r
1398 void decbn(Bignum bn)\r
1399 {\r
1400     int i = 1;\r
1401     while (i < (int)bn[0] && bn[i] == 0)\r
1402         bn[i++] = BIGNUM_INT_MASK;\r
1403     bn[i]--;\r
1404 }\r
1405 \r
1406 Bignum bignum_from_bytes(const unsigned char *data, int nbytes)\r
1407 {\r
1408     Bignum result;\r
1409     int w, i;\r
1410 \r
1411     assert(nbytes >= 0 && nbytes < INT_MAX/8);\r
1412 \r
1413     w = (nbytes + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES; /* bytes->words */\r
1414 \r
1415     result = newbn(w);\r
1416     for (i = 1; i <= w; i++)\r
1417         result[i] = 0;\r
1418     for (i = nbytes; i--;) {\r
1419         unsigned char byte = *data++;\r
1420         result[1 + i / BIGNUM_INT_BYTES] |=\r
1421             (BignumInt)byte << (8*i % BIGNUM_INT_BITS);\r
1422     }\r
1423 \r
1424     bn_restore_invariant(result);\r
1425     return result;\r
1426 }\r
1427 \r
1428 Bignum bignum_from_bytes_le(const unsigned char *data, int nbytes)\r
1429 {\r
1430     Bignum result;\r
1431     int w, i;\r
1432 \r
1433     assert(nbytes >= 0 && nbytes < INT_MAX/8);\r
1434 \r
1435     w = (nbytes + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES; /* bytes->words */\r
1436 \r
1437     result = newbn(w);\r
1438     for (i = 1; i <= w; i++)\r
1439         result[i] = 0;\r
1440     for (i = 0; i < nbytes; ++i) {\r
1441         unsigned char byte = *data++;\r
1442         result[1 + i / BIGNUM_INT_BYTES] |=\r
1443             (BignumInt)byte << (8*i % BIGNUM_INT_BITS);\r
1444     }\r
1445 \r
1446     bn_restore_invariant(result);\r
1447     return result;\r
1448 }\r
1449 \r
1450 Bignum bignum_from_decimal(const char *decimal)\r
1451 {\r
1452     Bignum result = copybn(Zero);\r
1453 \r
1454     while (*decimal) {\r
1455         Bignum tmp, tmp2;\r
1456 \r
1457         if (!isdigit((unsigned char)*decimal)) {\r
1458             freebn(result);\r
1459             return 0;\r
1460         }\r
1461 \r
1462         tmp = bigmul(result, Ten);\r
1463         tmp2 = bignum_from_long(*decimal - '0');\r
1464         freebn(result);\r
1465         result = bigadd(tmp, tmp2);\r
1466         freebn(tmp);\r
1467         freebn(tmp2);\r
1468 \r
1469         decimal++;\r
1470     }\r
1471 \r
1472     return result;\r
1473 }\r
1474 \r
1475 Bignum bignum_random_in_range(const Bignum lower, const Bignum upper)\r
1476 {\r
1477     Bignum ret = NULL;\r
1478     unsigned char *bytes;\r
1479     int upper_len = bignum_bitcount(upper);\r
1480     int upper_bytes = upper_len / 8;\r
1481     int upper_bits = upper_len % 8;\r
1482     if (upper_bits) ++upper_bytes;\r
1483 \r
1484     bytes = snewn(upper_bytes, unsigned char);\r
1485     do {\r
1486         int i;\r
1487 \r
1488         if (ret) freebn(ret);\r
1489 \r
1490         for (i = 0; i < upper_bytes; ++i)\r
1491         {\r
1492             bytes[i] = (unsigned char)random_byte();\r
1493         }\r
1494         /* Mask the top to reduce failure rate to 50/50 */\r
1495         if (upper_bits)\r
1496         {\r
1497             bytes[i - 1] &= 0xFF >> (8 - upper_bits);\r
1498         }\r
1499 \r
1500         ret = bignum_from_bytes(bytes, upper_bytes);\r
1501     } while (bignum_cmp(ret, lower) < 0 || bignum_cmp(ret, upper) > 0);\r
1502     smemclr(bytes, upper_bytes);\r
1503     sfree(bytes);\r
1504 \r
1505     return ret;\r
1506 }\r
1507 \r
1508 /*\r
1509  * Read an SSH-1-format bignum from a data buffer. Return the number\r
1510  * of bytes consumed, or -1 if there wasn't enough data.\r
1511  */\r
1512 int ssh1_read_bignum(const unsigned char *data, int len, Bignum * result)\r
1513 {\r
1514     const unsigned char *p = data;\r
1515     int i;\r
1516     int w, b;\r
1517 \r
1518     if (len < 2)\r
1519         return -1;\r
1520 \r
1521     w = 0;\r
1522     for (i = 0; i < 2; i++)\r
1523         w = (w << 8) + *p++;\r
1524     b = (w + 7) / 8;                   /* bits -> bytes */\r
1525 \r
1526     if (len < b+2)\r
1527         return -1;\r
1528 \r
1529     if (!result)                       /* just return length */\r
1530         return b + 2;\r
1531 \r
1532     *result = bignum_from_bytes(p, b);\r
1533 \r
1534     return p + b - data;\r
1535 }\r
1536 \r
1537 /*\r
1538  * Return the bit count of a bignum, for SSH-1 encoding.\r
1539  */\r
1540 int bignum_bitcount(Bignum bn)\r
1541 {\r
1542     int bitcount = bn[0] * BIGNUM_INT_BITS - 1;\r
1543     while (bitcount >= 0\r
1544            && (bn[bitcount / BIGNUM_INT_BITS + 1] >> (bitcount % BIGNUM_INT_BITS)) == 0) bitcount--;\r
1545     return bitcount + 1;\r
1546 }\r
1547 \r
1548 /*\r
1549  * Return the byte length of a bignum when SSH-1 encoded.\r
1550  */\r
1551 int ssh1_bignum_length(Bignum bn)\r
1552 {\r
1553     return 2 + (bignum_bitcount(bn) + 7) / 8;\r
1554 }\r
1555 \r
1556 /*\r
1557  * Return the byte length of a bignum when SSH-2 encoded.\r
1558  */\r
1559 int ssh2_bignum_length(Bignum bn)\r
1560 {\r
1561     return 4 + (bignum_bitcount(bn) + 8) / 8;\r
1562 }\r
1563 \r
1564 /*\r
1565  * Return a byte from a bignum; 0 is least significant, etc.\r
1566  */\r
1567 int bignum_byte(Bignum bn, int i)\r
1568 {\r
1569     if (i < 0 || i >= (int)(BIGNUM_INT_BYTES * bn[0]))\r
1570         return 0;                      /* beyond the end */\r
1571     else\r
1572         return (bn[i / BIGNUM_INT_BYTES + 1] >>\r
1573                 ((i % BIGNUM_INT_BYTES)*8)) & 0xFF;\r
1574 }\r
1575 \r
1576 /*\r
1577  * Return a bit from a bignum; 0 is least significant, etc.\r
1578  */\r
1579 int bignum_bit(Bignum bn, int i)\r
1580 {\r
1581     if (i < 0 || i >= (int)(BIGNUM_INT_BITS * bn[0]))\r
1582         return 0;                      /* beyond the end */\r
1583     else\r
1584         return (bn[i / BIGNUM_INT_BITS + 1] >> (i % BIGNUM_INT_BITS)) & 1;\r
1585 }\r
1586 \r
1587 /*\r
1588  * Set a bit in a bignum; 0 is least significant, etc.\r
1589  */\r
1590 void bignum_set_bit(Bignum bn, int bitnum, int value)\r
1591 {\r
1592     if (bitnum < 0 || bitnum >= (int)(BIGNUM_INT_BITS * bn[0])) {\r
1593         if (value) abort();                    /* beyond the end */\r
1594     } else {\r
1595         int v = bitnum / BIGNUM_INT_BITS + 1;\r
1596         BignumInt mask = (BignumInt)1 << (bitnum % BIGNUM_INT_BITS);\r
1597         if (value)\r
1598             bn[v] |= mask;\r
1599         else\r
1600             bn[v] &= ~mask;\r
1601     }\r
1602 }\r
1603 \r
1604 /*\r
1605  * Write a SSH-1-format bignum into a buffer. It is assumed the\r
1606  * buffer is big enough. Returns the number of bytes used.\r
1607  */\r
1608 int ssh1_write_bignum(void *data, Bignum bn)\r
1609 {\r
1610     unsigned char *p = data;\r
1611     int len = ssh1_bignum_length(bn);\r
1612     int i;\r
1613     int bitc = bignum_bitcount(bn);\r
1614 \r
1615     *p++ = (bitc >> 8) & 0xFF;\r
1616     *p++ = (bitc) & 0xFF;\r
1617     for (i = len - 2; i--;)\r
1618         *p++ = bignum_byte(bn, i);\r
1619     return len;\r
1620 }\r
1621 \r
1622 /*\r
1623  * Compare two bignums. Returns like strcmp.\r
1624  */\r
1625 int bignum_cmp(Bignum a, Bignum b)\r
1626 {\r
1627     int amax = a[0], bmax = b[0];\r
1628     int i;\r
1629 \r
1630     /* Annoyingly we have two representations of zero */\r
1631     if (amax == 1 && a[amax] == 0)\r
1632         amax = 0;\r
1633     if (bmax == 1 && b[bmax] == 0)\r
1634         bmax = 0;\r
1635 \r
1636     assert(amax == 0 || a[amax] != 0);\r
1637     assert(bmax == 0 || b[bmax] != 0);\r
1638 \r
1639     i = (amax > bmax ? amax : bmax);\r
1640     while (i) {\r
1641         BignumInt aval = (i > amax ? 0 : a[i]);\r
1642         BignumInt bval = (i > bmax ? 0 : b[i]);\r
1643         if (aval < bval)\r
1644             return -1;\r
1645         if (aval > bval)\r
1646             return +1;\r
1647         i--;\r
1648     }\r
1649     return 0;\r
1650 }\r
1651 \r
1652 /*\r
1653  * Right-shift one bignum to form another.\r
1654  */\r
1655 Bignum bignum_rshift(Bignum a, int shift)\r
1656 {\r
1657     Bignum ret;\r
1658     int i, shiftw, shiftb, shiftbb, bits;\r
1659     BignumInt ai, ai1;\r
1660 \r
1661     assert(shift >= 0);\r
1662 \r
1663     bits = bignum_bitcount(a) - shift;\r
1664     ret = newbn((bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS);\r
1665 \r
1666     if (ret) {\r
1667         shiftw = shift / BIGNUM_INT_BITS;\r
1668         shiftb = shift % BIGNUM_INT_BITS;\r
1669         shiftbb = BIGNUM_INT_BITS - shiftb;\r
1670 \r
1671         ai1 = a[shiftw + 1];\r
1672         for (i = 1; i <= (int)ret[0]; i++) {\r
1673             ai = ai1;\r
1674             ai1 = (i + shiftw + 1 <= (int)a[0] ? a[i + shiftw + 1] : 0);\r
1675             ret[i] = ((ai >> shiftb) | (ai1 << shiftbb)) & BIGNUM_INT_MASK;\r
1676         }\r
1677     }\r
1678 \r
1679     return ret;\r
1680 }\r
1681 \r
1682 /*\r
1683  * Left-shift one bignum to form another.\r
1684  */\r
1685 Bignum bignum_lshift(Bignum a, int shift)\r
1686 {\r
1687     Bignum ret;\r
1688     int bits, shiftWords, shiftBits;\r
1689 \r
1690     assert(shift >= 0);\r
1691 \r
1692     bits = bignum_bitcount(a) + shift;\r
1693     ret = newbn((bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS);\r
1694 \r
1695     shiftWords = shift / BIGNUM_INT_BITS;\r
1696     shiftBits = shift % BIGNUM_INT_BITS;\r
1697 \r
1698     if (shiftBits == 0)\r
1699     {\r
1700         memcpy(&ret[1 + shiftWords], &a[1], sizeof(BignumInt) * a[0]);\r
1701     }\r
1702     else\r
1703     {\r
1704         int i;\r
1705         BignumInt carry = 0;\r
1706 \r
1707         /* Remember that Bignum[0] is length, so add 1 */\r
1708         for (i = shiftWords + 1; i < ((int)a[0]) + shiftWords + 1; ++i)\r
1709         {\r
1710             BignumInt from = a[i - shiftWords];\r
1711             ret[i] = (from << shiftBits) | carry;\r
1712             carry = from >> (BIGNUM_INT_BITS - shiftBits);\r
1713         }\r
1714         if (carry) ret[i] = carry;\r
1715     }\r
1716 \r
1717     return ret;\r
1718 }\r
1719 \r
1720 /*\r
1721  * Non-modular multiplication and addition.\r
1722  */\r
1723 Bignum bigmuladd(Bignum a, Bignum b, Bignum addend)\r
1724 {\r
1725     int alen = a[0], blen = b[0];\r
1726     int mlen = (alen > blen ? alen : blen);\r
1727     int rlen, i, maxspot;\r
1728     int wslen;\r
1729     BignumInt *workspace;\r
1730     Bignum ret;\r
1731 \r
1732     /* mlen space for a, mlen space for b, 2*mlen for result,\r
1733      * plus scratch space for multiplication */\r
1734     wslen = mlen * 4 + mul_compute_scratch(mlen);\r
1735     workspace = snewn(wslen, BignumInt);\r
1736     for (i = 0; i < mlen; i++) {\r
1737         workspace[0 * mlen + i] = (mlen - i <= (int)a[0] ? a[mlen - i] : 0);\r
1738         workspace[1 * mlen + i] = (mlen - i <= (int)b[0] ? b[mlen - i] : 0);\r
1739     }\r
1740 \r
1741     internal_mul(workspace + 0 * mlen, workspace + 1 * mlen,\r
1742                  workspace + 2 * mlen, mlen, workspace + 4 * mlen);\r
1743 \r
1744     /* now just copy the result back */\r
1745     rlen = alen + blen + 1;\r
1746     if (addend && rlen <= (int)addend[0])\r
1747         rlen = addend[0] + 1;\r
1748     ret = newbn(rlen);\r
1749     maxspot = 0;\r
1750     for (i = 1; i <= (int)ret[0]; i++) {\r
1751         ret[i] = (i <= 2 * mlen ? workspace[4 * mlen - i] : 0);\r
1752         if (ret[i] != 0)\r
1753             maxspot = i;\r
1754     }\r
1755     ret[0] = maxspot;\r
1756 \r
1757     /* now add in the addend, if any */\r
1758     if (addend) {\r
1759         BignumCarry carry = 0;\r
1760         for (i = 1; i <= rlen; i++) {\r
1761             BignumInt retword = (i <= (int)ret[0] ? ret[i] : 0);\r
1762             BignumInt addword = (i <= (int)addend[0] ? addend[i] : 0);\r
1763             BignumADC(ret[i], carry, retword, addword, carry);\r
1764             if (ret[i] != 0 && i > maxspot)\r
1765                 maxspot = i;\r
1766         }\r
1767     }\r
1768     ret[0] = maxspot;\r
1769 \r
1770     smemclr(workspace, wslen * sizeof(*workspace));\r
1771     sfree(workspace);\r
1772     return ret;\r
1773 }\r
1774 \r
1775 /*\r
1776  * Non-modular multiplication.\r
1777  */\r
1778 Bignum bigmul(Bignum a, Bignum b)\r
1779 {\r
1780     return bigmuladd(a, b, NULL);\r
1781 }\r
1782 \r
1783 /*\r
1784  * Simple addition.\r
1785  */\r
1786 Bignum bigadd(Bignum a, Bignum b)\r
1787 {\r
1788     int alen = a[0], blen = b[0];\r
1789     int rlen = (alen > blen ? alen : blen) + 1;\r
1790     int i, maxspot;\r
1791     Bignum ret;\r
1792     BignumCarry carry;\r
1793 \r
1794     ret = newbn(rlen);\r
1795 \r
1796     carry = 0;\r
1797     maxspot = 0;\r
1798     for (i = 1; i <= rlen; i++) {\r
1799         BignumInt aword = (i <= (int)a[0] ? a[i] : 0);\r
1800         BignumInt bword = (i <= (int)b[0] ? b[i] : 0);\r
1801         BignumADC(ret[i], carry, aword, bword, carry);\r
1802         if (ret[i] != 0 && i > maxspot)\r
1803             maxspot = i;\r
1804     }\r
1805     ret[0] = maxspot;\r
1806 \r
1807     return ret;\r
1808 }\r
1809 \r
1810 /*\r
1811  * Subtraction. Returns a-b, or NULL if the result would come out\r
1812  * negative (recall that this entire bignum module only handles\r
1813  * positive numbers).\r
1814  */\r
1815 Bignum bigsub(Bignum a, Bignum b)\r
1816 {\r
1817     int alen = a[0], blen = b[0];\r
1818     int rlen = (alen > blen ? alen : blen);\r
1819     int i, maxspot;\r
1820     Bignum ret;\r
1821     BignumCarry carry;\r
1822 \r
1823     ret = newbn(rlen);\r
1824 \r
1825     carry = 1;\r
1826     maxspot = 0;\r
1827     for (i = 1; i <= rlen; i++) {\r
1828         BignumInt aword = (i <= (int)a[0] ? a[i] : 0);\r
1829         BignumInt bword = (i <= (int)b[0] ? b[i] : 0);\r
1830         BignumADC(ret[i], carry, aword, ~bword, carry);\r
1831         if (ret[i] != 0 && i > maxspot)\r
1832             maxspot = i;\r
1833     }\r
1834     ret[0] = maxspot;\r
1835 \r
1836     if (!carry) {\r
1837         freebn(ret);\r
1838         return NULL;\r
1839     }\r
1840 \r
1841     return ret;\r
1842 }\r
1843 \r
1844 /*\r
1845  * Create a bignum which is the bitmask covering another one. That\r
1846  * is, the smallest integer which is >= N and is also one less than\r
1847  * a power of two.\r
1848  */\r
1849 Bignum bignum_bitmask(Bignum n)\r
1850 {\r
1851     Bignum ret = copybn(n);\r
1852     int i;\r
1853     BignumInt j;\r
1854 \r
1855     i = ret[0];\r
1856     while (n[i] == 0 && i > 0)\r
1857         i--;\r
1858     if (i <= 0)\r
1859         return ret;                    /* input was zero */\r
1860     j = 1;\r
1861     while (j < n[i])\r
1862         j = 2 * j + 1;\r
1863     ret[i] = j;\r
1864     while (--i > 0)\r
1865         ret[i] = BIGNUM_INT_MASK;\r
1866     return ret;\r
1867 }\r
1868 \r
1869 /*\r
1870  * Convert an unsigned long into a bignum.\r
1871  */\r
1872 Bignum bignum_from_long(unsigned long n)\r
1873 {\r
1874     const int maxwords =\r
1875         (sizeof(unsigned long) + sizeof(BignumInt) - 1) / sizeof(BignumInt);\r
1876     Bignum ret;\r
1877     int i;\r
1878 \r
1879     ret = newbn(maxwords);\r
1880     ret[0] = 0;\r
1881     for (i = 0; i < maxwords; i++) {\r
1882         ret[i+1] = n >> (i * BIGNUM_INT_BITS);\r
1883         if (ret[i+1] != 0)\r
1884             ret[0] = i+1;\r
1885     }\r
1886 \r
1887     return ret;\r
1888 }\r
1889 \r
1890 /*\r
1891  * Add a long to a bignum.\r
1892  */\r
1893 Bignum bignum_add_long(Bignum number, unsigned long n)\r
1894 {\r
1895     const int maxwords =\r
1896         (sizeof(unsigned long) + sizeof(BignumInt) - 1) / sizeof(BignumInt);\r
1897     Bignum ret;\r
1898     int words, i;\r
1899     BignumCarry carry;\r
1900 \r
1901     words = number[0];\r
1902     if (words < maxwords)\r
1903         words = maxwords;\r
1904     words++;\r
1905     ret = newbn(words);\r
1906 \r
1907     carry = 0;\r
1908     ret[0] = 0;\r
1909     for (i = 0; i < words; i++) {\r
1910         BignumInt nword = (i < maxwords ? n >> (i * BIGNUM_INT_BITS) : 0);\r
1911         BignumInt numword = (i < number[0] ? number[i+1] : 0);\r
1912         BignumADC(ret[i+1], carry, numword, nword, carry);\r
1913         if (ret[i+1] != 0)\r
1914             ret[0] = i+1;\r
1915     }\r
1916     return ret;\r
1917 }\r
1918 \r
1919 /*\r
1920  * Compute the residue of a bignum, modulo a (max 16-bit) short.\r
1921  */\r
1922 unsigned short bignum_mod_short(Bignum number, unsigned short modulus)\r
1923 {\r
1924     unsigned long mod = modulus, r = 0;\r
1925     /* Precompute (BIGNUM_INT_MASK+1) % mod */\r
1926     unsigned long base_r = (BIGNUM_INT_MASK - modulus + 1) % mod;\r
1927     int i;\r
1928 \r
1929     for (i = number[0]; i > 0; i--) {\r
1930         /*\r
1931          * Conceptually, ((r << BIGNUM_INT_BITS) + number[i]) % mod\r
1932          */\r
1933         r = ((r * base_r) + (number[i] % mod)) % mod;\r
1934     }\r
1935     return (unsigned short) r;\r
1936 }\r
1937 \r
1938 #ifdef DEBUG\r
1939 void diagbn(char *prefix, Bignum md)\r
1940 {\r
1941     int i, nibbles, morenibbles;\r
1942     static const char hex[] = "0123456789ABCDEF";\r
1943 \r
1944     debug(("%s0x", prefix ? prefix : ""));\r
1945 \r
1946     nibbles = (3 + bignum_bitcount(md)) / 4;\r
1947     if (nibbles < 1)\r
1948         nibbles = 1;\r
1949     morenibbles = 4 * md[0] - nibbles;\r
1950     for (i = 0; i < morenibbles; i++)\r
1951         debug(("-"));\r
1952     for (i = nibbles; i--;)\r
1953         debug(("%c",\r
1954                hex[(bignum_byte(md, i / 2) >> (4 * (i % 2))) & 0xF]));\r
1955 \r
1956     if (prefix)\r
1957         debug(("\n"));\r
1958 }\r
1959 #endif\r
1960 \r
1961 /*\r
1962  * Simple division.\r
1963  */\r
1964 Bignum bigdiv(Bignum a, Bignum b)\r
1965 {\r
1966     Bignum q = newbn(a[0]);\r
1967     bigdivmod(a, b, NULL, q);\r
1968     while (q[0] > 1 && q[q[0]] == 0)\r
1969         q[0]--;\r
1970     return q;\r
1971 }\r
1972 \r
1973 /*\r
1974  * Simple remainder.\r
1975  */\r
1976 Bignum bigmod(Bignum a, Bignum b)\r
1977 {\r
1978     Bignum r = newbn(b[0]);\r
1979     bigdivmod(a, b, r, NULL);\r
1980     while (r[0] > 1 && r[r[0]] == 0)\r
1981         r[0]--;\r
1982     return r;\r
1983 }\r
1984 \r
1985 /*\r
1986  * Greatest common divisor.\r
1987  */\r
1988 Bignum biggcd(Bignum av, Bignum bv)\r
1989 {\r
1990     Bignum a = copybn(av);\r
1991     Bignum b = copybn(bv);\r
1992 \r
1993     while (bignum_cmp(b, Zero) != 0) {\r
1994         Bignum t = newbn(b[0]);\r
1995         bigdivmod(a, b, t, NULL);\r
1996         while (t[0] > 1 && t[t[0]] == 0)\r
1997             t[0]--;\r
1998         freebn(a);\r
1999         a = b;\r
2000         b = t;\r
2001     }\r
2002 \r
2003     freebn(b);\r
2004     return a;\r
2005 }\r
2006 \r
2007 /*\r
2008  * Modular inverse, using Euclid's extended algorithm.\r
2009  */\r
2010 Bignum modinv(Bignum number, Bignum modulus)\r
2011 {\r
2012     Bignum a = copybn(modulus);\r
2013     Bignum b = copybn(number);\r
2014     Bignum xp = copybn(Zero);\r
2015     Bignum x = copybn(One);\r
2016     int sign = +1;\r
2017 \r
2018     assert(number[number[0]] != 0);\r
2019     assert(modulus[modulus[0]] != 0);\r
2020 \r
2021     while (bignum_cmp(b, One) != 0) {\r
2022         Bignum t, q;\r
2023 \r
2024         if (bignum_cmp(b, Zero) == 0) {\r
2025             /*\r
2026              * Found a common factor between the inputs, so we cannot\r
2027              * return a modular inverse at all.\r
2028              */\r
2029             freebn(b);\r
2030             freebn(a);\r
2031             freebn(xp);\r
2032             freebn(x);\r
2033             return NULL;\r
2034         }\r
2035 \r
2036         t = newbn(b[0]);\r
2037         q = newbn(a[0]);\r
2038         bigdivmod(a, b, t, q);\r
2039         while (t[0] > 1 && t[t[0]] == 0)\r
2040             t[0]--;\r
2041         while (q[0] > 1 && q[q[0]] == 0)\r
2042             q[0]--;\r
2043         freebn(a);\r
2044         a = b;\r
2045         b = t;\r
2046         t = xp;\r
2047         xp = x;\r
2048         x = bigmuladd(q, xp, t);\r
2049         sign = -sign;\r
2050         freebn(t);\r
2051         freebn(q);\r
2052     }\r
2053 \r
2054     freebn(b);\r
2055     freebn(a);\r
2056     freebn(xp);\r
2057 \r
2058     /* now we know that sign * x == 1, and that x < modulus */\r
2059     if (sign < 0) {\r
2060         /* set a new x to be modulus - x */\r
2061         Bignum newx = newbn(modulus[0]);\r
2062         BignumInt carry = 0;\r
2063         int maxspot = 1;\r
2064         int i;\r
2065 \r
2066         for (i = 1; i <= (int)newx[0]; i++) {\r
2067             BignumInt aword = (i <= (int)modulus[0] ? modulus[i] : 0);\r
2068             BignumInt bword = (i <= (int)x[0] ? x[i] : 0);\r
2069             newx[i] = aword - bword - carry;\r
2070             bword = ~bword;\r
2071             carry = carry ? (newx[i] >= bword) : (newx[i] > bword);\r
2072             if (newx[i] != 0)\r
2073                 maxspot = i;\r
2074         }\r
2075         newx[0] = maxspot;\r
2076         freebn(x);\r
2077         x = newx;\r
2078     }\r
2079 \r
2080     /* and return. */\r
2081     return x;\r
2082 }\r
2083 \r
2084 /*\r
2085  * Render a bignum into decimal. Return a malloced string holding\r
2086  * the decimal representation.\r
2087  */\r
2088 char *bignum_decimal(Bignum x)\r
2089 {\r
2090     int ndigits, ndigit;\r
2091     int i, iszero;\r
2092     BignumInt carry;\r
2093     char *ret;\r
2094     BignumInt *workspace;\r
2095 \r
2096     /*\r
2097      * First, estimate the number of digits. Since log(10)/log(2)\r
2098      * is just greater than 93/28 (the joys of continued fraction\r
2099      * approximations...) we know that for every 93 bits, we need\r
2100      * at most 28 digits. This will tell us how much to malloc.\r
2101      *\r
2102      * Formally: if x has i bits, that means x is strictly less\r
2103      * than 2^i. Since 2 is less than 10^(28/93), this is less than\r
2104      * 10^(28i/93). We need an integer power of ten, so we must\r
2105      * round up (rounding down might make it less than x again).\r
2106      * Therefore if we multiply the bit count by 28/93, rounding\r
2107      * up, we will have enough digits.\r
2108      *\r
2109      * i=0 (i.e., x=0) is an irritating special case.\r
2110      */\r
2111     i = bignum_bitcount(x);\r
2112     if (!i)\r
2113         ndigits = 1;                   /* x = 0 */\r
2114     else\r
2115         ndigits = (28 * i + 92) / 93;  /* multiply by 28/93 and round up */\r
2116     ndigits++;                         /* allow for trailing \0 */\r
2117     ret = snewn(ndigits, char);\r
2118 \r
2119     /*\r
2120      * Now allocate some workspace to hold the binary form as we\r
2121      * repeatedly divide it by ten. Initialise this to the\r
2122      * big-endian form of the number.\r
2123      */\r
2124     workspace = snewn(x[0], BignumInt);\r
2125     for (i = 0; i < (int)x[0]; i++)\r
2126         workspace[i] = x[x[0] - i];\r
2127 \r
2128     /*\r
2129      * Next, write the decimal number starting with the last digit.\r
2130      * We use ordinary short division, dividing 10 into the\r
2131      * workspace.\r
2132      */\r
2133     ndigit = ndigits - 1;\r
2134     ret[ndigit] = '\0';\r
2135     do {\r
2136         iszero = 1;\r
2137         carry = 0;\r
2138         for (i = 0; i < (int)x[0]; i++) {\r
2139             /*\r
2140              * Conceptually, we want to compute\r
2141              *\r
2142              *   (carry << BIGNUM_INT_BITS) + workspace[i]\r
2143              *   -----------------------------------------\r
2144              *                      10\r
2145              *\r
2146              * but we don't have an integer type longer than BignumInt\r
2147              * to work with. So we have to do it in pieces.\r
2148              */\r
2149 \r
2150             BignumInt q, r;\r
2151             q = workspace[i] / 10;\r
2152             r = workspace[i] % 10;\r
2153 \r
2154             /* I want (BIGNUM_INT_MASK+1)/10 but can't say so directly! */\r
2155             q += carry * ((BIGNUM_INT_MASK-9) / 10 + 1);\r
2156             r += carry * ((BIGNUM_INT_MASK-9) % 10);\r
2157 \r
2158             q += r / 10;\r
2159             r %= 10;\r
2160 \r
2161             workspace[i] = q;\r
2162             carry = r;\r
2163 \r
2164             if (workspace[i])\r
2165                 iszero = 0;\r
2166         }\r
2167         ret[--ndigit] = (char) (carry + '0');\r
2168     } while (!iszero);\r
2169 \r
2170     /*\r
2171      * There's a chance we've fallen short of the start of the\r
2172      * string. Correct if so.\r
2173      */\r
2174     if (ndigit > 0)\r
2175         memmove(ret, ret + ndigit, ndigits - ndigit);\r
2176 \r
2177     /*\r
2178      * Done.\r
2179      */\r
2180     smemclr(workspace, x[0] * sizeof(*workspace));\r
2181     sfree(workspace);\r
2182     return ret;\r
2183 }\r