src/TortoisePlink/SSHBN.C

   1 /*\r
   2  * Bignum routines for RSA and DH and stuff.\r
   3  */\r
   4 \r
   5 #include <stdio.h>\r
   6 #include <assert.h>\r
   7 #include <stdlib.h>\r
   8 #include <string.h>\r
   9 #include <limits.h>\r
  10 \r
  11 #include "misc.h"\r
  12 \r
  13 /*\r
  14  * Usage notes:\r
  15  *  * Do not call the DIVMOD_WORD macro with expressions such as array\r
  16  *    subscripts, as some implementations object to this (see below).\r
  17  *  * Note that none of the division methods below will cope if the\r
  18  *    quotient won't fit into BIGNUM_INT_BITS. Callers should be careful\r
  19  *    to avoid this case.\r
  20  *    If this condition occurs, in the case of the x86 DIV instruction,\r
  21  *    an overflow exception will occur, which (according to a correspondent)\r
  22  *    will manifest on Windows as something like\r
  23  *      0xC0000095: Integer overflow\r
  24  *    The C variant won't give the right answer, either.\r
  25  */\r
  26 \r
  27 #if defined __GNUC__ && defined __i386__\r
  28 typedef unsigned long BignumInt;\r
  29 typedef unsigned long long BignumDblInt;\r
  30 #define BIGNUM_INT_MASK  0xFFFFFFFFUL\r
  31 #define BIGNUM_TOP_BIT   0x80000000UL\r
  32 #define BIGNUM_INT_BITS  32\r
  33 #define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)\r
  34 #define DIVMOD_WORD(q, r, hi, lo, w) \\r
  35     __asm__("div %2" : \\r
  36             "=d" (r), "=a" (q) : \\r
  37             "r" (w), "d" (hi), "a" (lo))\r
  38 #elif defined _MSC_VER && defined _M_IX86\r
  39 typedef unsigned __int32 BignumInt;\r
  40 typedef unsigned __int64 BignumDblInt;\r
  41 #define BIGNUM_INT_MASK  0xFFFFFFFFUL\r
  42 #define BIGNUM_TOP_BIT   0x80000000UL\r
  43 #define BIGNUM_INT_BITS  32\r
  44 #define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)\r
  45 /* Note: MASM interprets array subscripts in the macro arguments as\r
  46  * assembler syntax, which gives the wrong answer. Don't supply them.\r
  47  * <http://msdn2.microsoft.com/en-us/library/bf1dw62z.aspx> */\r
  48 #define DIVMOD_WORD(q, r, hi, lo, w) do { \\r
  49     __asm mov edx, hi \\r
  50     __asm mov eax, lo \\r
  51     __asm div w \\r
  52     __asm mov r, edx \\r
  53     __asm mov q, eax \\r
  54 } while(0)\r
  55 #elif defined _LP64\r
  56 /* 64-bit architectures can do 32x32->64 chunks at a time */\r
  57 typedef unsigned int BignumInt;\r
  58 typedef unsigned long BignumDblInt;\r
  59 #define BIGNUM_INT_MASK  0xFFFFFFFFU\r
  60 #define BIGNUM_TOP_BIT   0x80000000U\r
  61 #define BIGNUM_INT_BITS  32\r
  62 #define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)\r
  63 #define DIVMOD_WORD(q, r, hi, lo, w) do { \\r
  64     BignumDblInt n = (((BignumDblInt)hi) << BIGNUM_INT_BITS) | lo; \\r
  65     q = n / w; \\r
  66     r = n % w; \\r
  67 } while (0)\r
  68 #elif defined _LLP64\r
  69 /* 64-bit architectures in which unsigned long is 32 bits, not 64 */\r
  70 typedef unsigned long BignumInt;\r
  71 typedef unsigned long long BignumDblInt;\r
  72 #define BIGNUM_INT_MASK  0xFFFFFFFFUL\r
  73 #define BIGNUM_TOP_BIT   0x80000000UL\r
  74 #define BIGNUM_INT_BITS  32\r
  75 #define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)\r
  76 #define DIVMOD_WORD(q, r, hi, lo, w) do { \\r
  77     BignumDblInt n = (((BignumDblInt)hi) << BIGNUM_INT_BITS) | lo; \\r
  78     q = n / w; \\r
  79     r = n % w; \\r
  80 } while (0)\r
  81 #else\r
  82 /* Fallback for all other cases */\r
  83 typedef unsigned short BignumInt;\r
  84 typedef unsigned long BignumDblInt;\r
  85 #define BIGNUM_INT_MASK  0xFFFFU\r
  86 #define BIGNUM_TOP_BIT   0x8000U\r
  87 #define BIGNUM_INT_BITS  16\r
  88 #define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)\r
  89 #define DIVMOD_WORD(q, r, hi, lo, w) do { \\r
  90     BignumDblInt n = (((BignumDblInt)hi) << BIGNUM_INT_BITS) | lo; \\r
  91     q = n / w; \\r
  92     r = n % w; \\r
  93 } while (0)\r
  94 #endif\r
  95 \r
  96 #define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8)\r
  97 \r
  98 #define BIGNUM_INTERNAL\r
  99 typedef BignumInt *Bignum;\r
 100 \r
 101 #include "ssh.h"\r
 102 \r
 103 BignumInt bnZero[1] = { 0 };\r
 104 BignumInt bnOne[2] = { 1, 1 };\r
 105 \r
 106 /*\r
 107  * The Bignum format is an array of `BignumInt'. The first\r
 108  * element of the array counts the remaining elements. The\r
 109  * remaining elements express the actual number, base 2^BIGNUM_INT_BITS, _least_\r
 110  * significant digit first. (So it's trivial to extract the bit\r
 111  * with value 2^n for any n.)\r
 112  *\r
 113  * All Bignums in this module are positive. Negative numbers must\r
 114  * be dealt with outside it.\r
 115  *\r
 116  * INVARIANT: the most significant word of any Bignum must be\r
 117  * nonzero.\r
 118  */\r
 119 \r
 120 Bignum Zero = bnZero, One = bnOne;\r
 121 \r
 122 static Bignum newbn(int length)\r
 123 {\r
 124     Bignum b;\r
 125 \r
 126     assert(length >= 0 && length < INT_MAX / BIGNUM_INT_BITS);\r
 127 \r
 128     b = snewn(length + 1, BignumInt);\r
 129     if (!b)\r
 130         abort();                       /* FIXME */\r
 131     memset(b, 0, (length + 1) * sizeof(*b));\r
 132     b[0] = length;\r
 133     return b;\r
 134 }\r
 135 \r
 136 void bn_restore_invariant(Bignum b)\r
 137 {\r
 138     while (b[0] > 1 && b[b[0]] == 0)\r
 139         b[0]--;\r
 140 }\r
 141 \r
 142 Bignum copybn(Bignum orig)\r
 143 {\r
 144     Bignum b = snewn(orig[0] + 1, BignumInt);\r
 145     if (!b)\r
 146         abort();                       /* FIXME */\r
 147     memcpy(b, orig, (orig[0] + 1) * sizeof(*b));\r
 148     return b;\r
 149 }\r
 150 \r
 151 void freebn(Bignum b)\r
 152 {\r
 153     /*\r
 154      * Burn the evidence, just in case.\r
 155      */\r
 156     smemclr(b, sizeof(b[0]) * (b[0] + 1));\r
 157     sfree(b);\r
 158 }\r
 159 \r
 160 Bignum bn_power_2(int n)\r
 161 {\r
 162     Bignum ret;\r
 163 \r
 164     assert(n >= 0);\r
 165 \r
 166     ret = newbn(n / BIGNUM_INT_BITS + 1);\r
 167     bignum_set_bit(ret, n, 1);\r
 168     return ret;\r
 169 }\r
 170 \r
 171 /*\r
 172  * Internal addition. Sets c = a - b, where 'a', 'b' and 'c' are all\r
 173  * big-endian arrays of 'len' BignumInts. Returns a BignumInt carried\r
 174  * off the top.\r
 175  */\r
 176 static BignumInt internal_add(const BignumInt *a, const BignumInt *b,\r
 177                               BignumInt *c, int len)\r
 178 {\r
 179     int i;\r
 180     BignumDblInt carry = 0;\r
 181 \r
 182     for (i = len-1; i >= 0; i--) {\r
 183         carry += (BignumDblInt)a[i] + b[i];\r
 184         c[i] = (BignumInt)carry;\r
 185         carry >>= BIGNUM_INT_BITS;\r
 186     }\r
 187 \r
 188     return (BignumInt)carry;\r
 189 }\r
 190 \r
 191 /*\r
 192  * Internal subtraction. Sets c = a - b, where 'a', 'b' and 'c' are\r
 193  * all big-endian arrays of 'len' BignumInts. Any borrow from the top\r
 194  * is ignored.\r
 195  */\r
 196 static void internal_sub(const BignumInt *a, const BignumInt *b,\r
 197                          BignumInt *c, int len)\r
 198 {\r
 199     int i;\r
 200     BignumDblInt carry = 1;\r
 201 \r
 202     for (i = len-1; i >= 0; i--) {\r
 203         carry += (BignumDblInt)a[i] + (b[i] ^ BIGNUM_INT_MASK);\r
 204         c[i] = (BignumInt)carry;\r
 205         carry >>= BIGNUM_INT_BITS;\r
 206     }\r
 207 }\r
 208 \r
 209 /*\r
 210  * Compute c = a * b.\r
 211  * Input is in the first len words of a and b.\r
 212  * Result is returned in the first 2*len words of c.\r
 213  *\r
 214  * 'scratch' must point to an array of BignumInt of size at least\r
 215  * mul_compute_scratch(len). (This covers the needs of internal_mul\r
 216  * and all its recursive calls to itself.)\r
 217  */\r
 218 #define KARATSUBA_THRESHOLD 50\r
 219 static int mul_compute_scratch(int len)\r
 220 {\r
 221     int ret = 0;\r
 222     while (len > KARATSUBA_THRESHOLD) {\r
 223         int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */\r
 224         int midlen = botlen + 1;\r
 225         ret += 4*midlen;\r
 226         len = midlen;\r
 227     }\r
 228     return ret;\r
 229 }\r
 230 static void internal_mul(const BignumInt *a, const BignumInt *b,\r
 231                          BignumInt *c, int len, BignumInt *scratch)\r
 232 {\r
 233     if (len > KARATSUBA_THRESHOLD) {\r
 234         int i;\r
 235 \r
 236         /*\r
 237          * Karatsuba divide-and-conquer algorithm. Cut each input in\r
 238          * half, so that it's expressed as two big 'digits' in a giant\r
 239          * base D:\r
 240          *\r
 241          *   a = a_1 D + a_0\r
 242          *   b = b_1 D + b_0\r
 243          *\r
 244          * Then the product is of course\r
 245          *\r
 246          *  ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0\r
 247          *\r
 248          * and we compute the three coefficients by recursively\r
 249          * calling ourself to do half-length multiplications.\r
 250          *\r
 251          * The clever bit that makes this worth doing is that we only\r
 252          * need _one_ half-length multiplication for the central\r
 253          * coefficient rather than the two that it obviouly looks\r
 254          * like, because we can use a single multiplication to compute\r
 255          *\r
 256          *   (a_1 + a_0) (b_1 + b_0) = a_1 b_1 + a_1 b_0 + a_0 b_1 + a_0 b_0\r
 257          *\r
 258          * and then we subtract the other two coefficients (a_1 b_1\r
 259          * and a_0 b_0) which we were computing anyway.\r
 260          *\r
 261          * Hence we get to multiply two numbers of length N in about\r
 262          * three times as much work as it takes to multiply numbers of\r
 263          * length N/2, which is obviously better than the four times\r
 264          * as much work it would take if we just did a long\r
 265          * conventional multiply.\r
 266          */\r
 267 \r
 268         int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */\r
 269         int midlen = botlen + 1;\r
 270         BignumDblInt carry;\r
 271 #ifdef KARA_DEBUG\r
 272         int i;\r
 273 #endif\r
 274 \r
 275         /*\r
 276          * The coefficients a_1 b_1 and a_0 b_0 just avoid overlapping\r
 277          * in the output array, so we can compute them immediately in\r
 278          * place.\r
 279          */\r
 280 \r
 281 #ifdef KARA_DEBUG\r
 282         printf("a1,a0 = 0x");\r
 283         for (i = 0; i < len; i++) {\r
 284             if (i == toplen) printf(", 0x");\r
 285             printf("%0*x", BIGNUM_INT_BITS/4, a[i]);\r
 286         }\r
 287         printf("\n");\r
 288         printf("b1,b0 = 0x");\r
 289         for (i = 0; i < len; i++) {\r
 290             if (i == toplen) printf(", 0x");\r
 291             printf("%0*x", BIGNUM_INT_BITS/4, b[i]);\r
 292         }\r
 293         printf("\n");\r
 294 #endif\r
 295 \r
 296         /* a_1 b_1 */\r
 297         internal_mul(a, b, c, toplen, scratch);\r
 298 #ifdef KARA_DEBUG\r
 299         printf("a1b1 = 0x");\r
 300         for (i = 0; i < 2*toplen; i++) {\r
 301             printf("%0*x", BIGNUM_INT_BITS/4, c[i]);\r
 302         }\r
 303         printf("\n");\r
 304 #endif\r
 305 \r
 306         /* a_0 b_0 */\r
 307         internal_mul(a + toplen, b + toplen, c + 2*toplen, botlen, scratch);\r
 308 #ifdef KARA_DEBUG\r
 309         printf("a0b0 = 0x");\r
 310         for (i = 0; i < 2*botlen; i++) {\r
 311             printf("%0*x", BIGNUM_INT_BITS/4, c[2*toplen+i]);\r
 312         }\r
 313         printf("\n");\r
 314 #endif\r
 315 \r
 316         /* Zero padding. midlen exceeds toplen by at most 2, so just\r
 317          * zero the first two words of each input and the rest will be\r
 318          * copied over. */\r
 319         scratch[0] = scratch[1] = scratch[midlen] = scratch[midlen+1] = 0;\r
 320 \r
 321         for (i = 0; i < toplen; i++) {\r
 322             scratch[midlen - toplen + i] = a[i]; /* a_1 */\r
 323             scratch[2*midlen - toplen + i] = b[i]; /* b_1 */\r
 324         }\r
 325 \r
 326         /* compute a_1 + a_0 */\r
 327         scratch[0] = internal_add(scratch+1, a+toplen, scratch+1, botlen);\r
 328 #ifdef KARA_DEBUG\r
 329         printf("a1plusa0 = 0x");\r
 330         for (i = 0; i < midlen; i++) {\r
 331             printf("%0*x", BIGNUM_INT_BITS/4, scratch[i]);\r
 332         }\r
 333         printf("\n");\r
 334 #endif\r
 335         /* compute b_1 + b_0 */\r
 336         scratch[midlen] = internal_add(scratch+midlen+1, b+toplen,\r
 337                                        scratch+midlen+1, botlen);\r
 338 #ifdef KARA_DEBUG\r
 339         printf("b1plusb0 = 0x");\r
 340         for (i = 0; i < midlen; i++) {\r
 341             printf("%0*x", BIGNUM_INT_BITS/4, scratch[midlen+i]);\r
 342         }\r
 343         printf("\n");\r
 344 #endif\r
 345 \r
 346         /*\r
 347          * Now we can do the third multiplication.\r
 348          */\r
 349         internal_mul(scratch, scratch + midlen, scratch + 2*midlen, midlen,\r
 350                      scratch + 4*midlen);\r
 351 #ifdef KARA_DEBUG\r
 352         printf("a1plusa0timesb1plusb0 = 0x");\r
 353         for (i = 0; i < 2*midlen; i++) {\r
 354             printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen+i]);\r
 355         }\r
 356         printf("\n");\r
 357 #endif\r
 358 \r
 359         /*\r
 360          * Now we can reuse the first half of 'scratch' to compute the\r
 361          * sum of the outer two coefficients, to subtract from that\r
 362          * product to obtain the middle one.\r
 363          */\r
 364         scratch[0] = scratch[1] = scratch[2] = scratch[3] = 0;\r
 365         for (i = 0; i < 2*toplen; i++)\r
 366             scratch[2*midlen - 2*toplen + i] = c[i];\r
 367         scratch[1] = internal_add(scratch+2, c + 2*toplen,\r
 368                                   scratch+2, 2*botlen);\r
 369 #ifdef KARA_DEBUG\r
 370         printf("a1b1plusa0b0 = 0x");\r
 371         for (i = 0; i < 2*midlen; i++) {\r
 372             printf("%0*x", BIGNUM_INT_BITS/4, scratch[i]);\r
 373         }\r
 374         printf("\n");\r
 375 #endif\r
 376 \r
 377         internal_sub(scratch + 2*midlen, scratch,\r
 378                      scratch + 2*midlen, 2*midlen);\r
 379 #ifdef KARA_DEBUG\r
 380         printf("a1b0plusa0b1 = 0x");\r
 381         for (i = 0; i < 2*midlen; i++) {\r
 382             printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen+i]);\r
 383         }\r
 384         printf("\n");\r
 385 #endif\r
 386 \r
 387         /*\r
 388          * And now all we need to do is to add that middle coefficient\r
 389          * back into the output. We may have to propagate a carry\r
 390          * further up the output, but we can be sure it won't\r
 391          * propagate right the way off the top.\r
 392          */\r
 393         carry = internal_add(c + 2*len - botlen - 2*midlen,\r
 394                              scratch + 2*midlen,\r
 395                              c + 2*len - botlen - 2*midlen, 2*midlen);\r
 396         i = 2*len - botlen - 2*midlen - 1;\r
 397         while (carry) {\r
 398             assert(i >= 0);\r
 399             carry += c[i];\r
 400             c[i] = (BignumInt)carry;\r
 401             carry >>= BIGNUM_INT_BITS;\r
 402             i--;\r
 403         }\r
 404 #ifdef KARA_DEBUG\r
 405         printf("ab = 0x");\r
 406         for (i = 0; i < 2*len; i++) {\r
 407             printf("%0*x", BIGNUM_INT_BITS/4, c[i]);\r
 408         }\r
 409         printf("\n");\r
 410 #endif\r
 411 \r
 412     } else {\r
 413         int i;\r
 414         BignumInt carry;\r
 415         BignumDblInt t;\r
 416         const BignumInt *ap, *bp;\r
 417         BignumInt *cp, *cps;\r
 418 \r
 419         /*\r
 420          * Multiply in the ordinary O(N^2) way.\r
 421          */\r
 422 \r
 423         for (i = 0; i < 2 * len; i++)\r
 424             c[i] = 0;\r
 425 \r
 426         for (cps = c + 2*len, ap = a + len; ap-- > a; cps--) {\r
 427             carry = 0;\r
 428             for (cp = cps, bp = b + len; cp--, bp-- > b ;) {\r
 429                 t = (MUL_WORD(*ap, *bp) + carry) + *cp;\r
 430                 *cp = (BignumInt) t;\r
 431                 carry = (BignumInt)(t >> BIGNUM_INT_BITS);\r
 432             }\r
 433             *cp = carry;\r
 434         }\r
 435     }\r
 436 }\r
 437 \r
 438 /*\r
 439  * Variant form of internal_mul used for the initial step of\r
 440  * Montgomery reduction. Only bothers outputting 'len' words\r
 441  * (everything above that is thrown away).\r
 442  */\r
 443 static void internal_mul_low(const BignumInt *a, const BignumInt *b,\r
 444                              BignumInt *c, int len, BignumInt *scratch)\r
 445 {\r
 446     if (len > KARATSUBA_THRESHOLD) {\r
 447         int i;\r
 448 \r
 449         /*\r
 450          * Karatsuba-aware version of internal_mul_low. As before, we\r
 451          * express each input value as a shifted combination of two\r
 452          * halves:\r
 453          *\r
 454          *   a = a_1 D + a_0\r
 455          *   b = b_1 D + b_0\r
 456          *\r
 457          * Then the full product is, as before,\r
 458          *\r
 459          *  ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0\r
 460          *\r
 461          * Provided we choose D on the large side (so that a_0 and b_0\r
 462          * are _at least_ as long as a_1 and b_1), we don't need the\r
 463          * topmost term at all, and we only need half of the middle\r
 464          * term. So there's no point in doing the proper Karatsuba\r
 465          * optimisation which computes the middle term using the top\r
 466          * one, because we'd take as long computing the top one as\r
 467          * just computing the middle one directly.\r
 468          *\r
 469          * So instead, we do a much more obvious thing: we call the\r
 470          * fully optimised internal_mul to compute a_0 b_0, and we\r
 471          * recursively call ourself to compute the _bottom halves_ of\r
 472          * a_1 b_0 and a_0 b_1, each of which we add into the result\r
 473          * in the obvious way.\r
 474          *\r
 475          * In other words, there's no actual Karatsuba _optimisation_\r
 476          * in this function; the only benefit in doing it this way is\r
 477          * that we call internal_mul proper for a large part of the\r
 478          * work, and _that_ can optimise its operation.\r
 479          */\r
 480 \r
 481         int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */\r
 482 \r
 483         /*\r
 484          * Scratch space for the various bits and pieces we're going\r
 485          * to be adding together: we need botlen*2 words for a_0 b_0\r
 486          * (though we may end up throwing away its topmost word), and\r
 487          * toplen words for each of a_1 b_0 and a_0 b_1. That adds up\r
 488          * to exactly 2*len.\r
 489          */\r
 490 \r
 491         /* a_0 b_0 */\r
 492         internal_mul(a + toplen, b + toplen, scratch + 2*toplen, botlen,\r
 493                      scratch + 2*len);\r
 494 \r
 495         /* a_1 b_0 */\r
 496         internal_mul_low(a, b + len - toplen, scratch + toplen, toplen,\r
 497                          scratch + 2*len);\r
 498 \r
 499         /* a_0 b_1 */\r
 500         internal_mul_low(a + len - toplen, b, scratch, toplen,\r
 501                          scratch + 2*len);\r
 502 \r
 503         /* Copy the bottom half of the big coefficient into place */\r
 504         for (i = 0; i < botlen; i++)\r
 505             c[toplen + i] = scratch[2*toplen + botlen + i];\r
 506 \r
 507         /* Add the two small coefficients, throwing away the returned carry */\r
 508         internal_add(scratch, scratch + toplen, scratch, toplen);\r
 509 \r
 510         /* And add that to the large coefficient, leaving the result in c. */\r
 511         internal_add(scratch, scratch + 2*toplen + botlen - toplen,\r
 512                      c, toplen);\r
 513 \r
 514     } else {\r
 515         int i;\r
 516         BignumInt carry;\r
 517         BignumDblInt t;\r
 518         const BignumInt *ap, *bp;\r
 519         BignumInt *cp, *cps;\r
 520 \r
 521         /*\r
 522          * Multiply in the ordinary O(N^2) way.\r
 523          */\r
 524 \r
 525         for (i = 0; i < len; i++)\r
 526             c[i] = 0;\r
 527 \r
 528         for (cps = c + len, ap = a + len; ap-- > a; cps--) {\r
 529             carry = 0;\r
 530             for (cp = cps, bp = b + len; bp--, cp-- > c ;) {\r
 531                 t = (MUL_WORD(*ap, *bp) + carry) + *cp;\r
 532                 *cp = (BignumInt) t;\r
 533                 carry = (BignumInt)(t >> BIGNUM_INT_BITS);\r
 534             }\r
 535         }\r
 536     }\r
 537 }\r
 538 \r
 539 /*\r
 540  * Montgomery reduction. Expects x to be a big-endian array of 2*len\r
 541  * BignumInts whose value satisfies 0 <= x < rn (where r = 2^(len *\r
 542  * BIGNUM_INT_BITS) is the Montgomery base). Returns in the same array\r
 543  * a value x' which is congruent to xr^{-1} mod n, and satisfies 0 <=\r
 544  * x' < n.\r
 545  *\r
 546  * 'n' and 'mninv' should be big-endian arrays of 'len' BignumInts\r
 547  * each, containing respectively n and the multiplicative inverse of\r
 548  * -n mod r.\r
 549  *\r
 550  * 'tmp' is an array of BignumInt used as scratch space, of length at\r
 551  * least 3*len + mul_compute_scratch(len).\r
 552  */\r
 553 static void monty_reduce(BignumInt *x, const BignumInt *n,\r
 554                          const BignumInt *mninv, BignumInt *tmp, int len)\r
 555 {\r
 556     int i;\r
 557     BignumInt carry;\r
 558 \r
 559     /*\r
 560      * Multiply x by (-n)^{-1} mod r. This gives us a value m such\r
 561      * that mn is congruent to -x mod r. Hence, mn+x is an exact\r
 562      * multiple of r, and is also (obviously) congruent to x mod n.\r
 563      */\r
 564     internal_mul_low(x + len, mninv, tmp, len, tmp + 3*len);\r
 565 \r
 566     /*\r
 567      * Compute t = (mn+x)/r in ordinary, non-modular, integer\r
 568      * arithmetic. By construction this is exact, and is congruent mod\r
 569      * n to x * r^{-1}, i.e. the answer we want.\r
 570      *\r
 571      * The following multiply leaves that answer in the _most_\r
 572      * significant half of the 'x' array, so then we must shift it\r
 573      * down.\r
 574      */\r
 575     internal_mul(tmp, n, tmp+len, len, tmp + 3*len);\r
 576     carry = internal_add(x, tmp+len, x, 2*len);\r
 577     for (i = 0; i < len; i++)\r
 578         x[len + i] = x[i], x[i] = 0;\r
 579 \r
 580     /*\r
 581      * Reduce t mod n. This doesn't require a full-on division by n,\r
 582      * but merely a test and single optional subtraction, since we can\r
 583      * show that 0 <= t < 2n.\r
 584      *\r
 585      * Proof:\r
 586      *  + we computed m mod r, so 0 <= m < r.\r
 587      *  + so 0 <= mn < rn, obviously\r
 588      *  + hence we only need 0 <= x < rn to guarantee that 0 <= mn+x < 2rn\r
 589      *  + yielding 0 <= (mn+x)/r < 2n as required.\r
 590      */\r
 591     if (!carry) {\r
 592         for (i = 0; i < len; i++)\r
 593             if (x[len + i] != n[i])\r
 594                 break;\r
 595     }\r
 596     if (carry || i >= len || x[len + i] > n[i])\r
 597         internal_sub(x+len, n, x+len, len);\r
 598 }\r
 599 \r
 600 static void internal_add_shifted(BignumInt *number,\r
 601                                  unsigned n, int shift)\r
 602 {\r
 603     int word = 1 + (shift / BIGNUM_INT_BITS);\r
 604     int bshift = shift % BIGNUM_INT_BITS;\r
 605     BignumDblInt addend;\r
 606 \r
 607     addend = (BignumDblInt)n << bshift;\r
 608 \r
 609     while (addend) {\r
 610         assert(word <= number[0]);\r
 611         addend += number[word];\r
 612         number[word] = (BignumInt) addend & BIGNUM_INT_MASK;\r
 613         addend >>= BIGNUM_INT_BITS;\r
 614         word++;\r
 615     }\r
 616 }\r
 617 \r
 618 /*\r
 619  * Compute a = a % m.\r
 620  * Input in first alen words of a and first mlen words of m.\r
 621  * Output in first alen words of a\r
 622  * (of which first alen-mlen words will be zero).\r
 623  * The MSW of m MUST have its high bit set.\r
 624  * Quotient is accumulated in the `quotient' array, which is a Bignum\r
 625  * rather than the internal bigendian format. Quotient parts are shifted\r
 626  * left by `qshift' before adding into quot.\r
 627  */\r
 628 static void internal_mod(BignumInt *a, int alen,\r
 629                          BignumInt *m, int mlen,\r
 630                          BignumInt *quot, int qshift)\r
 631 {\r
 632     BignumInt m0, m1;\r
 633     unsigned int h;\r
 634     int i, k;\r
 635 \r
 636     m0 = m[0];\r
 637     assert(m0 >> (BIGNUM_INT_BITS-1) == 1);\r
 638     if (mlen > 1)\r
 639         m1 = m[1];\r
 640     else\r
 641         m1 = 0;\r
 642 \r
 643     for (i = 0; i <= alen - mlen; i++) {\r
 644         BignumDblInt t;\r
 645         unsigned int q, r, c, ai1;\r
 646 \r
 647         if (i == 0) {\r
 648             h = 0;\r
 649         } else {\r
 650             h = a[i - 1];\r
 651             a[i - 1] = 0;\r
 652         }\r
 653 \r
 654         if (i == alen - 1)\r
 655             ai1 = 0;\r
 656         else\r
 657             ai1 = a[i + 1];\r
 658 \r
 659         /* Find q = h:a[i] / m0 */\r
 660         if (h >= m0) {\r
 661             /*\r
 662              * Special case.\r
 663              * \r
 664              * To illustrate it, suppose a BignumInt is 8 bits, and\r
 665              * we are dividing (say) A1:23:45:67 by A1:B2:C3. Then\r
 666              * our initial division will be 0xA123 / 0xA1, which\r
 667              * will give a quotient of 0x100 and a divide overflow.\r
 668              * However, the invariants in this division algorithm\r
 669              * are not violated, since the full number A1:23:... is\r
 670              * _less_ than the quotient prefix A1:B2:... and so the\r
 671              * following correction loop would have sorted it out.\r
 672              * \r
 673              * In this situation we set q to be the largest\r
 674              * quotient we _can_ stomach (0xFF, of course).\r
 675              */\r
 676             q = BIGNUM_INT_MASK;\r
 677         } else {\r
 678             /* Macro doesn't want an array subscript expression passed\r
 679              * into it (see definition), so use a temporary. */\r
 680             BignumInt tmplo = a[i];\r
 681             DIVMOD_WORD(q, r, h, tmplo, m0);\r
 682 \r
 683             /* Refine our estimate of q by looking at\r
 684              h:a[i]:a[i+1] / m0:m1 */\r
 685             t = MUL_WORD(m1, q);\r
 686             if (t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) {\r
 687                 q--;\r
 688                 t -= m1;\r
 689                 r = (r + m0) & BIGNUM_INT_MASK;     /* overflow? */\r
 690                 if (r >= (BignumDblInt) m0 &&\r
 691                     t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) q--;\r
 692             }\r
 693         }\r
 694 \r
 695         /* Subtract q * m from a[i...] */\r
 696         c = 0;\r
 697         for (k = mlen - 1; k >= 0; k--) {\r
 698             t = MUL_WORD(q, m[k]);\r
 699             t += c;\r
 700             c = (unsigned)(t >> BIGNUM_INT_BITS);\r
 701             if ((BignumInt) t > a[i + k])\r
 702                 c++;\r
 703             a[i + k] -= (BignumInt) t;\r
 704         }\r
 705 \r
 706         /* Add back m in case of borrow */\r
 707         if (c != h) {\r
 708             t = 0;\r
 709             for (k = mlen - 1; k >= 0; k--) {\r
 710                 t += m[k];\r
 711                 t += a[i + k];\r
 712                 a[i + k] = (BignumInt) t;\r
 713                 t = t >> BIGNUM_INT_BITS;\r
 714             }\r
 715             q--;\r
 716         }\r
 717         if (quot)\r
 718             internal_add_shifted(quot, q, qshift + BIGNUM_INT_BITS * (alen - mlen - i));\r
 719     }\r
 720 }\r
 721 \r
 722 /*\r
 723  * Compute (base ^ exp) % mod, the pedestrian way.\r
 724  */\r
 725 Bignum modpow_simple(Bignum base_in, Bignum exp, Bignum mod)\r
 726 {\r
 727     BignumInt *a, *b, *n, *m, *scratch;\r
 728     int mshift;\r
 729     int mlen, scratchlen, i, j;\r
 730     Bignum base, result;\r
 731 \r
 732     /*\r
 733      * The most significant word of mod needs to be non-zero. It\r
 734      * should already be, but let's make sure.\r
 735      */\r
 736     assert(mod[mod[0]] != 0);\r
 737 \r
 738     /*\r
 739      * Make sure the base is smaller than the modulus, by reducing\r
 740      * it modulo the modulus if not.\r
 741      */\r
 742     base = bigmod(base_in, mod);\r
 743 \r
 744     /* Allocate m of size mlen, copy mod to m */\r
 745     /* We use big endian internally */\r
 746     mlen = mod[0];\r
 747     m = snewn(mlen, BignumInt);\r
 748     for (j = 0; j < mlen; j++)\r
 749         m[j] = mod[mod[0] - j];\r
 750 \r
 751     /* Shift m left to make msb bit set */\r
 752     for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)\r
 753         if ((m[0] << mshift) & BIGNUM_TOP_BIT)\r
 754             break;\r
 755     if (mshift) {\r
 756         for (i = 0; i < mlen - 1; i++)\r
 757             m[i] = (m[i] << mshift) | (m[i + 1] >> (BIGNUM_INT_BITS - mshift));\r
 758         m[mlen - 1] = m[mlen - 1] << mshift;\r
 759     }\r
 760 \r
 761     /* Allocate n of size mlen, copy base to n */\r
 762     n = snewn(mlen, BignumInt);\r
 763     i = mlen - base[0];\r
 764     for (j = 0; j < i; j++)\r
 765         n[j] = 0;\r
 766     for (j = 0; j < (int)base[0]; j++)\r
 767         n[i + j] = base[base[0] - j];\r
 768 \r
 769     /* Allocate a and b of size 2*mlen. Set a = 1 */\r
 770     a = snewn(2 * mlen, BignumInt);\r
 771     b = snewn(2 * mlen, BignumInt);\r
 772     for (i = 0; i < 2 * mlen; i++)\r
 773         a[i] = 0;\r
 774     a[2 * mlen - 1] = 1;\r
 775 \r
 776     /* Scratch space for multiplies */\r
 777     scratchlen = mul_compute_scratch(mlen);\r
 778     scratch = snewn(scratchlen, BignumInt);\r
 779 \r
 780     /* Skip leading zero bits of exp. */\r
 781     i = 0;\r
 782     j = BIGNUM_INT_BITS-1;\r
 783     while (i < (int)exp[0] && (exp[exp[0] - i] & (1 << j)) == 0) {\r
 784         j--;\r
 785         if (j < 0) {\r
 786             i++;\r
 787             j = BIGNUM_INT_BITS-1;\r
 788         }\r
 789     }\r
 790 \r
 791     /* Main computation */\r
 792     while (i < (int)exp[0]) {\r
 793         while (j >= 0) {\r
 794             internal_mul(a + mlen, a + mlen, b, mlen, scratch);\r
 795             internal_mod(b, mlen * 2, m, mlen, NULL, 0);\r
 796             if ((exp[exp[0] - i] & (1 << j)) != 0) {\r
 797                 internal_mul(b + mlen, n, a, mlen, scratch);\r
 798                 internal_mod(a, mlen * 2, m, mlen, NULL, 0);\r
 799             } else {\r
 800                 BignumInt *t;\r
 801                 t = a;\r
 802                 a = b;\r
 803                 b = t;\r
 804             }\r
 805             j--;\r
 806         }\r
 807         i++;\r
 808         j = BIGNUM_INT_BITS-1;\r
 809     }\r
 810 \r
 811     /* Fixup result in case the modulus was shifted */\r
 812     if (mshift) {\r
 813         for (i = mlen - 1; i < 2 * mlen - 1; i++)\r
 814             a[i] = (a[i] << mshift) | (a[i + 1] >> (BIGNUM_INT_BITS - mshift));\r
 815         a[2 * mlen - 1] = a[2 * mlen - 1] << mshift;\r
 816         internal_mod(a, mlen * 2, m, mlen, NULL, 0);\r
 817         for (i = 2 * mlen - 1; i >= mlen; i--)\r
 818             a[i] = (a[i] >> mshift) | (a[i - 1] << (BIGNUM_INT_BITS - mshift));\r
 819     }\r
 820 \r
 821     /* Copy result to buffer */\r
 822     result = newbn(mod[0]);\r
 823     for (i = 0; i < mlen; i++)\r
 824         result[result[0] - i] = a[i + mlen];\r
 825     while (result[0] > 1 && result[result[0]] == 0)\r
 826         result[0]--;\r
 827 \r
 828     /* Free temporary arrays */\r
 829     smemclr(a, 2 * mlen * sizeof(*a));\r
 830     sfree(a);\r
 831     smemclr(scratch, scratchlen * sizeof(*scratch));\r
 832     sfree(scratch);\r
 833     smemclr(b, 2 * mlen * sizeof(*b));\r
 834     sfree(b);\r
 835     smemclr(m, mlen * sizeof(*m));\r
 836     sfree(m);\r
 837     smemclr(n, mlen * sizeof(*n));\r
 838     sfree(n);\r
 839 \r
 840     freebn(base);\r
 841 \r
 842     return result;\r
 843 }\r
 844 \r
 845 /*\r
 846  * Compute (base ^ exp) % mod. Uses the Montgomery multiplication\r
 847  * technique where possible, falling back to modpow_simple otherwise.\r
 848  */\r
 849 Bignum modpow(Bignum base_in, Bignum exp, Bignum mod)\r
 850 {\r
 851     BignumInt *a, *b, *x, *n, *mninv, *scratch;\r
 852     int len, scratchlen, i, j;\r
 853     Bignum base, base2, r, rn, inv, result;\r
 854 \r
 855     /*\r
 856      * The most significant word of mod needs to be non-zero. It\r
 857      * should already be, but let's make sure.\r
 858      */\r
 859     assert(mod[mod[0]] != 0);\r
 860 \r
 861     /*\r
 862      * mod had better be odd, or we can't do Montgomery multiplication\r
 863      * using a power of two at all.\r
 864      */\r
 865     if (!(mod[1] & 1))\r
 866         return modpow_simple(base_in, exp, mod);\r
 867 \r
 868     /*\r
 869      * Make sure the base is smaller than the modulus, by reducing\r
 870      * it modulo the modulus if not.\r
 871      */\r
 872     base = bigmod(base_in, mod);\r
 873 \r
 874     /*\r
 875      * Compute the inverse of n mod r, for monty_reduce. (In fact we\r
 876      * want the inverse of _minus_ n mod r, but we'll sort that out\r
 877      * below.)\r
 878      */\r
 879     len = mod[0];\r
 880     r = bn_power_2(BIGNUM_INT_BITS * len);\r
 881     inv = modinv(mod, r);\r
 882     assert(inv); /* cannot fail, since mod is odd and r is a power of 2 */\r
 883 \r
 884     /*\r
 885      * Multiply the base by r mod n, to get it into Montgomery\r
 886      * representation.\r
 887      */\r
 888     base2 = modmul(base, r, mod);\r
 889     freebn(base);\r
 890     base = base2;\r
 891 \r
 892     rn = bigmod(r, mod);               /* r mod n, i.e. Montgomerified 1 */\r
 893 \r
 894     freebn(r);                         /* won't need this any more */\r
 895 \r
 896     /*\r
 897      * Set up internal arrays of the right lengths, in big-endian\r
 898      * format, containing the base, the modulus, and the modulus's\r
 899      * inverse.\r
 900      */\r
 901     n = snewn(len, BignumInt);\r
 902     for (j = 0; j < len; j++)\r
 903         n[len - 1 - j] = mod[j + 1];\r
 904 \r
 905     mninv = snewn(len, BignumInt);\r
 906     for (j = 0; j < len; j++)\r
 907         mninv[len - 1 - j] = (j < (int)inv[0] ? inv[j + 1] : 0);\r
 908     freebn(inv);         /* we don't need this copy of it any more */\r
 909     /* Now negate mninv mod r, so it's the inverse of -n rather than +n. */\r
 910     x = snewn(len, BignumInt);\r
 911     for (j = 0; j < len; j++)\r
 912         x[j] = 0;\r
 913     internal_sub(x, mninv, mninv, len);\r
 914 \r
 915     /* x = snewn(len, BignumInt); */ /* already done above */\r
 916     for (j = 0; j < len; j++)\r
 917         x[len - 1 - j] = (j < (int)base[0] ? base[j + 1] : 0);\r
 918     freebn(base);        /* we don't need this copy of it any more */\r
 919 \r
 920     a = snewn(2*len, BignumInt);\r
 921     b = snewn(2*len, BignumInt);\r
 922     for (j = 0; j < len; j++)\r
 923         a[2*len - 1 - j] = (j < (int)rn[0] ? rn[j + 1] : 0);\r
 924     freebn(rn);\r
 925 \r
 926     /* Scratch space for multiplies */\r
 927     scratchlen = 3*len + mul_compute_scratch(len);\r
 928     scratch = snewn(scratchlen, BignumInt);\r
 929 \r
 930     /* Skip leading zero bits of exp. */\r
 931     i = 0;\r
 932     j = BIGNUM_INT_BITS-1;\r
 933     while (i < (int)exp[0] && (exp[exp[0] - i] & (1 << j)) == 0) {\r
 934         j--;\r
 935         if (j < 0) {\r
 936             i++;\r
 937             j = BIGNUM_INT_BITS-1;\r
 938         }\r
 939     }\r
 940 \r
 941     /* Main computation */\r
 942     while (i < (int)exp[0]) {\r
 943         while (j >= 0) {\r
 944             internal_mul(a + len, a + len, b, len, scratch);\r
 945             monty_reduce(b, n, mninv, scratch, len);\r
 946             if ((exp[exp[0] - i] & (1 << j)) != 0) {\r
 947                 internal_mul(b + len, x, a, len,  scratch);\r
 948                 monty_reduce(a, n, mninv, scratch, len);\r
 949             } else {\r
 950                 BignumInt *t;\r
 951                 t = a;\r
 952                 a = b;\r
 953                 b = t;\r
 954             }\r
 955             j--;\r
 956         }\r
 957         i++;\r
 958         j = BIGNUM_INT_BITS-1;\r
 959     }\r
 960 \r
 961     /*\r
 962      * Final monty_reduce to get back from the adjusted Montgomery\r
 963      * representation.\r
 964      */\r
 965     monty_reduce(a, n, mninv, scratch, len);\r
 966 \r
 967     /* Copy result to buffer */\r
 968     result = newbn(mod[0]);\r
 969     for (i = 0; i < len; i++)\r
 970         result[result[0] - i] = a[i + len];\r
 971     while (result[0] > 1 && result[result[0]] == 0)\r
 972         result[0]--;\r
 973 \r
 974     /* Free temporary arrays */\r
 975     smemclr(scratch, scratchlen * sizeof(*scratch));\r
 976     sfree(scratch);\r
 977     smemclr(a, 2 * len * sizeof(*a));\r
 978     sfree(a);\r
 979     smemclr(b, 2 * len * sizeof(*b));\r
 980     sfree(b);\r
 981     smemclr(mninv, len * sizeof(*mninv));\r
 982     sfree(mninv);\r
 983     smemclr(n, len * sizeof(*n));\r
 984     sfree(n);\r
 985     smemclr(x, len * sizeof(*x));\r
 986     sfree(x);\r
 987 \r
 988     return result;\r
 989 }\r
 990 \r
 991 /*\r
 992  * Compute (p * q) % mod.\r
 993  * The most significant word of mod MUST be non-zero.\r
 994  * We assume that the result array is the same size as the mod array.\r
 995  */\r
 996 Bignum modmul(Bignum p, Bignum q, Bignum mod)\r
 997 {\r
 998     BignumInt *a, *n, *m, *o, *scratch;\r
 999     int mshift, scratchlen;\r
1000     int pqlen, mlen, rlen, i, j;\r
1001     Bignum result;\r
1002 \r
1003     /*\r
1004      * The most significant word of mod needs to be non-zero. It\r
1005      * should already be, but let's make sure.\r
1006      */\r
1007     assert(mod[mod[0]] != 0);\r
1008 \r
1009     /* Allocate m of size mlen, copy mod to m */\r
1010     /* We use big endian internally */\r
1011     mlen = mod[0];\r
1012     m = snewn(mlen, BignumInt);\r
1013     for (j = 0; j < mlen; j++)\r
1014         m[j] = mod[mod[0] - j];\r
1015 \r
1016     /* Shift m left to make msb bit set */\r
1017     for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)\r
1018         if ((m[0] << mshift) & BIGNUM_TOP_BIT)\r
1019             break;\r
1020     if (mshift) {\r
1021         for (i = 0; i < mlen - 1; i++)\r
1022             m[i] = (m[i] << mshift) | (m[i + 1] >> (BIGNUM_INT_BITS - mshift));\r
1023         m[mlen - 1] = m[mlen - 1] << mshift;\r
1024     }\r
1025 \r
1026     pqlen = (p[0] > q[0] ? p[0] : q[0]);\r
1027 \r
1028     /*\r
1029      * Make sure that we're allowing enough space. The shifting below\r
1030      * will underflow the vectors we allocate if pqlen is too small.\r
1031      */\r
1032     if (2*pqlen <= mlen)\r
1033         pqlen = mlen/2 + 1;\r
1034 \r
1035     /* Allocate n of size pqlen, copy p to n */\r
1036     n = snewn(pqlen, BignumInt);\r
1037     i = pqlen - p[0];\r
1038     for (j = 0; j < i; j++)\r
1039         n[j] = 0;\r
1040     for (j = 0; j < (int)p[0]; j++)\r
1041         n[i + j] = p[p[0] - j];\r
1042 \r
1043     /* Allocate o of size pqlen, copy q to o */\r
1044     o = snewn(pqlen, BignumInt);\r
1045     i = pqlen - q[0];\r
1046     for (j = 0; j < i; j++)\r
1047         o[j] = 0;\r
1048     for (j = 0; j < (int)q[0]; j++)\r
1049         o[i + j] = q[q[0] - j];\r
1050 \r
1051     /* Allocate a of size 2*pqlen for result */\r
1052     a = snewn(2 * pqlen, BignumInt);\r
1053 \r
1054     /* Scratch space for multiplies */\r
1055     scratchlen = mul_compute_scratch(pqlen);\r
1056     scratch = snewn(scratchlen, BignumInt);\r
1057 \r
1058     /* Main computation */\r
1059     internal_mul(n, o, a, pqlen, scratch);\r
1060     internal_mod(a, pqlen * 2, m, mlen, NULL, 0);\r
1061 \r
1062     /* Fixup result in case the modulus was shifted */\r
1063     if (mshift) {\r
1064         for (i = 2 * pqlen - mlen - 1; i < 2 * pqlen - 1; i++)\r
1065             a[i] = (a[i] << mshift) | (a[i + 1] >> (BIGNUM_INT_BITS - mshift));\r
1066         a[2 * pqlen - 1] = a[2 * pqlen - 1] << mshift;\r
1067         internal_mod(a, pqlen * 2, m, mlen, NULL, 0);\r
1068         for (i = 2 * pqlen - 1; i >= 2 * pqlen - mlen; i--)\r
1069             a[i] = (a[i] >> mshift) | (a[i - 1] << (BIGNUM_INT_BITS - mshift));\r
1070     }\r
1071 \r
1072     /* Copy result to buffer */\r
1073     rlen = (mlen < pqlen * 2 ? mlen : pqlen * 2);\r
1074     result = newbn(rlen);\r
1075     for (i = 0; i < rlen; i++)\r
1076         result[result[0] - i] = a[i + 2 * pqlen - rlen];\r
1077     while (result[0] > 1 && result[result[0]] == 0)\r
1078         result[0]--;\r
1079 \r
1080     /* Free temporary arrays */\r
1081     smemclr(scratch, scratchlen * sizeof(*scratch));\r
1082     sfree(scratch);\r
1083     smemclr(a, 2 * pqlen * sizeof(*a));\r
1084     sfree(a);\r
1085     smemclr(m, mlen * sizeof(*m));\r
1086     sfree(m);\r
1087     smemclr(n, pqlen * sizeof(*n));\r
1088     sfree(n);\r
1089     smemclr(o, pqlen * sizeof(*o));\r
1090     sfree(o);\r
1091 \r
1092     return result;\r
1093 }\r
1094 \r
1095 /*\r
1096  * Compute p % mod.\r
1097  * The most significant word of mod MUST be non-zero.\r
1098  * We assume that the result array is the same size as the mod array.\r
1099  * We optionally write out a quotient if `quotient' is non-NULL.\r
1100  * We can avoid writing out the result if `result' is NULL.\r
1101  */\r
1102 static void bigdivmod(Bignum p, Bignum mod, Bignum result, Bignum quotient)\r
1103 {\r
1104     BignumInt *n, *m;\r
1105     int mshift;\r
1106     int plen, mlen, i, j;\r
1107 \r
1108     /*\r
1109      * The most significant word of mod needs to be non-zero. It\r
1110      * should already be, but let's make sure.\r
1111      */\r
1112     assert(mod[mod[0]] != 0);\r
1113 \r
1114     /* Allocate m of size mlen, copy mod to m */\r
1115     /* We use big endian internally */\r
1116     mlen = mod[0];\r
1117     m = snewn(mlen, BignumInt);\r
1118     for (j = 0; j < mlen; j++)\r
1119         m[j] = mod[mod[0] - j];\r
1120 \r
1121     /* Shift m left to make msb bit set */\r
1122     for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)\r
1123         if ((m[0] << mshift) & BIGNUM_TOP_BIT)\r
1124             break;\r
1125     if (mshift) {\r
1126         for (i = 0; i < mlen - 1; i++)\r
1127             m[i] = (m[i] << mshift) | (m[i + 1] >> (BIGNUM_INT_BITS - mshift));\r
1128         m[mlen - 1] = m[mlen - 1] << mshift;\r
1129     }\r
1130 \r
1131     plen = p[0];\r
1132     /* Ensure plen > mlen */\r
1133     if (plen <= mlen)\r
1134         plen = mlen + 1;\r
1135 \r
1136     /* Allocate n of size plen, copy p to n */\r
1137     n = snewn(plen, BignumInt);\r
1138     for (j = 0; j < plen; j++)\r
1139         n[j] = 0;\r
1140     for (j = 1; j <= (int)p[0]; j++)\r
1141         n[plen - j] = p[j];\r
1142 \r
1143     /* Main computation */\r
1144     internal_mod(n, plen, m, mlen, quotient, mshift);\r
1145 \r
1146     /* Fixup result in case the modulus was shifted */\r
1147     if (mshift) {\r
1148         for (i = plen - mlen - 1; i < plen - 1; i++)\r
1149             n[i] = (n[i] << mshift) | (n[i + 1] >> (BIGNUM_INT_BITS - mshift));\r
1150         n[plen - 1] = n[plen - 1] << mshift;\r
1151         internal_mod(n, plen, m, mlen, quotient, 0);\r
1152         for (i = plen - 1; i >= plen - mlen; i--)\r
1153             n[i] = (n[i] >> mshift) | (n[i - 1] << (BIGNUM_INT_BITS - mshift));\r
1154     }\r
1155 \r
1156     /* Copy result to buffer */\r
1157     if (result) {\r
1158         for (i = 1; i <= (int)result[0]; i++) {\r
1159             int j = plen - i;\r
1160             result[i] = j >= 0 ? n[j] : 0;\r
1161         }\r
1162     }\r
1163 \r
1164     /* Free temporary arrays */\r
1165     smemclr(m, mlen * sizeof(*m));\r
1166     sfree(m);\r
1167     smemclr(n, plen * sizeof(*n));\r
1168     sfree(n);\r
1169 }\r
1170 \r
1171 /*\r
1172  * Decrement a number.\r
1173  */\r
1174 void decbn(Bignum bn)\r
1175 {\r
1176     int i = 1;\r
1177     while (i < (int)bn[0] && bn[i] == 0)\r
1178         bn[i++] = BIGNUM_INT_MASK;\r
1179     bn[i]--;\r
1180 }\r
1181 \r
1182 Bignum bignum_from_bytes(const unsigned char *data, int nbytes)\r
1183 {\r
1184     Bignum result;\r
1185     int w, i;\r
1186 \r
1187     assert(nbytes >= 0 && nbytes < INT_MAX/8);\r
1188 \r
1189     w = (nbytes + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES; /* bytes->words */\r
1190 \r
1191     result = newbn(w);\r
1192     for (i = 1; i <= w; i++)\r
1193         result[i] = 0;\r
1194     for (i = nbytes; i--;) {\r
1195         unsigned char byte = *data++;\r
1196         result[1 + i / BIGNUM_INT_BYTES] |= byte << (8*i % BIGNUM_INT_BITS);\r
1197     }\r
1198 \r
1199     while (result[0] > 1 && result[result[0]] == 0)\r
1200         result[0]--;\r
1201     return result;\r
1202 }\r
1203 \r
1204 /*\r
1205  * Read an SSH-1-format bignum from a data buffer. Return the number\r
1206  * of bytes consumed, or -1 if there wasn't enough data.\r
1207  */\r
1208 int ssh1_read_bignum(const unsigned char *data, int len, Bignum * result)\r
1209 {\r
1210     const unsigned char *p = data;\r
1211     int i;\r
1212     int w, b;\r
1213 \r
1214     if (len < 2)\r
1215         return -1;\r
1216 \r
1217     w = 0;\r
1218     for (i = 0; i < 2; i++)\r
1219         w = (w << 8) + *p++;\r
1220     b = (w + 7) / 8;                   /* bits -> bytes */\r
1221 \r
1222     if (len < b+2)\r
1223         return -1;\r
1224 \r
1225     if (!result)                       /* just return length */\r
1226         return b + 2;\r
1227 \r
1228     *result = bignum_from_bytes(p, b);\r
1229 \r
1230     return p + b - data;\r
1231 }\r
1232 \r
1233 /*\r
1234  * Return the bit count of a bignum, for SSH-1 encoding.\r
1235  */\r
1236 int bignum_bitcount(Bignum bn)\r
1237 {\r
1238     int bitcount = bn[0] * BIGNUM_INT_BITS - 1;\r
1239     while (bitcount >= 0\r
1240            && (bn[bitcount / BIGNUM_INT_BITS + 1] >> (bitcount % BIGNUM_INT_BITS)) == 0) bitcount--;\r
1241     return bitcount + 1;\r
1242 }\r
1243 \r
1244 /*\r
1245  * Return the byte length of a bignum when SSH-1 encoded.\r
1246  */\r
1247 int ssh1_bignum_length(Bignum bn)\r
1248 {\r
1249     return 2 + (bignum_bitcount(bn) + 7) / 8;\r
1250 }\r
1251 \r
1252 /*\r
1253  * Return the byte length of a bignum when SSH-2 encoded.\r
1254  */\r
1255 int ssh2_bignum_length(Bignum bn)\r
1256 {\r
1257     return 4 + (bignum_bitcount(bn) + 8) / 8;\r
1258 }\r
1259 \r
1260 /*\r
1261  * Return a byte from a bignum; 0 is least significant, etc.\r
1262  */\r
1263 int bignum_byte(Bignum bn, int i)\r
1264 {\r
1265     if (i < 0 || i >= (int)(BIGNUM_INT_BYTES * bn[0]))\r
1266         return 0;                      /* beyond the end */\r
1267     else\r
1268         return (bn[i / BIGNUM_INT_BYTES + 1] >>\r
1269                 ((i % BIGNUM_INT_BYTES)*8)) & 0xFF;\r
1270 }\r
1271 \r
1272 /*\r
1273  * Return a bit from a bignum; 0 is least significant, etc.\r
1274  */\r
1275 int bignum_bit(Bignum bn, int i)\r
1276 {\r
1277     if (i < 0 || i >= (int)(BIGNUM_INT_BITS * bn[0]))\r
1278         return 0;                      /* beyond the end */\r
1279     else\r
1280         return (bn[i / BIGNUM_INT_BITS + 1] >> (i % BIGNUM_INT_BITS)) & 1;\r
1281 }\r
1282 \r
1283 /*\r
1284  * Set a bit in a bignum; 0 is least significant, etc.\r
1285  */\r
1286 void bignum_set_bit(Bignum bn, int bitnum, int value)\r
1287 {\r
1288     if (bitnum < 0 || bitnum >= (int)(BIGNUM_INT_BITS * bn[0]))\r
1289         abort();                       /* beyond the end */\r
1290     else {\r
1291         int v = bitnum / BIGNUM_INT_BITS + 1;\r
1292         int mask = 1 << (bitnum % BIGNUM_INT_BITS);\r
1293         if (value)\r
1294             bn[v] |= mask;\r
1295         else\r
1296             bn[v] &= ~mask;\r
1297     }\r
1298 }\r
1299 \r
1300 /*\r
1301  * Write a SSH-1-format bignum into a buffer. It is assumed the\r
1302  * buffer is big enough. Returns the number of bytes used.\r
1303  */\r
1304 int ssh1_write_bignum(void *data, Bignum bn)\r
1305 {\r
1306     unsigned char *p = data;\r
1307     int len = ssh1_bignum_length(bn);\r
1308     int i;\r
1309     int bitc = bignum_bitcount(bn);\r
1310 \r
1311     *p++ = (bitc >> 8) & 0xFF;\r
1312     *p++ = (bitc) & 0xFF;\r
1313     for (i = len - 2; i--;)\r
1314         *p++ = bignum_byte(bn, i);\r
1315     return len;\r
1316 }\r
1317 \r
1318 /*\r
1319  * Compare two bignums. Returns like strcmp.\r
1320  */\r
1321 int bignum_cmp(Bignum a, Bignum b)\r
1322 {\r
1323     int amax = a[0], bmax = b[0];\r
1324     int i;\r
1325 \r
1326     /* Annoyingly we have two representations of zero */\r
1327     if (amax == 1 && a[amax] == 0)\r
1328         amax = 0;\r
1329     if (bmax == 1 && b[bmax] == 0)\r
1330         bmax = 0;\r
1331 \r
1332     assert(amax == 0 || a[amax] != 0);\r
1333     assert(bmax == 0 || b[bmax] != 0);\r
1334 \r
1335     i = (amax > bmax ? amax : bmax);\r
1336     while (i) {\r
1337         BignumInt aval = (i > amax ? 0 : a[i]);\r
1338         BignumInt bval = (i > bmax ? 0 : b[i]);\r
1339         if (aval < bval)\r
1340             return -1;\r
1341         if (aval > bval)\r
1342             return +1;\r
1343         i--;\r
1344     }\r
1345     return 0;\r
1346 }\r
1347 \r
1348 /*\r
1349  * Right-shift one bignum to form another.\r
1350  */\r
1351 Bignum bignum_rshift(Bignum a, int shift)\r
1352 {\r
1353     Bignum ret;\r
1354     int i, shiftw, shiftb, shiftbb, bits;\r
1355     BignumInt ai, ai1;\r
1356 \r
1357     assert(shift >= 0);\r
1358 \r
1359     bits = bignum_bitcount(a) - shift;\r
1360     ret = newbn((bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS);\r
1361 \r
1362     if (ret) {\r
1363         shiftw = shift / BIGNUM_INT_BITS;\r
1364         shiftb = shift % BIGNUM_INT_BITS;\r
1365         shiftbb = BIGNUM_INT_BITS - shiftb;\r
1366 \r
1367         ai1 = a[shiftw + 1];\r
1368         for (i = 1; i <= (int)ret[0]; i++) {\r
1369             ai = ai1;\r
1370             ai1 = (i + shiftw + 1 <= (int)a[0] ? a[i + shiftw + 1] : 0);\r
1371             ret[i] = ((ai >> shiftb) | (ai1 << shiftbb)) & BIGNUM_INT_MASK;\r
1372         }\r
1373     }\r
1374 \r
1375     return ret;\r
1376 }\r
1377 \r
1378 /*\r
1379  * Non-modular multiplication and addition.\r
1380  */\r
1381 Bignum bigmuladd(Bignum a, Bignum b, Bignum addend)\r
1382 {\r
1383     int alen = a[0], blen = b[0];\r
1384     int mlen = (alen > blen ? alen : blen);\r
1385     int rlen, i, maxspot;\r
1386     int wslen;\r
1387     BignumInt *workspace;\r
1388     Bignum ret;\r
1389 \r
1390     /* mlen space for a, mlen space for b, 2*mlen for result,\r
1391      * plus scratch space for multiplication */\r
1392     wslen = mlen * 4 + mul_compute_scratch(mlen);\r
1393     workspace = snewn(wslen, BignumInt);\r
1394     for (i = 0; i < mlen; i++) {\r
1395         workspace[0 * mlen + i] = (mlen - i <= (int)a[0] ? a[mlen - i] : 0);\r
1396         workspace[1 * mlen + i] = (mlen - i <= (int)b[0] ? b[mlen - i] : 0);\r
1397     }\r
1398 \r
1399     internal_mul(workspace + 0 * mlen, workspace + 1 * mlen,\r
1400                  workspace + 2 * mlen, mlen, workspace + 4 * mlen);\r
1401 \r
1402     /* now just copy the result back */\r
1403     rlen = alen + blen + 1;\r
1404     if (addend && rlen <= (int)addend[0])\r
1405         rlen = addend[0] + 1;\r
1406     ret = newbn(rlen);\r
1407     maxspot = 0;\r
1408     for (i = 1; i <= (int)ret[0]; i++) {\r
1409         ret[i] = (i <= 2 * mlen ? workspace[4 * mlen - i] : 0);\r
1410         if (ret[i] != 0)\r
1411             maxspot = i;\r
1412     }\r
1413     ret[0] = maxspot;\r
1414 \r
1415     /* now add in the addend, if any */\r
1416     if (addend) {\r
1417         BignumDblInt carry = 0;\r
1418         for (i = 1; i <= rlen; i++) {\r
1419             carry += (i <= (int)ret[0] ? ret[i] : 0);\r
1420             carry += (i <= (int)addend[0] ? addend[i] : 0);\r
1421             ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;\r
1422             carry >>= BIGNUM_INT_BITS;\r
1423             if (ret[i] != 0 && i > maxspot)\r
1424                 maxspot = i;\r
1425         }\r
1426     }\r
1427     ret[0] = maxspot;\r
1428 \r
1429     smemclr(workspace, wslen * sizeof(*workspace));\r
1430     sfree(workspace);\r
1431     return ret;\r
1432 }\r
1433 \r
1434 /*\r
1435  * Non-modular multiplication.\r
1436  */\r
1437 Bignum bigmul(Bignum a, Bignum b)\r
1438 {\r
1439     return bigmuladd(a, b, NULL);\r
1440 }\r
1441 \r
1442 /*\r
1443  * Simple addition.\r
1444  */\r
1445 Bignum bigadd(Bignum a, Bignum b)\r
1446 {\r
1447     int alen = a[0], blen = b[0];\r
1448     int rlen = (alen > blen ? alen : blen) + 1;\r
1449     int i, maxspot;\r
1450     Bignum ret;\r
1451     BignumDblInt carry;\r
1452 \r
1453     ret = newbn(rlen);\r
1454 \r
1455     carry = 0;\r
1456     maxspot = 0;\r
1457     for (i = 1; i <= rlen; i++) {\r
1458         carry += (i <= (int)a[0] ? a[i] : 0);\r
1459         carry += (i <= (int)b[0] ? b[i] : 0);\r
1460         ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;\r
1461         carry >>= BIGNUM_INT_BITS;\r
1462         if (ret[i] != 0 && i > maxspot)\r
1463             maxspot = i;\r
1464     }\r
1465     ret[0] = maxspot;\r
1466 \r
1467     return ret;\r
1468 }\r
1469 \r
1470 /*\r
1471  * Subtraction. Returns a-b, or NULL if the result would come out\r
1472  * negative (recall that this entire bignum module only handles\r
1473  * positive numbers).\r
1474  */\r
1475 Bignum bigsub(Bignum a, Bignum b)\r
1476 {\r
1477     int alen = a[0], blen = b[0];\r
1478     int rlen = (alen > blen ? alen : blen);\r
1479     int i, maxspot;\r
1480     Bignum ret;\r
1481     BignumDblInt carry;\r
1482 \r
1483     ret = newbn(rlen);\r
1484 \r
1485     carry = 1;\r
1486     maxspot = 0;\r
1487     for (i = 1; i <= rlen; i++) {\r
1488         carry += (i <= (int)a[0] ? a[i] : 0);\r
1489         carry += (i <= (int)b[0] ? b[i] ^ BIGNUM_INT_MASK : BIGNUM_INT_MASK);\r
1490         ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;\r
1491         carry >>= BIGNUM_INT_BITS;\r
1492         if (ret[i] != 0 && i > maxspot)\r
1493             maxspot = i;\r
1494     }\r
1495     ret[0] = maxspot;\r
1496 \r
1497     if (!carry) {\r
1498         freebn(ret);\r
1499         return NULL;\r
1500     }\r
1501 \r
1502     return ret;\r
1503 }\r
1504 \r
1505 /*\r
1506  * Create a bignum which is the bitmask covering another one. That\r
1507  * is, the smallest integer which is >= N and is also one less than\r
1508  * a power of two.\r
1509  */\r
1510 Bignum bignum_bitmask(Bignum n)\r
1511 {\r
1512     Bignum ret = copybn(n);\r
1513     int i;\r
1514     BignumInt j;\r
1515 \r
1516     i = ret[0];\r
1517     while (n[i] == 0 && i > 0)\r
1518         i--;\r
1519     if (i <= 0)\r
1520         return ret;                    /* input was zero */\r
1521     j = 1;\r
1522     while (j < n[i])\r
1523         j = 2 * j + 1;\r
1524     ret[i] = j;\r
1525     while (--i > 0)\r
1526         ret[i] = BIGNUM_INT_MASK;\r
1527     return ret;\r
1528 }\r
1529 \r
1530 /*\r
1531  * Convert a (max 32-bit) long into a bignum.\r
1532  */\r
1533 Bignum bignum_from_long(unsigned long nn)\r
1534 {\r
1535     Bignum ret;\r
1536     BignumDblInt n = nn;\r
1537 \r
1538     ret = newbn(3);\r
1539     ret[1] = (BignumInt)(n & BIGNUM_INT_MASK);\r
1540     ret[2] = (BignumInt)((n >> BIGNUM_INT_BITS) & BIGNUM_INT_MASK);\r
1541     ret[3] = 0;\r
1542     ret[0] = (ret[2]  ? 2 : 1);\r
1543     return ret;\r
1544 }\r
1545 \r
1546 /*\r
1547  * Add a long to a bignum.\r
1548  */\r
1549 Bignum bignum_add_long(Bignum number, unsigned long addendx)\r
1550 {\r
1551     Bignum ret = newbn(number[0] + 1);\r
1552     int i, maxspot = 0;\r
1553     BignumDblInt carry = 0, addend = addendx;\r
1554 \r
1555     for (i = 1; i <= (int)ret[0]; i++) {\r
1556         carry += addend & BIGNUM_INT_MASK;\r
1557         carry += (i <= (int)number[0] ? number[i] : 0);\r
1558         addend >>= BIGNUM_INT_BITS;\r
1559         ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;\r
1560         carry >>= BIGNUM_INT_BITS;\r
1561         if (ret[i] != 0)\r
1562             maxspot = i;\r
1563     }\r
1564     ret[0] = maxspot;\r
1565     return ret;\r
1566 }\r
1567 \r
1568 /*\r
1569  * Compute the residue of a bignum, modulo a (max 16-bit) short.\r
1570  */\r
1571 unsigned short bignum_mod_short(Bignum number, unsigned short modulus)\r
1572 {\r
1573     BignumDblInt mod, r;\r
1574     int i;\r
1575 \r
1576     r = 0;\r
1577     mod = modulus;\r
1578     for (i = number[0]; i > 0; i--)\r
1579         r = (r * (BIGNUM_TOP_BIT % mod) * 2 + number[i] % mod) % mod;\r
1580     return (unsigned short) r;\r
1581 }\r
1582 \r
1583 #ifdef DEBUG\r
1584 void diagbn(char *prefix, Bignum md)\r
1585 {\r
1586     int i, nibbles, morenibbles;\r
1587     static const char hex[] = "0123456789ABCDEF";\r
1588 \r
1589     debug(("%s0x", prefix ? prefix : ""));\r
1590 \r
1591     nibbles = (3 + bignum_bitcount(md)) / 4;\r
1592     if (nibbles < 1)\r
1593         nibbles = 1;\r
1594     morenibbles = 4 * md[0] - nibbles;\r
1595     for (i = 0; i < morenibbles; i++)\r
1596         debug(("-"));\r
1597     for (i = nibbles; i--;)\r
1598         debug(("%c",\r
1599                hex[(bignum_byte(md, i / 2) >> (4 * (i % 2))) & 0xF]));\r
1600 \r
1601     if (prefix)\r
1602         debug(("\n"));\r
1603 }\r
1604 #endif\r
1605 \r
1606 /*\r
1607  * Simple division.\r
1608  */\r
1609 Bignum bigdiv(Bignum a, Bignum b)\r
1610 {\r
1611     Bignum q = newbn(a[0]);\r
1612     bigdivmod(a, b, NULL, q);\r
1613     while (q[0] > 1 && q[q[0]] == 0)\r
1614         q[0]--;\r
1615     return q;\r
1616 }\r
1617 \r
1618 /*\r
1619  * Simple remainder.\r
1620  */\r
1621 Bignum bigmod(Bignum a, Bignum b)\r
1622 {\r
1623     Bignum r = newbn(b[0]);\r
1624     bigdivmod(a, b, r, NULL);\r
1625     while (r[0] > 1 && r[r[0]] == 0)\r
1626         r[0]--;\r
1627     return r;\r
1628 }\r
1629 \r
1630 /*\r
1631  * Greatest common divisor.\r
1632  */\r
1633 Bignum biggcd(Bignum av, Bignum bv)\r
1634 {\r
1635     Bignum a = copybn(av);\r
1636     Bignum b = copybn(bv);\r
1637 \r
1638     while (bignum_cmp(b, Zero) != 0) {\r
1639         Bignum t = newbn(b[0]);\r
1640         bigdivmod(a, b, t, NULL);\r
1641         while (t[0] > 1 && t[t[0]] == 0)\r
1642             t[0]--;\r
1643         freebn(a);\r
1644         a = b;\r
1645         b = t;\r
1646     }\r
1647 \r
1648     freebn(b);\r
1649     return a;\r
1650 }\r
1651 \r
1652 /*\r
1653  * Modular inverse, using Euclid's extended algorithm.\r
1654  */\r
1655 Bignum modinv(Bignum number, Bignum modulus)\r
1656 {\r
1657     Bignum a = copybn(modulus);\r
1658     Bignum b = copybn(number);\r
1659     Bignum xp = copybn(Zero);\r
1660     Bignum x = copybn(One);\r
1661     int sign = +1;\r
1662 \r
1663     assert(number[number[0]] != 0);\r
1664     assert(modulus[modulus[0]] != 0);\r
1665 \r
1666     while (bignum_cmp(b, One) != 0) {\r
1667         Bignum t, q;\r
1668 \r
1669         if (bignum_cmp(b, Zero) == 0) {\r
1670             /*\r
1671              * Found a common factor between the inputs, so we cannot\r
1672              * return a modular inverse at all.\r
1673              */\r
1674             freebn(b);\r
1675             freebn(a);\r
1676             freebn(xp);\r
1677             freebn(x);\r
1678             return NULL;\r
1679         }\r
1680 \r
1681         t = newbn(b[0]);\r
1682         q = newbn(a[0]);\r
1683         bigdivmod(a, b, t, q);\r
1684         while (t[0] > 1 && t[t[0]] == 0)\r
1685             t[0]--;\r
1686         while (q[0] > 1 && q[q[0]] == 0)\r
1687             q[0]--;\r
1688         freebn(a);\r
1689         a = b;\r
1690         b = t;\r
1691         t = xp;\r
1692         xp = x;\r
1693         x = bigmuladd(q, xp, t);\r
1694         sign = -sign;\r
1695         freebn(t);\r
1696         freebn(q);\r
1697     }\r
1698 \r
1699     freebn(b);\r
1700     freebn(a);\r
1701     freebn(xp);\r
1702 \r
1703     /* now we know that sign * x == 1, and that x < modulus */\r
1704     if (sign < 0) {\r
1705         /* set a new x to be modulus - x */\r
1706         Bignum newx = newbn(modulus[0]);\r
1707         BignumInt carry = 0;\r
1708         int maxspot = 1;\r
1709         int i;\r
1710 \r
1711         for (i = 1; i <= (int)newx[0]; i++) {\r
1712             BignumInt aword = (i <= (int)modulus[0] ? modulus[i] : 0);\r
1713             BignumInt bword = (i <= (int)x[0] ? x[i] : 0);\r
1714             newx[i] = aword - bword - carry;\r
1715             bword = ~bword;\r
1716             carry = carry ? (newx[i] >= bword) : (newx[i] > bword);\r
1717             if (newx[i] != 0)\r
1718                 maxspot = i;\r
1719         }\r
1720         newx[0] = maxspot;\r
1721         freebn(x);\r
1722         x = newx;\r
1723     }\r
1724 \r
1725     /* and return. */\r
1726     return x;\r
1727 }\r
1728 \r
1729 /*\r
1730  * Render a bignum into decimal. Return a malloced string holding\r
1731  * the decimal representation.\r
1732  */\r
1733 char *bignum_decimal(Bignum x)\r
1734 {\r
1735     int ndigits, ndigit;\r
1736     int i, iszero;\r
1737     BignumDblInt carry;\r
1738     char *ret;\r
1739     BignumInt *workspace;\r
1740 \r
1741     /*\r
1742      * First, estimate the number of digits. Since log(10)/log(2)\r
1743      * is just greater than 93/28 (the joys of continued fraction\r
1744      * approximations...) we know that for every 93 bits, we need\r
1745      * at most 28 digits. This will tell us how much to malloc.\r
1746      *\r
1747      * Formally: if x has i bits, that means x is strictly less\r
1748      * than 2^i. Since 2 is less than 10^(28/93), this is less than\r
1749      * 10^(28i/93). We need an integer power of ten, so we must\r
1750      * round up (rounding down might make it less than x again).\r
1751      * Therefore if we multiply the bit count by 28/93, rounding\r
1752      * up, we will have enough digits.\r
1753      *\r
1754      * i=0 (i.e., x=0) is an irritating special case.\r
1755      */\r
1756     i = bignum_bitcount(x);\r
1757     if (!i)\r
1758         ndigits = 1;                   /* x = 0 */\r
1759     else\r
1760         ndigits = (28 * i + 92) / 93;  /* multiply by 28/93 and round up */\r
1761     ndigits++;                         /* allow for trailing \0 */\r
1762     ret = snewn(ndigits, char);\r
1763 \r
1764     /*\r
1765      * Now allocate some workspace to hold the binary form as we\r
1766      * repeatedly divide it by ten. Initialise this to the\r
1767      * big-endian form of the number.\r
1768      */\r
1769     workspace = snewn(x[0], BignumInt);\r
1770     for (i = 0; i < (int)x[0]; i++)\r
1771         workspace[i] = x[x[0] - i];\r
1772 \r
1773     /*\r
1774      * Next, write the decimal number starting with the last digit.\r
1775      * We use ordinary short division, dividing 10 into the\r
1776      * workspace.\r
1777      */\r
1778     ndigit = ndigits - 1;\r
1779     ret[ndigit] = '\0';\r
1780     do {\r
1781         iszero = 1;\r
1782         carry = 0;\r
1783         for (i = 0; i < (int)x[0]; i++) {\r
1784             carry = (carry << BIGNUM_INT_BITS) + workspace[i];\r
1785             workspace[i] = (BignumInt) (carry / 10);\r
1786             if (workspace[i])\r
1787                 iszero = 0;\r
1788             carry %= 10;\r
1789         }\r
1790         ret[--ndigit] = (char) (carry + '0');\r
1791     } while (!iszero);\r
1792 \r
1793     /*\r
1794      * There's a chance we've fallen short of the start of the\r
1795      * string. Correct if so.\r
1796      */\r
1797     if (ndigit > 0)\r
1798         memmove(ret, ret + ndigit, ndigits - ndigit);\r
1799 \r
1800     /*\r
1801      * Done.\r
1802      */\r
1803     smemclr(workspace, x[0] * sizeof(*workspace));\r
1804     sfree(workspace);\r
1805     return ret;\r
1806 }\r
1807 \r
1808 #ifdef TESTBN\r
1809 \r
1810 #include <stdio.h>\r
1811 #include <stdlib.h>\r
1812 #include <ctype.h>\r
1813 \r
1814 /*\r
1815  * gcc -Wall -g -O0 -DTESTBN -o testbn sshbn.c misc.c conf.c tree234.c unix/uxmisc.c -I. -I unix -I charset\r
1816  *\r
1817  * Then feed to this program's standard input the output of\r
1818  * testdata/bignum.py .\r
1819  */\r
1820 \r
1821 void modalfatalbox(char *p, ...)\r
1822 {\r
1823     va_list ap;\r
1824     fprintf(stderr, "FATAL ERROR: ");\r
1825     va_start(ap, p);\r
1826     vfprintf(stderr, p, ap);\r
1827     va_end(ap);\r
1828     fputc('\n', stderr);\r
1829     exit(1);\r
1830 }\r
1831 \r
1832 #define fromxdigit(c) ( (c)>'9' ? ((c)&0xDF) - 'A' + 10 : (c) - '0' )\r
1833 \r
1834 int main(int argc, char **argv)\r
1835 {\r
1836     char *buf;\r
1837     int line = 0;\r
1838     int passes = 0, fails = 0;\r
1839 \r
1840     while ((buf = fgetline(stdin)) != NULL) {\r
1841         int maxlen = strlen(buf);\r
1842         unsigned char *data = snewn(maxlen, unsigned char);\r
1843         unsigned char *ptrs[5], *q;\r
1844         int ptrnum;\r
1845         char *bufp = buf;\r
1846 \r
1847         line++;\r
1848 \r
1849         q = data;\r
1850         ptrnum = 0;\r
1851 \r
1852         while (*bufp && !isspace((unsigned char)*bufp))\r
1853             bufp++;\r
1854         if (bufp)\r
1855             *bufp++ = '\0';\r
1856 \r
1857         while (*bufp) {\r
1858             char *start, *end;\r
1859             int i;\r
1860 \r
1861             while (*bufp && !isxdigit((unsigned char)*bufp))\r
1862                 bufp++;\r
1863             start = bufp;\r
1864 \r
1865             if (!*bufp)\r
1866                 break;\r
1867 \r
1868             while (*bufp && isxdigit((unsigned char)*bufp))\r
1869                 bufp++;\r
1870             end = bufp;\r
1871 \r
1872             if (ptrnum >= lenof(ptrs))\r
1873                 break;\r
1874             ptrs[ptrnum++] = q;\r
1875             \r
1876             for (i = -((end - start) & 1); i < end-start; i += 2) {\r
1877                 unsigned char val = (i < 0 ? 0 : fromxdigit(start[i]));\r
1878                 val = val * 16 + fromxdigit(start[i+1]);\r
1879                 *q++ = val;\r
1880             }\r
1881 \r
1882             ptrs[ptrnum] = q;\r
1883         }\r
1884 \r
1885         if (!strcmp(buf, "mul")) {\r
1886             Bignum a, b, c, p;\r
1887 \r
1888             if (ptrnum != 3) {\r
1889                 printf("%d: mul with %d parameters, expected 3\n", line, ptrnum);\r
1890                 exit(1);\r
1891             }\r
1892             a = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);\r
1893             b = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);\r
1894             c = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);\r
1895             p = bigmul(a, b);\r
1896 \r
1897             if (bignum_cmp(c, p) == 0) {\r
1898                 passes++;\r
1899             } else {\r
1900                 char *as = bignum_decimal(a);\r
1901                 char *bs = bignum_decimal(b);\r
1902                 char *cs = bignum_decimal(c);\r
1903                 char *ps = bignum_decimal(p);\r
1904                 \r
1905                 printf("%d: fail: %s * %s gave %s expected %s\n",\r
1906                        line, as, bs, ps, cs);\r
1907                 fails++;\r
1908 \r
1909                 sfree(as);\r
1910                 sfree(bs);\r
1911                 sfree(cs);\r
1912                 sfree(ps);\r
1913             }\r
1914             freebn(a);\r
1915             freebn(b);\r
1916             freebn(c);\r
1917             freebn(p);\r
1918         } else if (!strcmp(buf, "modmul")) {\r
1919             Bignum a, b, m, c, p;\r
1920 \r
1921             if (ptrnum != 4) {\r
1922                 printf("%d: modmul with %d parameters, expected 4\n",\r
1923                        line, ptrnum);\r
1924                 exit(1);\r
1925             }\r
1926             a = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);\r
1927             b = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);\r
1928             m = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);\r
1929             c = bignum_from_bytes(ptrs[3], ptrs[4]-ptrs[3]);\r
1930             p = modmul(a, b, m);\r
1931 \r
1932             if (bignum_cmp(c, p) == 0) {\r
1933                 passes++;\r
1934             } else {\r
1935                 char *as = bignum_decimal(a);\r
1936                 char *bs = bignum_decimal(b);\r
1937                 char *ms = bignum_decimal(m);\r
1938                 char *cs = bignum_decimal(c);\r
1939                 char *ps = bignum_decimal(p);\r
1940                 \r
1941                 printf("%d: fail: %s * %s mod %s gave %s expected %s\n",\r
1942                        line, as, bs, ms, ps, cs);\r
1943                 fails++;\r
1944 \r
1945                 sfree(as);\r
1946                 sfree(bs);\r
1947                 sfree(ms);\r
1948                 sfree(cs);\r
1949                 sfree(ps);\r
1950             }\r
1951             freebn(a);\r
1952             freebn(b);\r
1953             freebn(m);\r
1954             freebn(c);\r
1955             freebn(p);\r
1956         } else if (!strcmp(buf, "pow")) {\r
1957             Bignum base, expt, modulus, expected, answer;\r
1958 \r
1959             if (ptrnum != 4) {\r
1960                 printf("%d: mul with %d parameters, expected 4\n", line, ptrnum);\r
1961                 exit(1);\r
1962             }\r
1963 \r
1964             base = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);\r
1965             expt = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);\r
1966             modulus = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);\r
1967             expected = bignum_from_bytes(ptrs[3], ptrs[4]-ptrs[3]);\r
1968             answer = modpow(base, expt, modulus);\r
1969 \r
1970             if (bignum_cmp(expected, answer) == 0) {\r
1971                 passes++;\r
1972             } else {\r
1973                 char *as = bignum_decimal(base);\r
1974                 char *bs = bignum_decimal(expt);\r
1975                 char *cs = bignum_decimal(modulus);\r
1976                 char *ds = bignum_decimal(answer);\r
1977                 char *ps = bignum_decimal(expected);\r
1978                 \r
1979                 printf("%d: fail: %s ^ %s mod %s gave %s expected %s\n",\r
1980                        line, as, bs, cs, ds, ps);\r
1981                 fails++;\r
1982 \r
1983                 sfree(as);\r
1984                 sfree(bs);\r
1985                 sfree(cs);\r
1986                 sfree(ds);\r
1987                 sfree(ps);\r
1988             }\r
1989             freebn(base);\r
1990             freebn(expt);\r
1991             freebn(modulus);\r
1992             freebn(expected);\r
1993             freebn(answer);\r
1994         } else {\r
1995             printf("%d: unrecognised test keyword: '%s'\n", line, buf);\r
1996             exit(1);\r
1997         }\r
1998 \r
1999         sfree(buf);\r
2000         sfree(data);\r
2001     }\r
2002 \r
2003     printf("passed %d failed %d total %d\n", passes, fails, passes+fails);\r
2004     return fails != 0;\r
2005 }\r
2006 \r
2007 #endif\r