usr/src/common/mpi/mpi.c

   1 /* BEGIN CSTYLED */
   2 /*
   3  *  mpi.c
   4  *
   5  *  Arbitrary precision integer arithmetic library
   6  *
   7  * ***** BEGIN LICENSE BLOCK *****
   8  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   9  *
  10  * The contents of this file are subject to the Mozilla Public License Version
  11  * 1.1 (the "License"); you may not use this file except in compliance with
  12  * the License. You may obtain a copy of the License at
  13  * http://www.mozilla.org/MPL/
  14  *
  15  * Software distributed under the License is distributed on an "AS IS" basis,
  16  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  17  * for the specific language governing rights and limitations under the
  18  * License.
  19  *
  20  * The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
  21  *
  22  * The Initial Developer of the Original Code is
  23  * Michael J. Fromberger.
  24  * Portions created by the Initial Developer are Copyright (C) 1998
  25  * the Initial Developer. All Rights Reserved.
  26  *
  27  * Contributor(s):
  28  *   Netscape Communications Corporation
  29  *   Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
  30  *
  31  * Alternatively, the contents of this file may be used under the terms of
  32  * either the GNU General Public License Version 2 or later (the "GPL"), or
  33  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  34  * in which case the provisions of the GPL or the LGPL are applicable instead
  35  * of those above. If you wish to allow use of your version of this file only
  36  * under the terms of either the GPL or the LGPL, and not to allow others to
  37  * use your version of this file under the terms of the MPL, indicate your
  38  * decision by deleting the provisions above and replace them with the notice
  39  * and other provisions required by the GPL or the LGPL. If you do not delete
  40  * the provisions above, a recipient may use your version of this file under
  41  * the terms of any one of the MPL, the GPL or the LGPL.
  42  *
  43  * ***** END LICENSE BLOCK ***** */
  44 /*
  45  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  46  *
  47  * Sun elects to use this software under the MPL license.
  48  */
  49
  50 /* $Id: mpi.c,v 1.45 2006/09/29 20:12:21 alexei.volkov.bugs%sun.com Exp $ */
  51
  52 #include "mpi-priv.h"
  53 #if defined(OSF1)
  54 #include <c_asm.h>
  55 #endif
  56
  57 #if MP_LOGTAB
  58 /*
  59   A table of the logs of 2 for various bases (the 0 and 1 entries of
  60   this table are meaningless and should not be referenced).
  61
  62   This table is used to compute output lengths for the mp_toradix()
  63   function.  Since a number n in radix r takes up about log_r(n)
  64   digits, we estimate the output size by taking the least integer
  65   greater than log_r(n), where:
  66
  67   log_r(n) = log_2(n) * log_r(2)
  68
  69   This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
  70   which are the output bases supported.
  71  */
  72 #include "logtab.h"
  73 #endif
  74
  75 /* {{{ Constant strings */
  76
  77 /* Constant strings returned by mp_strerror() */
  78 static const char *mp_err_string[] = {
  79   "unknown result code",     /* say what?            */
  80   "boolean true",            /* MP_OKAY, MP_YES      */
  81   "boolean false",           /* MP_NO                */
  82   "out of memory",           /* MP_MEM               */
  83   "argument out of range",   /* MP_RANGE             */
  84   "invalid input parameter", /* MP_BADARG            */
  85   "result is undefined"      /* MP_UNDEF             */
  86 };
  87
  88 /* Value to digit maps for radix conversion   */
  89
  90 /* s_dmap_1 - standard digits and letters */
  91 static const char *s_dmap_1 =
  92   "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
  93
  94 /* }}} */
  95
  96 unsigned long mp_allocs;
  97 unsigned long mp_frees;
  98 unsigned long mp_copies;
  99
 100 /* {{{ Default precision manipulation */
 101
 102 /* Default precision for newly created mp_int's      */
 103 static mp_size s_mp_defprec = MP_DEFPREC;
 104
 105 mp_size mp_get_prec(void)
 106 {
 107   return s_mp_defprec;
 108
 109 } /* end mp_get_prec() */
 110
 111 void         mp_set_prec(mp_size prec)
 112 {
 113   if(prec == 0)
 114     s_mp_defprec = MP_DEFPREC;
 115   else
 116     s_mp_defprec = prec;
 117
 118 } /* end mp_set_prec() */
 119
 120 /* }}} */
 121
 122 /*------------------------------------------------------------------------*/
 123 /* {{{ mp_init(mp, kmflag) */
 124
 125 /*
 126   mp_init(mp, kmflag)
 127
 128   Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
 129   MP_MEM if memory could not be allocated for the structure.
 130  */
 131
 132 mp_err mp_init(mp_int *mp, int kmflag)
 133 {
 134   return mp_init_size(mp, s_mp_defprec, kmflag);
 135
 136 } /* end mp_init() */
 137
 138 /* }}} */
 139
 140 /* {{{ mp_init_size(mp, prec, kmflag) */
 141
 142 /*
 143   mp_init_size(mp, prec, kmflag)
 144
 145   Initialize a new zero-valued mp_int with at least the given
 146   precision; returns MP_OKAY if successful, or MP_MEM if memory could
 147   not be allocated for the structure.
 148  */
 149
 150 mp_err mp_init_size(mp_int *mp, mp_size prec, int kmflag)
 151 {
 152   ARGCHK(mp != NULL && prec > 0, MP_BADARG);
 153
 154   prec = MP_ROUNDUP(prec, s_mp_defprec);
 155   if((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit), kmflag)) == NULL)
 156     return MP_MEM;
 157
 158   SIGN(mp) = ZPOS;
 159   USED(mp) = 1;
 160   ALLOC(mp) = prec;
 161   FLAG(mp) = kmflag;
 162
 163   return MP_OKAY;
 164
 165 } /* end mp_init_size() */
 166
 167 /* }}} */
 168
 169 /* {{{ mp_init_copy(mp, from) */
 170
 171 /*
 172   mp_init_copy(mp, from)
 173
 174   Initialize mp as an exact copy of from.  Returns MP_OKAY if
 175   successful, MP_MEM if memory could not be allocated for the new
 176   structure.
 177  */
 178
 179 mp_err mp_init_copy(mp_int *mp, const mp_int *from)
 180 {
 181   ARGCHK(mp != NULL && from != NULL, MP_BADARG);
 182
 183   if(mp == from)
 184     return MP_OKAY;
 185
 186   if((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit), FLAG(from))) == NULL)
 187     return MP_MEM;
 188
 189   s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
 190   USED(mp) = USED(from);
 191   ALLOC(mp) = ALLOC(from);
 192   SIGN(mp) = SIGN(from);
 193   FLAG(mp) = FLAG(from);
 194
 195   return MP_OKAY;
 196
 197 } /* end mp_init_copy() */
 198
 199 /* }}} */
 200
 201 /* {{{ mp_copy(from, to) */
 202
 203 /*
 204   mp_copy(from, to)
 205
 206   Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
 207   'to' has already been initialized (if not, use mp_init_copy()
 208   instead). If 'from' and 'to' are identical, nothing happens.
 209  */
 210
 211 mp_err mp_copy(const mp_int *from, mp_int *to)
 212 {
 213   ARGCHK(from != NULL && to != NULL, MP_BADARG);
 214
 215   if(from == to)
 216     return MP_OKAY;
 217
 218   ++mp_copies;
 219   { /* copy */
 220     mp_digit   *tmp;
 221
 222     /*
 223       If the allocated buffer in 'to' already has enough space to hold
 224       all the used digits of 'from', we'll re-use it to avoid hitting
 225       the memory allocater more than necessary; otherwise, we'd have
 226       to grow anyway, so we just allocate a hunk and make the copy as
 227       usual
 228      */
 229     if(ALLOC(to) >= USED(from)) {
 230       s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
 231       s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
 232
 233     } else {
 234       if((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit), FLAG(from))) == NULL)
 235         return MP_MEM;
 236
 237       s_mp_copy(DIGITS(from), tmp, USED(from));
 238
 239       if(DIGITS(to) != NULL) {
 240 #if MP_CRYPTO
 241         s_mp_setz(DIGITS(to), ALLOC(to));
 242 #endif
 243         s_mp_free(DIGITS(to), ALLOC(to));
 244       }
 245
 246       DIGITS(to) = tmp;
 247       ALLOC(to) = ALLOC(from);
 248     }
 249
 250     /* Copy the precision and sign from the original */
 251     USED(to) = USED(from);
 252     SIGN(to) = SIGN(from);
 253     FLAG(to) = FLAG(from);
 254   } /* end copy */
 255
 256   return MP_OKAY;
 257
 258 } /* end mp_copy() */
 259
 260 /* }}} */
 261
 262 /* {{{ mp_exch(mp1, mp2) */
 263
 264 /*
 265   mp_exch(mp1, mp2)
 266
 267   Exchange mp1 and mp2 without allocating any intermediate memory
 268   (well, unless you count the stack space needed for this call and the
 269   locals it creates...).  This cannot fail.
 270  */
 271
 272 void mp_exch(mp_int *mp1, mp_int *mp2)
 273 {
 274 #if MP_ARGCHK == 2
 275   assert(mp1 != NULL && mp2 != NULL);
 276 #else
 277   if(mp1 == NULL || mp2 == NULL)
 278     return;
 279 #endif
 280
 281   s_mp_exch(mp1, mp2);
 282
 283 } /* end mp_exch() */
 284
 285 /* }}} */
 286
 287 /* {{{ mp_clear(mp) */
 288
 289 /*
 290   mp_clear(mp)
 291
 292   Release the storage used by an mp_int, and void its fields so that
 293   if someone calls mp_clear() again for the same int later, we won't
 294   get tollchocked.
 295  */
 296
 297 void   mp_clear(mp_int *mp)
 298 {
 299   if(mp == NULL)
 300     return;
 301
 302   if(DIGITS(mp) != NULL) {
 303 #if MP_CRYPTO
 304     s_mp_setz(DIGITS(mp), ALLOC(mp));
 305 #endif
 306     s_mp_free(DIGITS(mp), ALLOC(mp));
 307     DIGITS(mp) = NULL;
 308   }
 309
 310   USED(mp) = 0;
 311   ALLOC(mp) = 0;
 312
 313 } /* end mp_clear() */
 314
 315 /* }}} */
 316
 317 /* {{{ mp_zero(mp) */
 318
 319 /*
 320   mp_zero(mp)
 321
 322   Set mp to zero.  Does not change the allocated size of the structure,
 323   and therefore cannot fail (except on a bad argument, which we ignore)
 324  */
 325 void   mp_zero(mp_int *mp)
 326 {
 327   if(mp == NULL)
 328     return;
 329
 330   s_mp_setz(DIGITS(mp), ALLOC(mp));
 331   USED(mp) = 1;
 332   SIGN(mp) = ZPOS;
 333
 334 } /* end mp_zero() */
 335
 336 /* }}} */
 337
 338 /* {{{ mp_set(mp, d) */
 339
 340 void   mp_set(mp_int *mp, mp_digit d)
 341 {
 342   if(mp == NULL)
 343     return;
 344
 345   mp_zero(mp);
 346   DIGIT(mp, 0) = d;
 347
 348 } /* end mp_set() */
 349
 350 /* }}} */
 351
 352 /* {{{ mp_set_int(mp, z) */
 353
 354 mp_err mp_set_int(mp_int *mp, long z)
 355 {
 356   int            ix;
 357   unsigned long  v = labs(z);
 358   mp_err         res;
 359
 360   ARGCHK(mp != NULL, MP_BADARG);
 361
 362   mp_zero(mp);
 363   if(z == 0)
 364     return MP_OKAY;  /* shortcut for zero */
 365
 366   if (sizeof v <= sizeof(mp_digit)) {
 367     DIGIT(mp,0) = v;
 368   } else {
 369     for (ix = sizeof(long) - 1; ix >= 0; ix--) {
 370       if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
 371         return res;
 372
 373       res = s_mp_add_d(mp, (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX));
 374       if (res != MP_OKAY)
 375         return res;
 376     }
 377   }
 378   if(z < 0)
 379     SIGN(mp) = NEG;
 380
 381   return MP_OKAY;
 382
 383 } /* end mp_set_int() */
 384
 385 /* }}} */
 386
 387 /* {{{ mp_set_ulong(mp, z) */
 388
 389 mp_err mp_set_ulong(mp_int *mp, unsigned long z)
 390 {
 391   int            ix;
 392   mp_err         res;
 393
 394   ARGCHK(mp != NULL, MP_BADARG);
 395
 396   mp_zero(mp);
 397   if(z == 0)
 398     return MP_OKAY;  /* shortcut for zero */
 399
 400   if (sizeof z <= sizeof(mp_digit)) {
 401     DIGIT(mp,0) = z;
 402   } else {
 403     for (ix = sizeof(long) - 1; ix >= 0; ix--) {
 404       if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
 405         return res;
 406
 407       res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
 408       if (res != MP_OKAY)
 409         return res;
 410     }
 411   }
 412   return MP_OKAY;
 413 } /* end mp_set_ulong() */
 414
 415 /* }}} */
 416
 417 /*------------------------------------------------------------------------*/
 418 /* {{{ Digit arithmetic */
 419
 420 /* {{{ mp_add_d(a, d, b) */
 421
 422 /*
 423   mp_add_d(a, d, b)
 424
 425   Compute the sum b = a + d, for a single digit d.  Respects the sign of
 426   its primary addend (single digits are unsigned anyway).
 427  */
 428
 429 mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
 430 {
 431   mp_int   tmp;
 432   mp_err   res;
 433
 434   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 435
 436   if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 437     return res;
 438
 439   if(SIGN(&tmp) == ZPOS) {
 440     if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
 441       goto CLEANUP;
 442   } else if(s_mp_cmp_d(&tmp, d) >= 0) {
 443     if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
 444       goto CLEANUP;
 445   } else {
 446     mp_neg(&tmp, &tmp);
 447
 448     DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
 449   }
 450
 451   if(s_mp_cmp_d(&tmp, 0) == 0)
 452     SIGN(&tmp) = ZPOS;
 453
 454   s_mp_exch(&tmp, b);
 455
 456 CLEANUP:
 457   mp_clear(&tmp);
 458   return res;
 459
 460 } /* end mp_add_d() */
 461
 462 /* }}} */
 463
 464 /* {{{ mp_sub_d(a, d, b) */
 465
 466 /*
 467   mp_sub_d(a, d, b)
 468
 469   Compute the difference b = a - d, for a single digit d.  Respects the
 470   sign of its subtrahend (single digits are unsigned anyway).
 471  */
 472
 473 mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
 474 {
 475   mp_int   tmp;
 476   mp_err   res;
 477
 478   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 479
 480   if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 481     return res;
 482
 483   if(SIGN(&tmp) == NEG) {
 484     if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
 485       goto CLEANUP;
 486   } else if(s_mp_cmp_d(&tmp, d) >= 0) {
 487     if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
 488       goto CLEANUP;
 489   } else {
 490     mp_neg(&tmp, &tmp);
 491
 492     DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
 493     SIGN(&tmp) = NEG;
 494   }
 495
 496   if(s_mp_cmp_d(&tmp, 0) == 0)
 497     SIGN(&tmp) = ZPOS;
 498
 499   s_mp_exch(&tmp, b);
 500
 501 CLEANUP:
 502   mp_clear(&tmp);
 503   return res;
 504
 505 } /* end mp_sub_d() */
 506
 507 /* }}} */
 508
 509 /* {{{ mp_mul_d(a, d, b) */
 510
 511 /*
 512   mp_mul_d(a, d, b)
 513
 514   Compute the product b = a * d, for a single digit d.  Respects the sign
 515   of its multiplicand (single digits are unsigned anyway)
 516  */
 517
 518 mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
 519 {
 520   mp_err  res;
 521
 522   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 523
 524   if(d == 0) {
 525     mp_zero(b);
 526     return MP_OKAY;
 527   }
 528
 529   if((res = mp_copy(a, b)) != MP_OKAY)
 530     return res;
 531
 532   res = s_mp_mul_d(b, d);
 533
 534   return res;
 535
 536 } /* end mp_mul_d() */
 537
 538 /* }}} */
 539
 540 /* {{{ mp_mul_2(a, c) */
 541
 542 mp_err mp_mul_2(const mp_int *a, mp_int *c)
 543 {
 544   mp_err  res;
 545
 546   ARGCHK(a != NULL && c != NULL, MP_BADARG);
 547
 548   if((res = mp_copy(a, c)) != MP_OKAY)
 549     return res;
 550
 551   return s_mp_mul_2(c);
 552
 553 } /* end mp_mul_2() */
 554
 555 /* }}} */
 556
 557 /* {{{ mp_div_d(a, d, q, r) */
 558
 559 /*
 560   mp_div_d(a, d, q, r)
 561
 562   Compute the quotient q = a / d and remainder r = a mod d, for a
 563   single digit d.  Respects the sign of its divisor (single digits are
 564   unsigned anyway).
 565  */
 566
 567 mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
 568 {
 569   mp_err   res;
 570   mp_int   qp;
 571   mp_digit rem;
 572   int      pow;
 573
 574   ARGCHK(a != NULL, MP_BADARG);
 575
 576   if(d == 0)
 577     return MP_RANGE;
 578
 579   /* Shortcut for powers of two ... */
 580   if((pow = s_mp_ispow2d(d)) >= 0) {
 581     mp_digit  mask;
 582
 583     mask = ((mp_digit)1 << pow) - 1;
 584     rem = DIGIT(a, 0) & mask;
 585
 586     if(q) {
 587       mp_copy(a, q);
 588       s_mp_div_2d(q, pow);
 589     }
 590
 591     if(r)
 592       *r = rem;
 593
 594     return MP_OKAY;
 595   }
 596
 597   if((res = mp_init_copy(&qp, a)) != MP_OKAY)
 598     return res;
 599
 600   res = s_mp_div_d(&qp, d, &rem);
 601
 602   if(s_mp_cmp_d(&qp, 0) == 0)
 603     SIGN(q) = ZPOS;
 604
 605   if(r)
 606     *r = rem;
 607
 608   if(q)
 609     s_mp_exch(&qp, q);
 610
 611   mp_clear(&qp);
 612   return res;
 613
 614 } /* end mp_div_d() */
 615
 616 /* }}} */
 617
 618 /* {{{ mp_div_2(a, c) */
 619
 620 /*
 621   mp_div_2(a, c)
 622
 623   Compute c = a / 2, disregarding the remainder.
 624  */
 625
 626 mp_err mp_div_2(const mp_int *a, mp_int *c)
 627 {
 628   mp_err  res;
 629
 630   ARGCHK(a != NULL && c != NULL, MP_BADARG);
 631
 632   if((res = mp_copy(a, c)) != MP_OKAY)
 633     return res;
 634
 635   s_mp_div_2(c);
 636
 637   return MP_OKAY;
 638
 639 } /* end mp_div_2() */
 640
 641 /* }}} */
 642
 643 /* {{{ mp_expt_d(a, d, b) */
 644
 645 mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
 646 {
 647   mp_int   s, x;
 648   mp_err   res;
 649
 650   ARGCHK(a != NULL && c != NULL, MP_BADARG);
 651
 652   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
 653     return res;
 654   if((res = mp_init_copy(&x, a)) != MP_OKAY)
 655     goto X;
 656
 657   DIGIT(&s, 0) = 1;
 658
 659   while(d != 0) {
 660     if(d & 1) {
 661       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
 662         goto CLEANUP;
 663     }
 664
 665     d /= 2;
 666
 667     if((res = s_mp_sqr(&x)) != MP_OKAY)
 668       goto CLEANUP;
 669   }
 670
 671   s_mp_exch(&s, c);
 672
 673 CLEANUP:
 674   mp_clear(&x);
 675 X:
 676   mp_clear(&s);
 677
 678   return res;
 679
 680 } /* end mp_expt_d() */
 681
 682 /* }}} */
 683
 684 /* }}} */
 685
 686 /*------------------------------------------------------------------------*/
 687 /* {{{ Full arithmetic */
 688
 689 /* {{{ mp_abs(a, b) */
 690
 691 /*
 692   mp_abs(a, b)
 693
 694   Compute b = |a|.  'a' and 'b' may be identical.
 695  */
 696
 697 mp_err mp_abs(const mp_int *a, mp_int *b)
 698 {
 699   mp_err   res;
 700
 701   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 702
 703   if((res = mp_copy(a, b)) != MP_OKAY)
 704     return res;
 705
 706   SIGN(b) = ZPOS;
 707
 708   return MP_OKAY;
 709
 710 } /* end mp_abs() */
 711
 712 /* }}} */
 713
 714 /* {{{ mp_neg(a, b) */
 715
 716 /*
 717   mp_neg(a, b)
 718
 719   Compute b = -a.  'a' and 'b' may be identical.
 720  */
 721
 722 mp_err mp_neg(const mp_int *a, mp_int *b)
 723 {
 724   mp_err   res;
 725
 726   ARGCHK(a != NULL && b != NULL, MP_BADARG);
 727
 728   if((res = mp_copy(a, b)) != MP_OKAY)
 729     return res;
 730
 731   if(s_mp_cmp_d(b, 0) == MP_EQ)
 732     SIGN(b) = ZPOS;
 733   else
 734     SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG;
 735
 736   return MP_OKAY;
 737
 738 } /* end mp_neg() */
 739
 740 /* }}} */
 741
 742 /* {{{ mp_add(a, b, c) */
 743
 744 /*
 745   mp_add(a, b, c)
 746
 747   Compute c = a + b.  All parameters may be identical.
 748  */
 749
 750 mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c)
 751 {
 752   mp_err  res;
 753
 754   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
 755
 756   if(SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
 757     MP_CHECKOK( s_mp_add_3arg(a, b, c) );
 758   } else if(s_mp_cmp(a, b) >= 0) {  /* different sign: |a| >= |b|   */
 759     MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
 760   } else {                          /* different sign: |a|  < |b|   */
 761     MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
 762   }
 763
 764   if (s_mp_cmp_d(c, 0) == MP_EQ)
 765     SIGN(c) = ZPOS;
 766
 767 CLEANUP:
 768   return res;
 769
 770 } /* end mp_add() */
 771
 772 /* }}} */
 773
 774 /* {{{ mp_sub(a, b, c) */
 775
 776 /*
 777   mp_sub(a, b, c)
 778
 779   Compute c = a - b.  All parameters may be identical.
 780  */
 781
 782 mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
 783 {
 784   mp_err  res;
 785   int     magDiff;
 786
 787   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
 788
 789   if (a == b) {
 790     mp_zero(c);
 791     return MP_OKAY;
 792   }
 793
 794   if (MP_SIGN(a) != MP_SIGN(b)) {
 795     MP_CHECKOK( s_mp_add_3arg(a, b, c) );
 796   } else if (!(magDiff = s_mp_cmp(a, b))) {
 797     mp_zero(c);
 798     res = MP_OKAY;
 799   } else if (magDiff > 0) {
 800     MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
 801   } else {
 802     MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
 803     MP_SIGN(c) = !MP_SIGN(a);
 804   }
 805
 806   if (s_mp_cmp_d(c, 0) == MP_EQ)
 807     MP_SIGN(c) = MP_ZPOS;
 808
 809 CLEANUP:
 810   return res;
 811
 812 } /* end mp_sub() */
 813
 814 /* }}} */
 815
 816 /* {{{ mp_mul(a, b, c) */
 817
 818 /*
 819   mp_mul(a, b, c)
 820
 821   Compute c = a * b.  All parameters may be identical.
 822  */
 823 mp_err   mp_mul(const mp_int *a, const mp_int *b, mp_int * c)
 824 {
 825   mp_digit *pb;
 826   mp_int   tmp;
 827   mp_err   res;
 828   mp_size  ib;
 829   mp_size  useda, usedb;
 830
 831   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
 832
 833   if (a == c) {
 834     if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 835       return res;
 836     if (a == b)
 837       b = &tmp;
 838     a = &tmp;
 839   } else if (b == c) {
 840     if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
 841       return res;
 842     b = &tmp;
 843   } else {
 844     MP_DIGITS(&tmp) = 0;
 845   }
 846
 847   if (MP_USED(a) < MP_USED(b)) {
 848     const mp_int *xch = b;      /* switch a and b, to do fewer outer loops */
 849     b = a;
 850     a = xch;
 851   }
 852
 853   MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
 854   if((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
 855     goto CLEANUP;
 856
 857 #ifdef NSS_USE_COMBA
 858   if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
 859       if (MP_USED(a) == 4) {
 860           s_mp_mul_comba_4(a, b, c);
 861           goto CLEANUP;
 862       }
 863       if (MP_USED(a) == 8) {
 864           s_mp_mul_comba_8(a, b, c);
 865           goto CLEANUP;
 866       }
 867       if (MP_USED(a) == 16) {
 868           s_mp_mul_comba_16(a, b, c);
 869           goto CLEANUP;
 870       }
 871       if (MP_USED(a) == 32) {
 872           s_mp_mul_comba_32(a, b, c);
 873           goto CLEANUP;
 874       }
 875   }
 876 #endif
 877
 878   pb = MP_DIGITS(b);
 879   s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
 880
 881   /* Outer loop:  Digits of b */
 882   useda = MP_USED(a);
 883   usedb = MP_USED(b);
 884   for (ib = 1; ib < usedb; ib++) {
 885     mp_digit b_i    = *pb++;
 886
 887     /* Inner product:  Digits of a */
 888     if (b_i)
 889       s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
 890     else
 891       MP_DIGIT(c, ib + useda) = b_i;
 892   }
 893
 894   s_mp_clamp(c);
 895
 896   if(SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
 897     SIGN(c) = ZPOS;
 898   else
 899     SIGN(c) = NEG;
 900
 901 CLEANUP:
 902   mp_clear(&tmp);
 903   return res;
 904 } /* end mp_mul() */
 905
 906 /* }}} */
 907
 908 /* {{{ mp_sqr(a, sqr) */
 909
 910 #if MP_SQUARE
 911 /*
 912   Computes the square of a.  This can be done more
 913   efficiently than a general multiplication, because many of the
 914   computation steps are redundant when squaring.  The inner product
 915   step is a bit more complicated, but we save a fair number of
 916   iterations of the multiplication loop.
 917  */
 918
 919 /* sqr = a^2;   Caller provides both a and tmp; */
 920 mp_err   mp_sqr(const mp_int *a, mp_int *sqr)
 921 {
 922   mp_digit *pa;
 923   mp_digit d;
 924   mp_err   res;
 925   mp_size  ix;
 926   mp_int   tmp;
 927   int      count;
 928
 929   ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
 930
 931   if (a == sqr) {
 932     if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
 933       return res;
 934     a = &tmp;
 935   } else {
 936     DIGITS(&tmp) = 0;
 937     res = MP_OKAY;
 938   }
 939
 940   ix = 2 * MP_USED(a);
 941   if (ix > MP_ALLOC(sqr)) {
 942     MP_USED(sqr) = 1;
 943     MP_CHECKOK( s_mp_grow(sqr, ix) );
 944   }
 945   MP_USED(sqr) = ix;
 946   MP_DIGIT(sqr, 0) = 0;
 947
 948 #ifdef NSS_USE_COMBA
 949   if (IS_POWER_OF_2(MP_USED(a))) {
 950       if (MP_USED(a) == 4) {
 951           s_mp_sqr_comba_4(a, sqr);
 952           goto CLEANUP;
 953       }
 954       if (MP_USED(a) == 8) {
 955           s_mp_sqr_comba_8(a, sqr);
 956           goto CLEANUP;
 957       }
 958       if (MP_USED(a) == 16) {
 959           s_mp_sqr_comba_16(a, sqr);
 960           goto CLEANUP;
 961       }
 962       if (MP_USED(a) == 32) {
 963           s_mp_sqr_comba_32(a, sqr);
 964           goto CLEANUP;
 965       }
 966   }
 967 #endif
 968
 969   pa = MP_DIGITS(a);
 970   count = MP_USED(a) - 1;
 971   if (count > 0) {
 972     d = *pa++;
 973     s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
 974     for (ix = 3; --count > 0; ix += 2) {
 975       d = *pa++;
 976       s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
 977     } /* for(ix ...) */
 978     MP_DIGIT(sqr, MP_USED(sqr)-1) = 0; /* above loop stopped short of this. */
 979
 980     /* now sqr *= 2 */
 981     s_mp_mul_2(sqr);
 982   } else {
 983     MP_DIGIT(sqr, 1) = 0;
 984   }
 985
 986   /* now add the squares of the digits of a to sqr. */
 987   s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
 988
 989   SIGN(sqr) = ZPOS;
 990   s_mp_clamp(sqr);
 991
 992 CLEANUP:
 993   mp_clear(&tmp);
 994   return res;
 995
 996 } /* end mp_sqr() */
 997 #endif
 998
 999 /* }}} */
1000
1001 /* {{{ mp_div(a, b, q, r) */
1002
1003 /*
1004   mp_div(a, b, q, r)
1005
1006   Compute q = a / b and r = a mod b.  Input parameters may be re-used
1007   as output parameters.  If q or r is NULL, that portion of the
1008   computation will be discarded (although it will still be computed)
1009  */
1010 mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
1011 {
1012   mp_err   res;
1013   mp_int   *pQ, *pR;
1014   mp_int   qtmp, rtmp, btmp;
1015   int      cmp;
1016   mp_sign  signA;
1017   mp_sign  signB;
1018
1019   ARGCHK(a != NULL && b != NULL, MP_BADARG);
1020
1021   signA = MP_SIGN(a);
1022   signB = MP_SIGN(b);
1023
1024   if(mp_cmp_z(b) == MP_EQ)
1025     return MP_RANGE;
1026
1027   DIGITS(&qtmp) = 0;
1028   DIGITS(&rtmp) = 0;
1029   DIGITS(&btmp) = 0;
1030
1031   /* Set up some temporaries... */
1032   if (!r || r == a || r == b) {
1033     MP_CHECKOK( mp_init_copy(&rtmp, a) );
1034     pR = &rtmp;
1035   } else {
1036     MP_CHECKOK( mp_copy(a, r) );
1037     pR = r;
1038   }
1039
1040   if (!q || q == a || q == b) {
1041     MP_CHECKOK( mp_init_size(&qtmp, MP_USED(a), FLAG(a)) );
1042     pQ = &qtmp;
1043   } else {
1044     MP_CHECKOK( s_mp_pad(q, MP_USED(a)) );
1045     pQ = q;
1046     mp_zero(pQ);
1047   }
1048
1049   /*
1050     If |a| <= |b|, we can compute the solution without division;
1051     otherwise, we actually do the work required.
1052    */
1053   if ((cmp = s_mp_cmp(a, b)) <= 0) {
1054     if (cmp) {
1055       /* r was set to a above. */
1056       mp_zero(pQ);
1057     } else {
1058       mp_set(pQ, 1);
1059       mp_zero(pR);
1060     }
1061   } else {
1062     MP_CHECKOK( mp_init_copy(&btmp, b) );
1063     MP_CHECKOK( s_mp_div(pR, &btmp, pQ) );
1064   }
1065
1066   /* Compute the signs for the output  */
1067   MP_SIGN(pR) = signA;   /* Sr = Sa              */
1068   /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
1069   MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
1070
1071   if(s_mp_cmp_d(pQ, 0) == MP_EQ)
1072     SIGN(pQ) = ZPOS;
1073   if(s_mp_cmp_d(pR, 0) == MP_EQ)
1074     SIGN(pR) = ZPOS;
1075
1076   /* Copy output, if it is needed      */
1077   if(q && q != pQ)
1078     s_mp_exch(pQ, q);
1079
1080   if(r && r != pR)
1081     s_mp_exch(pR, r);
1082
1083 CLEANUP:
1084   mp_clear(&btmp);
1085   mp_clear(&rtmp);
1086   mp_clear(&qtmp);
1087
1088   return res;
1089
1090 } /* end mp_div() */
1091
1092 /* }}} */
1093
1094 /* {{{ mp_div_2d(a, d, q, r) */
1095
1096 mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
1097 {
1098   mp_err  res;
1099
1100   ARGCHK(a != NULL, MP_BADARG);
1101
1102   if(q) {
1103     if((res = mp_copy(a, q)) != MP_OKAY)
1104       return res;
1105   }
1106   if(r) {
1107     if((res = mp_copy(a, r)) != MP_OKAY)
1108       return res;
1109   }
1110   if(q) {
1111     s_mp_div_2d(q, d);
1112   }
1113   if(r) {
1114     s_mp_mod_2d(r, d);
1115   }
1116
1117   return MP_OKAY;
1118
1119 } /* end mp_div_2d() */
1120
1121 /* }}} */
1122
1123 /* {{{ mp_expt(a, b, c) */
1124
1125 /*
1126   mp_expt(a, b, c)
1127
1128   Compute c = a ** b, that is, raise a to the b power.  Uses a
1129   standard iterative square-and-multiply technique.
1130  */
1131
1132 mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c)
1133 {
1134   mp_int   s, x;
1135   mp_err   res;
1136   mp_digit d;
1137   int      dig, bit;
1138
1139   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1140
1141   if(mp_cmp_z(b) < 0)
1142     return MP_RANGE;
1143
1144   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1145     return res;
1146
1147   mp_set(&s, 1);
1148
1149   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1150     goto X;
1151
1152   /* Loop over low-order digits in ascending order */
1153   for(dig = 0; dig < (USED(b) - 1); dig++) {
1154     d = DIGIT(b, dig);
1155
1156     /* Loop over bits of each non-maximal digit */
1157     for(bit = 0; bit < DIGIT_BIT; bit++) {
1158       if(d & 1) {
1159         if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1160           goto CLEANUP;
1161       }
1162
1163       d >>= 1;
1164
1165       if((res = s_mp_sqr(&x)) != MP_OKAY)
1166         goto CLEANUP;
1167     }
1168   }
1169
1170   /* Consider now the last digit... */
1171   d = DIGIT(b, dig);
1172
1173   while(d) {
1174     if(d & 1) {
1175       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1176         goto CLEANUP;
1177     }
1178
1179     d >>= 1;
1180
1181     if((res = s_mp_sqr(&x)) != MP_OKAY)
1182       goto CLEANUP;
1183   }
1184
1185   if(mp_iseven(b))
1186     SIGN(&s) = SIGN(a);
1187
1188   res = mp_copy(&s, c);
1189
1190 CLEANUP:
1191   mp_clear(&x);
1192 X:
1193   mp_clear(&s);
1194
1195   return res;
1196
1197 } /* end mp_expt() */
1198
1199 /* }}} */
1200
1201 /* {{{ mp_2expt(a, k) */
1202
1203 /* Compute a = 2^k */
1204
1205 mp_err mp_2expt(mp_int *a, mp_digit k)
1206 {
1207   ARGCHK(a != NULL, MP_BADARG);
1208
1209   return s_mp_2expt(a, k);
1210
1211 } /* end mp_2expt() */
1212
1213 /* }}} */
1214
1215 /* {{{ mp_mod(a, m, c) */
1216
1217 /*
1218   mp_mod(a, m, c)
1219
1220   Compute c = a (mod m).  Result will always be 0 <= c < m.
1221  */
1222
1223 mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
1224 {
1225   mp_err  res;
1226   int     mag;
1227
1228   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1229
1230   if(SIGN(m) == NEG)
1231     return MP_RANGE;
1232
1233   /*
1234      If |a| > m, we need to divide to get the remainder and take the
1235      absolute value.
1236
1237      If |a| < m, we don't need to do any division, just copy and adjust
1238      the sign (if a is negative).
1239
1240      If |a| == m, we can simply set the result to zero.
1241
1242      This order is intended to minimize the average path length of the
1243      comparison chain on common workloads -- the most frequent cases are
1244      that |a| != m, so we do those first.
1245    */
1246   if((mag = s_mp_cmp(a, m)) > 0) {
1247     if((res = mp_div(a, m, NULL, c)) != MP_OKAY)
1248       return res;
1249
1250     if(SIGN(c) == NEG) {
1251       if((res = mp_add(c, m, c)) != MP_OKAY)
1252         return res;
1253     }
1254
1255   } else if(mag < 0) {
1256     if((res = mp_copy(a, c)) != MP_OKAY)
1257       return res;
1258
1259     if(mp_cmp_z(a) < 0) {
1260       if((res = mp_add(c, m, c)) != MP_OKAY)
1261         return res;
1262
1263     }
1264
1265   } else {
1266     mp_zero(c);
1267
1268   }
1269
1270   return MP_OKAY;
1271
1272 } /* end mp_mod() */
1273
1274 /* }}} */
1275
1276 /* {{{ mp_mod_d(a, d, c) */
1277
1278 /*
1279   mp_mod_d(a, d, c)
1280
1281   Compute c = a (mod d).  Result will always be 0 <= c < d
1282  */
1283 mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
1284 {
1285   mp_err   res;
1286   mp_digit rem;
1287
1288   ARGCHK(a != NULL && c != NULL, MP_BADARG);
1289
1290   if(s_mp_cmp_d(a, d) > 0) {
1291     if((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
1292       return res;
1293
1294   } else {
1295     if(SIGN(a) == NEG)
1296       rem = d - DIGIT(a, 0);
1297     else
1298       rem = DIGIT(a, 0);
1299   }
1300
1301   if(c)
1302     *c = rem;
1303
1304   return MP_OKAY;
1305
1306 } /* end mp_mod_d() */
1307
1308 /* }}} */
1309
1310 /* {{{ mp_sqrt(a, b) */
1311
1312 /*
1313   mp_sqrt(a, b)
1314
1315   Compute the integer square root of a, and store the result in b.
1316   Uses an integer-arithmetic version of Newton's iterative linear
1317   approximation technique to determine this value; the result has the
1318   following two properties:
1319
1320      b^2 <= a
1321      (b+1)^2 >= a
1322
1323   It is a range error to pass a negative value.
1324  */
1325 mp_err mp_sqrt(const mp_int *a, mp_int *b)
1326 {
1327   mp_int   x, t;
1328   mp_err   res;
1329   mp_size  used;
1330
1331   ARGCHK(a != NULL && b != NULL, MP_BADARG);
1332
1333   /* Cannot take square root of a negative value */
1334   if(SIGN(a) == NEG)
1335     return MP_RANGE;
1336
1337   /* Special cases for zero and one, trivial     */
1338   if(mp_cmp_d(a, 1) <= 0)
1339     return mp_copy(a, b);
1340
1341   /* Initialize the temporaries we'll use below  */
1342   if((res = mp_init_size(&t, USED(a), FLAG(a))) != MP_OKAY)
1343     return res;
1344
1345   /* Compute an initial guess for the iteration as a itself */
1346   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1347     goto X;
1348
1349   used = MP_USED(&x);
1350   if (used > 1) {
1351     s_mp_rshd(&x, used / 2);
1352   }
1353
1354   for(;;) {
1355     /* t = (x * x) - a */
1356     mp_copy(&x, &t);      /* can't fail, t is big enough for original x */
1357     if((res = mp_sqr(&t, &t)) != MP_OKAY ||
1358        (res = mp_sub(&t, a, &t)) != MP_OKAY)
1359       goto CLEANUP;
1360
1361     /* t = t / 2x       */
1362     s_mp_mul_2(&x);
1363     if((res = mp_div(&t, &x, &t, NULL)) != MP_OKAY)
1364       goto CLEANUP;
1365     s_mp_div_2(&x);
1366
1367     /* Terminate the loop, if the quotient is zero */
1368     if(mp_cmp_z(&t) == MP_EQ)
1369       break;
1370
1371     /* x = x - t       */
1372     if((res = mp_sub(&x, &t, &x)) != MP_OKAY)
1373       goto CLEANUP;
1374
1375   }
1376
1377   /* Copy result to output parameter */
1378   mp_sub_d(&x, 1, &x);
1379   s_mp_exch(&x, b);
1380
1381  CLEANUP:
1382   mp_clear(&x);
1383  X:
1384   mp_clear(&t);
1385
1386   return res;
1387
1388 } /* end mp_sqrt() */
1389
1390 /* }}} */
1391
1392 /* }}} */
1393
1394 /*------------------------------------------------------------------------*/
1395 /* {{{ Modular arithmetic */
1396
1397 #if MP_MODARITH
1398 /* {{{ mp_addmod(a, b, m, c) */
1399
1400 /*
1401   mp_addmod(a, b, m, c)
1402
1403   Compute c = (a + b) mod m
1404  */
1405
1406 mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1407 {
1408   mp_err  res;
1409
1410   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1411
1412   if((res = mp_add(a, b, c)) != MP_OKAY)
1413     return res;
1414   if((res = mp_mod(c, m, c)) != MP_OKAY)
1415     return res;
1416
1417   return MP_OKAY;
1418
1419 }
1420
1421 /* }}} */
1422
1423 /* {{{ mp_submod(a, b, m, c) */
1424
1425 /*
1426   mp_submod(a, b, m, c)
1427
1428   Compute c = (a - b) mod m
1429  */
1430
1431 mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1432 {
1433   mp_err  res;
1434
1435   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1436
1437   if((res = mp_sub(a, b, c)) != MP_OKAY)
1438     return res;
1439   if((res = mp_mod(c, m, c)) != MP_OKAY)
1440     return res;
1441
1442   return MP_OKAY;
1443
1444 }
1445
1446 /* }}} */
1447
1448 /* {{{ mp_mulmod(a, b, m, c) */
1449
1450 /*
1451   mp_mulmod(a, b, m, c)
1452
1453   Compute c = (a * b) mod m
1454  */
1455
1456 mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1457 {
1458   mp_err  res;
1459
1460   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1461
1462   if((res = mp_mul(a, b, c)) != MP_OKAY)
1463     return res;
1464   if((res = mp_mod(c, m, c)) != MP_OKAY)
1465     return res;
1466
1467   return MP_OKAY;
1468
1469 }
1470
1471 /* }}} */
1472
1473 /* {{{ mp_sqrmod(a, m, c) */
1474
1475 #if MP_SQUARE
1476 mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
1477 {
1478   mp_err  res;
1479
1480   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1481
1482   if((res = mp_sqr(a, c)) != MP_OKAY)
1483     return res;
1484   if((res = mp_mod(c, m, c)) != MP_OKAY)
1485     return res;
1486
1487   return MP_OKAY;
1488
1489 } /* end mp_sqrmod() */
1490 #endif
1491
1492 /* }}} */
1493
1494 /* {{{ s_mp_exptmod(a, b, m, c) */
1495
1496 /*
1497   s_mp_exptmod(a, b, m, c)
1498
1499   Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
1500   method with modular reductions at each step. (This is basically the
1501   same code as mp_expt(), except for the addition of the reductions)
1502
1503   The modular reductions are done using Barrett's algorithm (see
1504   s_mp_reduce() below for details)
1505  */
1506
1507 mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1508 {
1509   mp_int   s, x, mu;
1510   mp_err   res;
1511   mp_digit d;
1512   int      dig, bit;
1513
1514   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1515
1516   if(mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
1517     return MP_RANGE;
1518
1519   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1520     return res;
1521   if((res = mp_init_copy(&x, a)) != MP_OKAY ||
1522      (res = mp_mod(&x, m, &x)) != MP_OKAY)
1523     goto X;
1524   if((res = mp_init(&mu, FLAG(a))) != MP_OKAY)
1525     goto MU;
1526
1527   mp_set(&s, 1);
1528
1529   /* mu = b^2k / m */
1530   s_mp_add_d(&mu, 1);
1531   s_mp_lshd(&mu, 2 * USED(m));
1532   if((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
1533     goto CLEANUP;
1534
1535   /* Loop over digits of b in ascending order, except highest order */
1536   for(dig = 0; dig < (USED(b) - 1); dig++) {
1537     d = DIGIT(b, dig);
1538
1539     /* Loop over the bits of the lower-order digits */
1540     for(bit = 0; bit < DIGIT_BIT; bit++) {
1541       if(d & 1) {
1542         if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1543           goto CLEANUP;
1544         if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1545           goto CLEANUP;
1546       }
1547
1548       d >>= 1;
1549
1550       if((res = s_mp_sqr(&x)) != MP_OKAY)
1551         goto CLEANUP;
1552       if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1553         goto CLEANUP;
1554     }
1555   }
1556
1557   /* Now do the last digit... */
1558   d = DIGIT(b, dig);
1559
1560   while(d) {
1561     if(d & 1) {
1562       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1563         goto CLEANUP;
1564       if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1565         goto CLEANUP;
1566     }
1567
1568     d >>= 1;
1569
1570     if((res = s_mp_sqr(&x)) != MP_OKAY)
1571       goto CLEANUP;
1572     if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1573       goto CLEANUP;
1574   }
1575
1576   s_mp_exch(&s, c);
1577
1578  CLEANUP:
1579   mp_clear(&mu);
1580  MU:
1581   mp_clear(&x);
1582  X:
1583   mp_clear(&s);
1584
1585   return res;
1586
1587 } /* end s_mp_exptmod() */
1588
1589 /* }}} */
1590
1591 /* {{{ mp_exptmod_d(a, d, m, c) */
1592
1593 mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
1594 {
1595   mp_int   s, x;
1596   mp_err   res;
1597
1598   ARGCHK(a != NULL && c != NULL, MP_BADARG);
1599
1600   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1601     return res;
1602   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1603     goto X;
1604
1605   mp_set(&s, 1);
1606
1607   while(d != 0) {
1608     if(d & 1) {
1609       if((res = s_mp_mul(&s, &x)) != MP_OKAY ||
1610          (res = mp_mod(&s, m, &s)) != MP_OKAY)
1611         goto CLEANUP;
1612     }
1613
1614     d /= 2;
1615
1616     if((res = s_mp_sqr(&x)) != MP_OKAY ||
1617        (res = mp_mod(&x, m, &x)) != MP_OKAY)
1618       goto CLEANUP;
1619   }
1620
1621   s_mp_exch(&s, c);
1622
1623 CLEANUP:
1624   mp_clear(&x);
1625 X:
1626   mp_clear(&s);
1627
1628   return res;
1629
1630 } /* end mp_exptmod_d() */
1631
1632 /* }}} */
1633 #endif /* if MP_MODARITH */
1634
1635 /* }}} */
1636
1637 /*------------------------------------------------------------------------*/
1638 /* {{{ Comparison functions */
1639
1640 /* {{{ mp_cmp_z(a) */
1641
1642 /*
1643   mp_cmp_z(a)
1644
1645   Compare a <=> 0.  Returns <0 if a<0, 0 if a=0, >0 if a>0.
1646  */
1647
1648 int    mp_cmp_z(const mp_int *a)
1649 {
1650   if(SIGN(a) == NEG)
1651     return MP_LT;
1652   else if(USED(a) == 1 && DIGIT(a, 0) == 0)
1653     return MP_EQ;
1654   else
1655     return MP_GT;
1656
1657 } /* end mp_cmp_z() */
1658
1659 /* }}} */
1660
1661 /* {{{ mp_cmp_d(a, d) */
1662
1663 /*
1664   mp_cmp_d(a, d)
1665
1666   Compare a <=> d.  Returns <0 if a<d, 0 if a=d, >0 if a>d
1667  */
1668
1669 int    mp_cmp_d(const mp_int *a, mp_digit d)
1670 {
1671   ARGCHK(a != NULL, MP_EQ);
1672
1673   if(SIGN(a) == NEG)
1674     return MP_LT;
1675
1676   return s_mp_cmp_d(a, d);
1677
1678 } /* end mp_cmp_d() */
1679
1680 /* }}} */
1681
1682 /* {{{ mp_cmp(a, b) */
1683
1684 int    mp_cmp(const mp_int *a, const mp_int *b)
1685 {
1686   ARGCHK(a != NULL && b != NULL, MP_EQ);
1687
1688   if(SIGN(a) == SIGN(b)) {
1689     int  mag;
1690
1691     if((mag = s_mp_cmp(a, b)) == MP_EQ)
1692       return MP_EQ;
1693
1694     if(SIGN(a) == ZPOS)
1695       return mag;
1696     else
1697       return -mag;
1698
1699   } else if(SIGN(a) == ZPOS) {
1700     return MP_GT;
1701   } else {
1702     return MP_LT;
1703   }
1704
1705 } /* end mp_cmp() */
1706
1707 /* }}} */
1708
1709 /* {{{ mp_cmp_mag(a, b) */
1710
1711 /*
1712   mp_cmp_mag(a, b)
1713
1714   Compares |a| <=> |b|, and returns an appropriate comparison result
1715  */
1716
1717 int    mp_cmp_mag(mp_int *a, mp_int *b)
1718 {
1719   ARGCHK(a != NULL && b != NULL, MP_EQ);
1720
1721   return s_mp_cmp(a, b);
1722
1723 } /* end mp_cmp_mag() */
1724
1725 /* }}} */
1726
1727 /* {{{ mp_cmp_int(a, z, kmflag) */
1728
1729 /*
1730   This just converts z to an mp_int, and uses the existing comparison
1731   routines.  This is sort of inefficient, but it's not clear to me how
1732   frequently this wil get used anyway.  For small positive constants,
1733   you can always use mp_cmp_d(), and for zero, there is mp_cmp_z().
1734  */
1735 int    mp_cmp_int(const mp_int *a, long z, int kmflag)
1736 {
1737   mp_int  tmp;
1738   int     out;
1739
1740   ARGCHK(a != NULL, MP_EQ);
1741
1742   mp_init(&tmp, kmflag); mp_set_int(&tmp, z);
1743   out = mp_cmp(a, &tmp);
1744   mp_clear(&tmp);
1745
1746   return out;
1747
1748 } /* end mp_cmp_int() */
1749
1750 /* }}} */
1751
1752 /* {{{ mp_isodd(a) */
1753
1754 /*
1755   mp_isodd(a)
1756
1757   Returns a true (non-zero) value if a is odd, false (zero) otherwise.
1758  */
1759 int    mp_isodd(const mp_int *a)
1760 {
1761   ARGCHK(a != NULL, 0);
1762
1763   return (int)(DIGIT(a, 0) & 1);
1764
1765 } /* end mp_isodd() */
1766
1767 /* }}} */
1768
1769 /* {{{ mp_iseven(a) */
1770
1771 int    mp_iseven(const mp_int *a)
1772 {
1773   return !mp_isodd(a);
1774
1775 } /* end mp_iseven() */
1776
1777 /* }}} */
1778
1779 /* }}} */
1780
1781 /*------------------------------------------------------------------------*/
1782 /* {{{ Number theoretic functions */
1783
1784 #if MP_NUMTH
1785 /* {{{ mp_gcd(a, b, c) */
1786
1787 /*
1788   Like the old mp_gcd() function, except computes the GCD using the
1789   binary algorithm due to Josef Stein in 1961 (via Knuth).
1790  */
1791 mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c)
1792 {
1793   mp_err   res;
1794   mp_int   u, v, t;
1795   mp_size  k = 0;
1796
1797   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1798
1799   if(mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ)
1800       return MP_RANGE;
1801   if(mp_cmp_z(a) == MP_EQ) {
1802     return mp_copy(b, c);
1803   } else if(mp_cmp_z(b) == MP_EQ) {
1804     return mp_copy(a, c);
1805   }
1806
1807   if((res = mp_init(&t, FLAG(a))) != MP_OKAY)
1808     return res;
1809   if((res = mp_init_copy(&u, a)) != MP_OKAY)
1810     goto U;
1811   if((res = mp_init_copy(&v, b)) != MP_OKAY)
1812     goto V;
1813
1814   SIGN(&u) = ZPOS;
1815   SIGN(&v) = ZPOS;
1816
1817   /* Divide out common factors of 2 until at least 1 of a, b is even */
1818   while(mp_iseven(&u) && mp_iseven(&v)) {
1819     s_mp_div_2(&u);
1820     s_mp_div_2(&v);
1821     ++k;
1822   }
1823
1824   /* Initialize t */
1825   if(mp_isodd(&u)) {
1826     if((res = mp_copy(&v, &t)) != MP_OKAY)
1827       goto CLEANUP;
1828
1829     /* t = -v */
1830     if(SIGN(&v) == ZPOS)
1831       SIGN(&t) = NEG;
1832     else
1833       SIGN(&t) = ZPOS;
1834
1835   } else {
1836     if((res = mp_copy(&u, &t)) != MP_OKAY)
1837       goto CLEANUP;
1838
1839   }
1840
1841   for(;;) {
1842     while(mp_iseven(&t)) {
1843       s_mp_div_2(&t);
1844     }
1845
1846     if(mp_cmp_z(&t) == MP_GT) {
1847       if((res = mp_copy(&t, &u)) != MP_OKAY)
1848         goto CLEANUP;
1849
1850     } else {
1851       if((res = mp_copy(&t, &v)) != MP_OKAY)
1852         goto CLEANUP;
1853
1854       /* v = -t */
1855       if(SIGN(&t) == ZPOS)
1856         SIGN(&v) = NEG;
1857       else
1858         SIGN(&v) = ZPOS;
1859     }
1860
1861     if((res = mp_sub(&u, &v, &t)) != MP_OKAY)
1862       goto CLEANUP;
1863
1864     if(s_mp_cmp_d(&t, 0) == MP_EQ)
1865       break;
1866   }
1867
1868   s_mp_2expt(&v, k);       /* v = 2^k   */
1869   res = mp_mul(&u, &v, c); /* c = u * v */
1870
1871  CLEANUP:
1872   mp_clear(&v);
1873  V:
1874   mp_clear(&u);
1875  U:
1876   mp_clear(&t);
1877
1878   return res;
1879
1880 } /* end mp_gcd() */
1881
1882 /* }}} */
1883
1884 /* {{{ mp_lcm(a, b, c) */
1885
1886 /* We compute the least common multiple using the rule:
1887
1888    ab = [a, b](a, b)
1889
1890    ... by computing the product, and dividing out the gcd.
1891  */
1892
1893 mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c)
1894 {
1895   mp_int  gcd, prod;
1896   mp_err  res;
1897
1898   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1899
1900   /* Set up temporaries */
1901   if((res = mp_init(&gcd, FLAG(a))) != MP_OKAY)
1902     return res;
1903   if((res = mp_init(&prod, FLAG(a))) != MP_OKAY)
1904     goto GCD;
1905
1906   if((res = mp_mul(a, b, &prod)) != MP_OKAY)
1907     goto CLEANUP;
1908   if((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
1909     goto CLEANUP;
1910
1911   res = mp_div(&prod, &gcd, c, NULL);
1912
1913  CLEANUP:
1914   mp_clear(&prod);
1915  GCD:
1916   mp_clear(&gcd);
1917
1918   return res;
1919
1920 } /* end mp_lcm() */
1921
1922 /* }}} */
1923
1924 /* {{{ mp_xgcd(a, b, g, x, y) */
1925
1926 /*
1927   mp_xgcd(a, b, g, x, y)
1928
1929   Compute g = (a, b) and values x and y satisfying Bezout's identity
1930   (that is, ax + by = g).  This uses the binary extended GCD algorithm
1931   based on the Stein algorithm used for mp_gcd()
1932   See algorithm 14.61 in Handbook of Applied Cryptogrpahy.
1933  */
1934
1935 mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
1936 {
1937   mp_int   gx, xc, yc, u, v, A, B, C, D;
1938   mp_int  *clean[9];
1939   mp_err   res;
1940   int      last = -1;
1941
1942   if(mp_cmp_z(b) == 0)
1943     return MP_RANGE;
1944
1945   /* Initialize all these variables we need */
1946   MP_CHECKOK( mp_init(&u, FLAG(a)) );
1947   clean[++last] = &u;
1948   MP_CHECKOK( mp_init(&v, FLAG(a)) );
1949   clean[++last] = &v;
1950   MP_CHECKOK( mp_init(&gx, FLAG(a)) );
1951   clean[++last] = &gx;
1952   MP_CHECKOK( mp_init(&A, FLAG(a)) );
1953   clean[++last] = &A;
1954   MP_CHECKOK( mp_init(&B, FLAG(a)) );
1955   clean[++last] = &B;
1956   MP_CHECKOK( mp_init(&C, FLAG(a)) );
1957   clean[++last] = &C;
1958   MP_CHECKOK( mp_init(&D, FLAG(a)) );
1959   clean[++last] = &D;
1960   MP_CHECKOK( mp_init_copy(&xc, a) );
1961   clean[++last] = &xc;
1962   mp_abs(&xc, &xc);
1963   MP_CHECKOK( mp_init_copy(&yc, b) );
1964   clean[++last] = &yc;
1965   mp_abs(&yc, &yc);
1966
1967   mp_set(&gx, 1);
1968
1969   /* Divide by two until at least one of them is odd */
1970   while(mp_iseven(&xc) && mp_iseven(&yc)) {
1971     mp_size nx = mp_trailing_zeros(&xc);
1972     mp_size ny = mp_trailing_zeros(&yc);
1973     mp_size n  = MP_MIN(nx, ny);
1974     s_mp_div_2d(&xc,n);
1975     s_mp_div_2d(&yc,n);
1976     MP_CHECKOK( s_mp_mul_2d(&gx,n) );
1977   }
1978
1979   mp_copy(&xc, &u);
1980   mp_copy(&yc, &v);
1981   mp_set(&A, 1); mp_set(&D, 1);
1982
1983   /* Loop through binary GCD algorithm */
1984   do {
1985     while(mp_iseven(&u)) {
1986       s_mp_div_2(&u);
1987
1988       if(mp_iseven(&A) && mp_iseven(&B)) {
1989         s_mp_div_2(&A); s_mp_div_2(&B);
1990       } else {
1991         MP_CHECKOK( mp_add(&A, &yc, &A) );
1992         s_mp_div_2(&A);
1993         MP_CHECKOK( mp_sub(&B, &xc, &B) );
1994         s_mp_div_2(&B);
1995       }
1996     }
1997
1998     while(mp_iseven(&v)) {
1999       s_mp_div_2(&v);
2000
2001       if(mp_iseven(&C) && mp_iseven(&D)) {
2002         s_mp_div_2(&C); s_mp_div_2(&D);
2003       } else {
2004         MP_CHECKOK( mp_add(&C, &yc, &C) );
2005         s_mp_div_2(&C);
2006         MP_CHECKOK( mp_sub(&D, &xc, &D) );
2007         s_mp_div_2(&D);
2008       }
2009     }
2010
2011     if(mp_cmp(&u, &v) >= 0) {
2012       MP_CHECKOK( mp_sub(&u, &v, &u) );
2013       MP_CHECKOK( mp_sub(&A, &C, &A) );
2014       MP_CHECKOK( mp_sub(&B, &D, &B) );
2015     } else {
2016       MP_CHECKOK( mp_sub(&v, &u, &v) );
2017       MP_CHECKOK( mp_sub(&C, &A, &C) );
2018       MP_CHECKOK( mp_sub(&D, &B, &D) );
2019     }
2020   } while (mp_cmp_z(&u) != 0);
2021
2022   /* copy results to output */
2023   if(x)
2024     MP_CHECKOK( mp_copy(&C, x) );
2025
2026   if(y)
2027     MP_CHECKOK( mp_copy(&D, y) );
2028
2029   if(g)
2030     MP_CHECKOK( mp_mul(&gx, &v, g) );
2031
2032  CLEANUP:
2033   while(last >= 0)
2034     mp_clear(clean[last--]);
2035
2036   return res;
2037
2038 } /* end mp_xgcd() */
2039
2040 /* }}} */
2041
2042 mp_size mp_trailing_zeros(const mp_int *mp)
2043 {
2044   mp_digit d;
2045   mp_size  n = 0;
2046   int      ix;
2047
2048   if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
2049     return n;
2050
2051   for (ix = 0; !(d = MP_DIGIT(mp,ix)) && (ix < MP_USED(mp)); ++ix)
2052     n += MP_DIGIT_BIT;
2053   if (!d)
2054     return 0;   /* shouldn't happen, but ... */
2055 #if !defined(MP_USE_UINT_DIGIT)
2056   if (!(d & 0xffffffffU)) {
2057     d >>= 32;
2058     n  += 32;
2059   }
2060 #endif
2061   if (!(d & 0xffffU)) {
2062     d >>= 16;
2063     n  += 16;
2064   }
2065   if (!(d & 0xffU)) {
2066     d >>= 8;
2067     n  += 8;
2068   }
2069   if (!(d & 0xfU)) {
2070     d >>= 4;
2071     n  += 4;
2072   }
2073   if (!(d & 0x3U)) {
2074     d >>= 2;
2075     n  += 2;
2076   }
2077   if (!(d & 0x1U)) {
2078     d >>= 1;
2079     n  += 1;
2080   }
2081 #if MP_ARGCHK == 2
2082   assert(0 != (d & 1));
2083 #endif
2084   return n;
2085 }
2086
2087 /* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
2088 ** Returns k (positive) or error (negative).
2089 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2090 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2091 */
2092 mp_err s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
2093 {
2094   mp_err res;
2095   mp_err k    = 0;
2096   mp_int d, f, g;
2097
2098   ARGCHK(a && p && c, MP_BADARG);
2099
2100   MP_DIGITS(&d) = 0;
2101   MP_DIGITS(&f) = 0;
2102   MP_DIGITS(&g) = 0;
2103   MP_CHECKOK( mp_init(&d, FLAG(a)) );
2104   MP_CHECKOK( mp_init_copy(&f, a) );    /* f = a */
2105   MP_CHECKOK( mp_init_copy(&g, p) );    /* g = p */
2106
2107   mp_set(c, 1);
2108   mp_zero(&d);
2109
2110   if (mp_cmp_z(&f) == 0) {
2111     res = MP_UNDEF;
2112   } else
2113   for (;;) {
2114     int diff_sign;
2115     while (mp_iseven(&f)) {
2116       mp_size n = mp_trailing_zeros(&f);
2117       if (!n) {
2118         res = MP_UNDEF;
2119         goto CLEANUP;
2120       }
2121       s_mp_div_2d(&f, n);
2122       MP_CHECKOK( s_mp_mul_2d(&d, n) );
2123       k += n;
2124     }
2125     if (mp_cmp_d(&f, 1) == MP_EQ) {     /* f == 1 */
2126       res = k;
2127       break;
2128     }
2129     diff_sign = mp_cmp(&f, &g);
2130     if (diff_sign < 0) {                /* f < g */
2131       s_mp_exch(&f, &g);
2132       s_mp_exch(c, &d);
2133     } else if (diff_sign == 0) {                /* f == g */
2134       res = MP_UNDEF;           /* a and p are not relatively prime */
2135       break;
2136     }
2137     if ((MP_DIGIT(&f,0) % 4) == (MP_DIGIT(&g,0) % 4)) {
2138       MP_CHECKOK( mp_sub(&f, &g, &f) ); /* f = f - g */
2139       MP_CHECKOK( mp_sub(c,  &d,  c) ); /* c = c - d */
2140     } else {
2141       MP_CHECKOK( mp_add(&f, &g, &f) ); /* f = f + g */
2142       MP_CHECKOK( mp_add(c,  &d,  c) ); /* c = c + d */
2143     }
2144   }
2145   if (res >= 0) {
2146     while (MP_SIGN(c) != MP_ZPOS) {
2147       MP_CHECKOK( mp_add(c, p, c) );
2148     }
2149     res = k;
2150   }
2151
2152 CLEANUP:
2153   mp_clear(&d);
2154   mp_clear(&f);
2155   mp_clear(&g);
2156   return res;
2157 }
2158
2159 /* Compute T = (P ** -1) mod MP_RADIX.  Also works for 16-bit mp_digits.
2160 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2161 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2162 */
2163 mp_digit  s_mp_invmod_radix(mp_digit P)
2164 {
2165   mp_digit T = P;
2166   T *= 2 - (P * T);
2167   T *= 2 - (P * T);
2168   T *= 2 - (P * T);
2169   T *= 2 - (P * T);
2170 #if !defined(MP_USE_UINT_DIGIT)
2171   T *= 2 - (P * T);
2172   T *= 2 - (P * T);
2173 #endif
2174   return T;
2175 }
2176
2177 /* Given c, k, and prime p, where a*c == 2**k (mod p),
2178 ** Compute x = (a ** -1) mod p.  This is similar to Montgomery reduction.
2179 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2180 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2181 */
2182 mp_err  s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
2183 {
2184   int      k_orig = k;
2185   mp_digit r;
2186   mp_size  ix;
2187   mp_err   res;
2188
2189   if (mp_cmp_z(c) < 0) {                /* c < 0 */
2190     MP_CHECKOK( mp_add(c, p, x) );      /* x = c + p */
2191   } else {
2192     MP_CHECKOK( mp_copy(c, x) );        /* x = c */
2193   }
2194
2195   /* make sure x is large enough */
2196   ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
2197   ix = MP_MAX(ix, MP_USED(x));
2198   MP_CHECKOK( s_mp_pad(x, ix) );
2199
2200   r = 0 - s_mp_invmod_radix(MP_DIGIT(p,0));
2201
2202   for (ix = 0; k > 0; ix++) {
2203     int      j = MP_MIN(k, MP_DIGIT_BIT);
2204     mp_digit v = r * MP_DIGIT(x, ix);
2205     if (j < MP_DIGIT_BIT) {
2206       v &= ((mp_digit)1 << j) - 1;      /* v = v mod (2 ** j) */
2207     }
2208     s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
2209     k -= j;
2210   }
2211   s_mp_clamp(x);
2212   s_mp_div_2d(x, k_orig);
2213   res = MP_OKAY;
2214
2215 CLEANUP:
2216   return res;
2217 }
2218
2219 /* compute mod inverse using Schroeppel's method, only if m is odd */
2220 mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
2221 {
2222   int k;
2223   mp_err  res;
2224   mp_int  x;
2225
2226   ARGCHK(a && m && c, MP_BADARG);
2227
2228   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2229     return MP_RANGE;
2230   if (mp_iseven(m))
2231     return MP_UNDEF;
2232
2233   MP_DIGITS(&x) = 0;
2234
2235   if (a == c) {
2236     if ((res = mp_init_copy(&x, a)) != MP_OKAY)
2237       return res;
2238     if (a == m)
2239       m = &x;
2240     a = &x;
2241   } else if (m == c) {
2242     if ((res = mp_init_copy(&x, m)) != MP_OKAY)
2243       return res;
2244     m = &x;
2245   } else {
2246     MP_DIGITS(&x) = 0;
2247   }
2248
2249   MP_CHECKOK( s_mp_almost_inverse(a, m, c) );
2250   k = res;
2251   MP_CHECKOK( s_mp_fixup_reciprocal(c, m, k, c) );
2252 CLEANUP:
2253   mp_clear(&x);
2254   return res;
2255 }
2256
2257 /* Known good algorithm for computing modular inverse.  But slow. */
2258 mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
2259 {
2260   mp_int  g, x;
2261   mp_err  res;
2262
2263   ARGCHK(a && m && c, MP_BADARG);
2264
2265   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2266     return MP_RANGE;
2267
2268   MP_DIGITS(&g) = 0;
2269   MP_DIGITS(&x) = 0;
2270   MP_CHECKOK( mp_init(&x, FLAG(a)) );
2271   MP_CHECKOK( mp_init(&g, FLAG(a)) );
2272
2273   MP_CHECKOK( mp_xgcd(a, m, &g, &x, NULL) );
2274
2275   if (mp_cmp_d(&g, 1) != MP_EQ) {
2276     res = MP_UNDEF;
2277     goto CLEANUP;
2278   }
2279
2280   res = mp_mod(&x, m, c);
2281   SIGN(c) = SIGN(a);
2282
2283 CLEANUP:
2284   mp_clear(&x);
2285   mp_clear(&g);
2286
2287   return res;
2288 }
2289
2290 /* modular inverse where modulus is 2**k. */
2291 /* c = a**-1 mod 2**k */
2292 mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
2293 {
2294   mp_err res;
2295   mp_size ix = k + 4;
2296   mp_int t0, t1, val, tmp, two2k;
2297
2298   static const mp_digit d2 = 2;
2299   static const mp_int two = { 0, MP_ZPOS, 1, 1, (mp_digit *)&d2 };
2300
2301   if (mp_iseven(a))
2302     return MP_UNDEF;
2303   if (k <= MP_DIGIT_BIT) {
2304     mp_digit i = s_mp_invmod_radix(MP_DIGIT(a,0));
2305     if (k < MP_DIGIT_BIT)
2306       i &= ((mp_digit)1 << k) - (mp_digit)1;
2307     mp_set(c, i);
2308     return MP_OKAY;
2309   }
2310   MP_DIGITS(&t0) = 0;
2311   MP_DIGITS(&t1) = 0;
2312   MP_DIGITS(&val) = 0;
2313   MP_DIGITS(&tmp) = 0;
2314   MP_DIGITS(&two2k) = 0;
2315   MP_CHECKOK( mp_init_copy(&val, a) );
2316   s_mp_mod_2d(&val, k);
2317   MP_CHECKOK( mp_init_copy(&t0, &val) );
2318   MP_CHECKOK( mp_init_copy(&t1, &t0)  );
2319   MP_CHECKOK( mp_init(&tmp, FLAG(a)) );
2320   MP_CHECKOK( mp_init(&two2k, FLAG(a)) );
2321   MP_CHECKOK( s_mp_2expt(&two2k, k) );
2322   do {
2323     MP_CHECKOK( mp_mul(&val, &t1, &tmp)  );
2324     MP_CHECKOK( mp_sub(&two, &tmp, &tmp) );
2325     MP_CHECKOK( mp_mul(&t1, &tmp, &t1)   );
2326     s_mp_mod_2d(&t1, k);
2327     while (MP_SIGN(&t1) != MP_ZPOS) {
2328       MP_CHECKOK( mp_add(&t1, &two2k, &t1) );
2329     }
2330     if (mp_cmp(&t1, &t0) == MP_EQ)
2331       break;
2332     MP_CHECKOK( mp_copy(&t1, &t0) );
2333   } while (--ix > 0);
2334   if (!ix) {
2335     res = MP_UNDEF;
2336   } else {
2337     mp_exch(c, &t1);
2338   }
2339
2340 CLEANUP:
2341   mp_clear(&t0);
2342   mp_clear(&t1);
2343   mp_clear(&val);
2344   mp_clear(&tmp);
2345   mp_clear(&two2k);
2346   return res;
2347 }
2348
2349 mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
2350 {
2351   mp_err res;
2352   mp_size k;
2353   mp_int oddFactor, evenFactor; /* factors of the modulus */
2354   mp_int oddPart, evenPart;     /* parts to combine via CRT. */
2355   mp_int C2, tmp1, tmp2;
2356
2357   /*static const mp_digit d1 = 1; */
2358   /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
2359
2360   if ((res = s_mp_ispow2(m)) >= 0) {
2361     k = res;
2362     return s_mp_invmod_2d(a, k, c);
2363   }
2364   MP_DIGITS(&oddFactor) = 0;
2365   MP_DIGITS(&evenFactor) = 0;
2366   MP_DIGITS(&oddPart) = 0;
2367   MP_DIGITS(&evenPart) = 0;
2368   MP_DIGITS(&C2)     = 0;
2369   MP_DIGITS(&tmp1)   = 0;
2370   MP_DIGITS(&tmp2)   = 0;
2371
2372   MP_CHECKOK( mp_init_copy(&oddFactor, m) );    /* oddFactor = m */
2373   MP_CHECKOK( mp_init(&evenFactor, FLAG(m)) );
2374   MP_CHECKOK( mp_init(&oddPart, FLAG(m)) );
2375   MP_CHECKOK( mp_init(&evenPart, FLAG(m)) );
2376   MP_CHECKOK( mp_init(&C2, FLAG(m))     );
2377   MP_CHECKOK( mp_init(&tmp1, FLAG(m))   );
2378   MP_CHECKOK( mp_init(&tmp2, FLAG(m))   );
2379
2380   k = mp_trailing_zeros(m);
2381   s_mp_div_2d(&oddFactor, k);
2382   MP_CHECKOK( s_mp_2expt(&evenFactor, k) );
2383
2384   /* compute a**-1 mod oddFactor. */
2385   MP_CHECKOK( s_mp_invmod_odd_m(a, &oddFactor, &oddPart) );
2386   /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
2387   MP_CHECKOK( s_mp_invmod_2d(   a,       k,    &evenPart) );
2388
2389   /* Use Chinese Remainer theorem to compute a**-1 mod m. */
2390   /* let m1 = oddFactor,  v1 = oddPart,
2391    * let m2 = evenFactor, v2 = evenPart.
2392    */
2393
2394   /* Compute C2 = m1**-1 mod m2. */
2395   MP_CHECKOK( s_mp_invmod_2d(&oddFactor, k,    &C2) );
2396
2397   /* compute u = (v2 - v1)*C2 mod m2 */
2398   MP_CHECKOK( mp_sub(&evenPart, &oddPart,   &tmp1) );
2399   MP_CHECKOK( mp_mul(&tmp1,     &C2,        &tmp2) );
2400   s_mp_mod_2d(&tmp2, k);
2401   while (MP_SIGN(&tmp2) != MP_ZPOS) {
2402     MP_CHECKOK( mp_add(&tmp2, &evenFactor, &tmp2) );
2403   }
2404
2405   /* compute answer = v1 + u*m1 */
2406   MP_CHECKOK( mp_mul(&tmp2,     &oddFactor, c) );
2407   MP_CHECKOK( mp_add(&oddPart,  c,          c) );
2408   /* not sure this is necessary, but it's low cost if not. */
2409   MP_CHECKOK( mp_mod(c,         m,          c) );
2410
2411 CLEANUP:
2412   mp_clear(&oddFactor);
2413   mp_clear(&evenFactor);
2414   mp_clear(&oddPart);
2415   mp_clear(&evenPart);
2416   mp_clear(&C2);
2417   mp_clear(&tmp1);
2418   mp_clear(&tmp2);
2419   return res;
2420 }
2421
2422
2423 /* {{{ mp_invmod(a, m, c) */
2424
2425 /*
2426   mp_invmod(a, m, c)
2427
2428   Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
2429   This is equivalent to the question of whether (a, m) = 1.  If not,
2430   MP_UNDEF is returned, and there is no inverse.
2431  */
2432
2433 mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
2434 {
2435
2436   ARGCHK(a && m && c, MP_BADARG);
2437
2438   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2439     return MP_RANGE;
2440
2441   if (mp_isodd(m)) {
2442     return s_mp_invmod_odd_m(a, m, c);
2443   }
2444   if (mp_iseven(a))
2445     return MP_UNDEF;    /* not invertable */
2446
2447   return s_mp_invmod_even_m(a, m, c);
2448
2449 } /* end mp_invmod() */
2450
2451 /* }}} */
2452 #endif /* if MP_NUMTH */
2453
2454 /* }}} */
2455
2456 /*------------------------------------------------------------------------*/
2457 /* {{{ mp_print(mp, ofp) */
2458
2459 #if MP_IOFUNC
2460 /*
2461   mp_print(mp, ofp)
2462
2463   Print a textual representation of the given mp_int on the output
2464   stream 'ofp'.  Output is generated using the internal radix.
2465  */
2466
2467 void   mp_print(mp_int *mp, FILE *ofp)
2468 {
2469   int   ix;
2470
2471   if(mp == NULL || ofp == NULL)
2472     return;
2473
2474   fputc((SIGN(mp) == NEG) ? '-' : '+', ofp);
2475
2476   for(ix = USED(mp) - 1; ix >= 0; ix--) {
2477     fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
2478   }
2479
2480 } /* end mp_print() */
2481
2482 #endif /* if MP_IOFUNC */
2483
2484 /* }}} */
2485
2486 /*------------------------------------------------------------------------*/
2487 /* {{{ More I/O Functions */
2488
2489 /* {{{ mp_read_raw(mp, str, len) */
2490
2491 /*
2492    mp_read_raw(mp, str, len)
2493
2494    Read in a raw value (base 256) into the given mp_int
2495  */
2496
2497 mp_err  mp_read_raw(mp_int *mp, char *str, int len)
2498 {
2499   int            ix;
2500   mp_err         res;
2501   unsigned char *ustr = (unsigned char *)str;
2502
2503   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
2504
2505   mp_zero(mp);
2506
2507   /* Get sign from first byte */
2508   if(ustr[0])
2509     SIGN(mp) = NEG;
2510   else
2511     SIGN(mp) = ZPOS;
2512
2513   /* Read the rest of the digits */
2514   for(ix = 1; ix < len; ix++) {
2515     if((res = mp_mul_d(mp, 256, mp)) != MP_OKAY)
2516       return res;
2517     if((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY)
2518       return res;
2519   }
2520
2521   return MP_OKAY;
2522
2523 } /* end mp_read_raw() */
2524
2525 /* }}} */
2526
2527 /* {{{ mp_raw_size(mp) */
2528
2529 int    mp_raw_size(mp_int *mp)
2530 {
2531   ARGCHK(mp != NULL, 0);
2532
2533   return (USED(mp) * sizeof(mp_digit)) + 1;
2534
2535 } /* end mp_raw_size() */
2536
2537 /* }}} */
2538
2539 /* {{{ mp_toraw(mp, str) */
2540
2541 mp_err mp_toraw(mp_int *mp, char *str)
2542 {
2543   int  ix, jx, pos = 1;
2544
2545   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2546
2547   str[0] = (char)SIGN(mp);
2548
2549   /* Iterate over each digit... */
2550   for(ix = USED(mp) - 1; ix >= 0; ix--) {
2551     mp_digit  d = DIGIT(mp, ix);
2552
2553     /* Unpack digit bytes, high order first */
2554     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
2555       str[pos++] = (char)(d >> (jx * CHAR_BIT));
2556     }
2557   }
2558
2559   return MP_OKAY;
2560
2561 } /* end mp_toraw() */
2562
2563 /* }}} */
2564
2565 /* {{{ mp_read_radix(mp, str, radix) */
2566
2567 /*
2568   mp_read_radix(mp, str, radix)
2569
2570   Read an integer from the given string, and set mp to the resulting
2571   value.  The input is presumed to be in base 10.  Leading non-digit
2572   characters are ignored, and the function reads until a non-digit
2573   character or the end of the string.
2574  */
2575
2576 mp_err  mp_read_radix(mp_int *mp, const char *str, int radix)
2577 {
2578   int     ix = 0, val = 0;
2579   mp_err  res;
2580   mp_sign sig = ZPOS;
2581
2582   ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
2583          MP_BADARG);
2584
2585   mp_zero(mp);
2586
2587   /* Skip leading non-digit characters until a digit or '-' or '+' */
2588   while(str[ix] &&
2589         (s_mp_tovalue(str[ix], radix) < 0) &&
2590         str[ix] != '-' &&
2591         str[ix] != '+') {
2592     ++ix;
2593   }
2594
2595   if(str[ix] == '-') {
2596     sig = NEG;
2597     ++ix;
2598   } else if(str[ix] == '+') {
2599     sig = ZPOS; /* this is the default anyway... */
2600     ++ix;
2601   }
2602
2603   while((val = s_mp_tovalue(str[ix], radix)) >= 0) {
2604     if((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
2605       return res;
2606     if((res = s_mp_add_d(mp, val)) != MP_OKAY)
2607       return res;
2608     ++ix;
2609   }
2610
2611   if(s_mp_cmp_d(mp, 0) == MP_EQ)
2612     SIGN(mp) = ZPOS;
2613   else
2614     SIGN(mp) = sig;
2615
2616   return MP_OKAY;
2617
2618 } /* end mp_read_radix() */
2619
2620 mp_err mp_read_variable_radix(mp_int *a, const char * str, int default_radix)
2621 {
2622   int     radix = default_radix;
2623   int     cx;
2624   mp_sign sig   = ZPOS;
2625   mp_err  res;
2626
2627   /* Skip leading non-digit characters until a digit or '-' or '+' */
2628   while ((cx = *str) != 0 &&
2629         (s_mp_tovalue(cx, radix) < 0) &&
2630         cx != '-' &&
2631         cx != '+') {
2632     ++str;
2633   }
2634
2635   if (cx == '-') {
2636     sig = NEG;
2637     ++str;
2638   } else if (cx == '+') {
2639     sig = ZPOS; /* this is the default anyway... */
2640     ++str;
2641   }
2642
2643   if (str[0] == '0') {
2644     if ((str[1] | 0x20) == 'x') {
2645       radix = 16;
2646       str += 2;
2647     } else {
2648       radix = 8;
2649       str++;
2650     }
2651   }
2652   res = mp_read_radix(a, str, radix);
2653   if (res == MP_OKAY) {
2654     MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
2655   }
2656   return res;
2657 }
2658
2659 /* }}} */
2660
2661 /* {{{ mp_radix_size(mp, radix) */
2662
2663 int    mp_radix_size(mp_int *mp, int radix)
2664 {
2665   int  bits;
2666
2667   if(!mp || radix < 2 || radix > MAX_RADIX)
2668     return 0;
2669
2670   bits = USED(mp) * DIGIT_BIT - 1;
2671
2672   return s_mp_outlen(bits, radix);
2673
2674 } /* end mp_radix_size() */
2675
2676 /* }}} */
2677
2678 /* {{{ mp_toradix(mp, str, radix) */
2679
2680 mp_err mp_toradix(mp_int *mp, char *str, int radix)
2681 {
2682   int  ix, pos = 0;
2683
2684   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2685   ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
2686
2687   if(mp_cmp_z(mp) == MP_EQ) {
2688     str[0] = '0';
2689     str[1] = '\0';
2690   } else {
2691     mp_err   res;
2692     mp_int   tmp;
2693     mp_sign  sgn;
2694     mp_digit rem, rdx = (mp_digit)radix;
2695     char     ch;
2696
2697     if((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
2698       return res;
2699
2700     /* Save sign for later, and take absolute value */
2701     sgn = SIGN(&tmp); SIGN(&tmp) = ZPOS;
2702
2703     /* Generate output digits in reverse order      */
2704     while(mp_cmp_z(&tmp) != 0) {
2705       if((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) {
2706         mp_clear(&tmp);
2707         return res;
2708       }
2709
2710       /* Generate digits, use capital letters */
2711       ch = s_mp_todigit(rem, radix, 0);
2712
2713       str[pos++] = ch;
2714     }
2715
2716     /* Add - sign if original value was negative */
2717     if(sgn == NEG)
2718       str[pos++] = '-';
2719
2720     /* Add trailing NUL to end the string        */
2721     str[pos--] = '\0';
2722
2723     /* Reverse the digits and sign indicator     */
2724     ix = 0;
2725     while(ix < pos) {
2726       char tmp = str[ix];
2727
2728       str[ix] = str[pos];
2729       str[pos] = tmp;
2730       ++ix;
2731       --pos;
2732     }
2733
2734     mp_clear(&tmp);
2735   }
2736
2737   return MP_OKAY;
2738
2739 } /* end mp_toradix() */
2740
2741 /* }}} */
2742
2743 /* {{{ mp_tovalue(ch, r) */
2744
2745 int    mp_tovalue(char ch, int r)
2746 {
2747   return s_mp_tovalue(ch, r);
2748
2749 } /* end mp_tovalue() */
2750
2751 /* }}} */
2752
2753 /* }}} */
2754
2755 /* {{{ mp_strerror(ec) */
2756
2757 /*
2758   mp_strerror(ec)
2759
2760   Return a string describing the meaning of error code 'ec'.  The
2761   string returned is allocated in static memory, so the caller should
2762   not attempt to modify or free the memory associated with this
2763   string.
2764  */
2765 const char  *mp_strerror(mp_err ec)
2766 {
2767   int   aec = (ec < 0) ? -ec : ec;
2768
2769   /* Code values are negative, so the senses of these comparisons
2770      are accurate */
2771   if(ec < MP_LAST_CODE || ec > MP_OKAY) {
2772     return mp_err_string[0];  /* unknown error code */
2773   } else {
2774     return mp_err_string[aec + 1];
2775   }
2776
2777 } /* end mp_strerror() */
2778
2779 /* }}} */
2780
2781 /*========================================================================*/
2782 /*------------------------------------------------------------------------*/
2783 /* Static function definitions (internal use only)                        */
2784
2785 /* {{{ Memory management */
2786
2787 /* {{{ s_mp_grow(mp, min) */
2788
2789 /* Make sure there are at least 'min' digits allocated to mp              */
2790 mp_err   s_mp_grow(mp_int *mp, mp_size min)
2791 {
2792   if(min > ALLOC(mp)) {
2793     mp_digit   *tmp;
2794
2795     /* Set min to next nearest default precision block size */
2796     min = MP_ROUNDUP(min, s_mp_defprec);
2797
2798     if((tmp = s_mp_alloc(min, sizeof(mp_digit), FLAG(mp))) == NULL)
2799       return MP_MEM;
2800
2801     s_mp_copy(DIGITS(mp), tmp, USED(mp));
2802
2803 #if MP_CRYPTO
2804     s_mp_setz(DIGITS(mp), ALLOC(mp));
2805 #endif
2806     s_mp_free(DIGITS(mp), ALLOC(mp));
2807     DIGITS(mp) = tmp;
2808     ALLOC(mp) = min;
2809   }
2810
2811   return MP_OKAY;
2812
2813 } /* end s_mp_grow() */
2814
2815 /* }}} */
2816
2817 /* {{{ s_mp_pad(mp, min) */
2818
2819 /* Make sure the used size of mp is at least 'min', growing if needed     */
2820 mp_err   s_mp_pad(mp_int *mp, mp_size min)
2821 {
2822   if(min > USED(mp)) {
2823     mp_err  res;
2824
2825     /* Make sure there is room to increase precision  */
2826     if (min > ALLOC(mp)) {
2827       if ((res = s_mp_grow(mp, min)) != MP_OKAY)
2828         return res;
2829     } else {
2830       s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
2831     }
2832
2833     /* Increase precision; should already be 0-filled */
2834     USED(mp) = min;
2835   }
2836
2837   return MP_OKAY;
2838
2839 } /* end s_mp_pad() */
2840
2841 /* }}} */
2842
2843 /* {{{ s_mp_setz(dp, count) */
2844
2845 #if MP_MACRO == 0
2846 /* Set 'count' digits pointed to by dp to be zeroes                       */
2847 void s_mp_setz(mp_digit *dp, mp_size count)
2848 {
2849 #if MP_MEMSET == 0
2850   int  ix;
2851
2852   for(ix = 0; ix < count; ix++)
2853     dp[ix] = 0;
2854 #else
2855   memset(dp, 0, count * sizeof(mp_digit));
2856 #endif
2857
2858 } /* end s_mp_setz() */
2859 #endif
2860
2861 /* }}} */
2862
2863 /* {{{ s_mp_copy(sp, dp, count) */
2864
2865 #if MP_MACRO == 0
2866 /* Copy 'count' digits from sp to dp                                      */
2867 void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
2868 {
2869 #if MP_MEMCPY == 0
2870   int  ix;
2871
2872   for(ix = 0; ix < count; ix++)
2873     dp[ix] = sp[ix];
2874 #else
2875   memcpy(dp, sp, count * sizeof(mp_digit));
2876 #endif
2877
2878 } /* end s_mp_copy() */
2879 #endif
2880
2881 /* }}} */
2882
2883 /* {{{ s_mp_alloc(nb, ni, kmflag) */
2884
2885 #if MP_MACRO == 0
2886 /* Allocate ni records of nb bytes each, and return a pointer to that     */
2887 void    *s_mp_alloc(size_t nb, size_t ni, int kmflag)
2888 {
2889   ++mp_allocs;
2890 #ifdef _KERNEL
2891   return kmem_zalloc(nb * ni, kmflag);
2892 #else
2893   return calloc(nb, ni);
2894 #endif
2895
2896 } /* end s_mp_alloc() */
2897 #endif
2898
2899 /* }}} */
2900
2901 /* {{{ s_mp_free(ptr) */
2902
2903 #if MP_MACRO == 0
2904 /* Free the memory pointed to by ptr                                      */
2905 void     s_mp_free(void *ptr, mp_size alloc)
2906 {
2907   if(ptr) {
2908     ++mp_frees;
2909 #ifdef _KERNEL
2910     kmem_free(ptr, alloc * sizeof (mp_digit));
2911 #else
2912     free(ptr);
2913 #endif
2914   }
2915 } /* end s_mp_free() */
2916 #endif
2917
2918 /* }}} */
2919
2920 /* {{{ s_mp_clamp(mp) */
2921
2922 #if MP_MACRO == 0
2923 /* Remove leading zeroes from the given value                             */
2924 void     s_mp_clamp(mp_int *mp)
2925 {
2926   mp_size used = MP_USED(mp);
2927   while (used > 1 && DIGIT(mp, used - 1) == 0)
2928     --used;
2929   MP_USED(mp) = used;
2930 } /* end s_mp_clamp() */
2931 #endif
2932
2933 /* }}} */
2934
2935 /* {{{ s_mp_exch(a, b) */
2936
2937 /* Exchange the data for a and b; (b, a) = (a, b)                         */
2938 void     s_mp_exch(mp_int *a, mp_int *b)
2939 {
2940   mp_int   tmp;
2941
2942   tmp = *a;
2943   *a = *b;
2944   *b = tmp;
2945
2946 } /* end s_mp_exch() */
2947
2948 /* }}} */
2949
2950 /* }}} */
2951
2952 /* {{{ Arithmetic helpers */
2953
2954 /* {{{ s_mp_lshd(mp, p) */
2955
2956 /*
2957    Shift mp leftward by p digits, growing if needed, and zero-filling
2958    the in-shifted digits at the right end.  This is a convenient
2959    alternative to multiplication by powers of the radix
2960    The value of USED(mp) must already have been set to the value for
2961    the shifted result.
2962  */
2963
2964 mp_err   s_mp_lshd(mp_int *mp, mp_size p)
2965 {
2966   mp_err  res;
2967   mp_size pos;
2968   int     ix;
2969
2970   if(p == 0)
2971     return MP_OKAY;
2972
2973   if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
2974     return MP_OKAY;
2975
2976   if((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
2977     return res;
2978
2979   pos = USED(mp) - 1;
2980
2981   /* Shift all the significant figures over as needed */
2982   for(ix = pos - p; ix >= 0; ix--)
2983     DIGIT(mp, ix + p) = DIGIT(mp, ix);
2984
2985   /* Fill the bottom digits with zeroes */
2986   for(ix = 0; ix < p; ix++)
2987     DIGIT(mp, ix) = 0;
2988
2989   return MP_OKAY;
2990
2991 } /* end s_mp_lshd() */
2992
2993 /* }}} */
2994
2995 /* {{{ s_mp_mul_2d(mp, d) */
2996
2997 /*
2998   Multiply the integer by 2^d, where d is a number of bits.  This
2999   amounts to a bitwise shift of the value.
3000  */
3001 mp_err   s_mp_mul_2d(mp_int *mp, mp_digit d)
3002 {
3003   mp_err   res;
3004   mp_digit dshift, bshift;
3005   mp_digit mask;
3006
3007   ARGCHK(mp != NULL,  MP_BADARG);
3008
3009   dshift = d / MP_DIGIT_BIT;
3010   bshift = d % MP_DIGIT_BIT;
3011   /* bits to be shifted out of the top word */
3012   mask   = ((mp_digit)~0 << (MP_DIGIT_BIT - bshift));
3013   mask  &= MP_DIGIT(mp, MP_USED(mp) - 1);
3014
3015   if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (mask != 0) )))
3016     return res;
3017
3018   if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
3019     return res;
3020
3021   if (bshift) {
3022     mp_digit *pa = MP_DIGITS(mp);
3023     mp_digit *alim = pa + MP_USED(mp);
3024     mp_digit  prev = 0;
3025
3026     for (pa += dshift; pa < alim; ) {
3027       mp_digit x = *pa;
3028       *pa++ = (x << bshift) | prev;
3029       prev = x >> (DIGIT_BIT - bshift);
3030     }
3031   }
3032
3033   s_mp_clamp(mp);
3034   return MP_OKAY;
3035 } /* end s_mp_mul_2d() */
3036
3037 /* {{{ s_mp_rshd(mp, p) */
3038
3039 /*
3040    Shift mp rightward by p digits.  Maintains the invariant that
3041    digits above the precision are all zero.  Digits shifted off the
3042    end are lost.  Cannot fail.
3043  */
3044
3045 void     s_mp_rshd(mp_int *mp, mp_size p)
3046 {
3047   mp_size  ix;
3048   mp_digit *src, *dst;
3049
3050   if(p == 0)
3051     return;
3052
3053   /* Shortcut when all digits are to be shifted off */
3054   if(p >= USED(mp)) {
3055     s_mp_setz(DIGITS(mp), ALLOC(mp));
3056     USED(mp) = 1;
3057     SIGN(mp) = ZPOS;
3058     return;
3059   }
3060
3061   /* Shift all the significant figures over as needed */
3062   dst = MP_DIGITS(mp);
3063   src = dst + p;
3064   for (ix = USED(mp) - p; ix > 0; ix--)
3065     *dst++ = *src++;
3066
3067   MP_USED(mp) -= p;
3068   /* Fill the top digits with zeroes */
3069   while (p-- > 0)
3070     *dst++ = 0;
3071
3072 #if 0
3073   /* Strip off any leading zeroes    */
3074   s_mp_clamp(mp);
3075 #endif
3076
3077 } /* end s_mp_rshd() */
3078
3079 /* }}} */
3080
3081 /* {{{ s_mp_div_2(mp) */
3082
3083 /* Divide by two -- take advantage of radix properties to do it fast      */
3084 void     s_mp_div_2(mp_int *mp)
3085 {
3086   s_mp_div_2d(mp, 1);
3087
3088 } /* end s_mp_div_2() */
3089
3090 /* }}} */
3091
3092 /* {{{ s_mp_mul_2(mp) */
3093
3094 mp_err s_mp_mul_2(mp_int *mp)
3095 {
3096   mp_digit *pd;
3097   int      ix, used;
3098   mp_digit kin = 0;
3099
3100   /* Shift digits leftward by 1 bit */
3101   used = MP_USED(mp);
3102   pd = MP_DIGITS(mp);
3103   for (ix = 0; ix < used; ix++) {
3104     mp_digit d = *pd;
3105     *pd++ = (d << 1) | kin;
3106     kin = (d >> (DIGIT_BIT - 1));
3107   }
3108
3109   /* Deal with rollover from last digit */
3110   if (kin) {
3111     if (ix >= ALLOC(mp)) {
3112       mp_err res;
3113       if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
3114         return res;
3115     }
3116
3117     DIGIT(mp, ix) = kin;
3118     USED(mp) += 1;
3119   }
3120
3121   return MP_OKAY;
3122
3123 } /* end s_mp_mul_2() */
3124
3125 /* }}} */
3126
3127 /* {{{ s_mp_mod_2d(mp, d) */
3128
3129 /*
3130   Remainder the integer by 2^d, where d is a number of bits.  This
3131   amounts to a bitwise AND of the value, and does not require the full
3132   division code
3133  */
3134 void     s_mp_mod_2d(mp_int *mp, mp_digit d)
3135 {
3136   mp_size  ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
3137   mp_size  ix;
3138   mp_digit dmask;
3139
3140   if(ndig >= USED(mp))
3141     return;
3142
3143   /* Flush all the bits above 2^d in its digit */
3144   dmask = ((mp_digit)1 << nbit) - 1;
3145   DIGIT(mp, ndig) &= dmask;
3146
3147   /* Flush all digits above the one with 2^d in it */
3148   for(ix = ndig + 1; ix < USED(mp); ix++)
3149     DIGIT(mp, ix) = 0;
3150
3151   s_mp_clamp(mp);
3152
3153 } /* end s_mp_mod_2d() */
3154
3155 /* }}} */
3156
3157 /* {{{ s_mp_div_2d(mp, d) */
3158
3159 /*
3160   Divide the integer by 2^d, where d is a number of bits.  This
3161   amounts to a bitwise shift of the value, and does not require the
3162   full division code (used in Barrett reduction, see below)
3163  */
3164 void     s_mp_div_2d(mp_int *mp, mp_digit d)
3165 {
3166   int       ix;
3167   mp_digit  save, next, mask;
3168
3169   s_mp_rshd(mp, d / DIGIT_BIT);
3170   d %= DIGIT_BIT;
3171   if (d) {
3172     mask = ((mp_digit)1 << d) - 1;
3173     save = 0;
3174     for(ix = USED(mp) - 1; ix >= 0; ix--) {
3175       next = DIGIT(mp, ix) & mask;
3176       DIGIT(mp, ix) = (DIGIT(mp, ix) >> d) | (save << (DIGIT_BIT - d));
3177       save = next;
3178     }
3179   }
3180   s_mp_clamp(mp);
3181
3182 } /* end s_mp_div_2d() */
3183
3184 /* }}} */
3185
3186 /* {{{ s_mp_norm(a, b, *d) */
3187
3188 /*
3189   s_mp_norm(a, b, *d)
3190
3191   Normalize a and b for division, where b is the divisor.  In order
3192   that we might make good guesses for quotient digits, we want the
3193   leading digit of b to be at least half the radix, which we
3194   accomplish by multiplying a and b by a power of 2.  The exponent
3195   (shift count) is placed in *pd, so that the remainder can be shifted
3196   back at the end of the division process.
3197  */
3198
3199 mp_err   s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
3200 {
3201   mp_digit  d;
3202   mp_digit  mask;
3203   mp_digit  b_msd;
3204   mp_err    res    = MP_OKAY;
3205
3206   d = 0;
3207   mask  = DIGIT_MAX & ~(DIGIT_MAX >> 1);        /* mask is msb of digit */
3208   b_msd = DIGIT(b, USED(b) - 1);
3209   while (!(b_msd & mask)) {
3210     b_msd <<= 1;
3211     ++d;
3212   }
3213
3214   if (d) {
3215     MP_CHECKOK( s_mp_mul_2d(a, d) );
3216     MP_CHECKOK( s_mp_mul_2d(b, d) );
3217   }
3218
3219   *pd = d;
3220 CLEANUP:
3221   return res;
3222
3223 } /* end s_mp_norm() */
3224
3225 /* }}} */
3226
3227 /* }}} */
3228
3229 /* {{{ Primitive digit arithmetic */
3230
3231 /* {{{ s_mp_add_d(mp, d) */
3232
3233 /* Add d to |mp| in place                                                 */
3234 mp_err   s_mp_add_d(mp_int *mp, mp_digit d)    /* unsigned digit addition */
3235 {
3236 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3237   mp_word   w, k = 0;
3238   mp_size   ix = 1;
3239
3240   w = (mp_word)DIGIT(mp, 0) + d;
3241   DIGIT(mp, 0) = ACCUM(w);
3242   k = CARRYOUT(w);
3243
3244   while(ix < USED(mp) && k) {
3245     w = (mp_word)DIGIT(mp, ix) + k;
3246     DIGIT(mp, ix) = ACCUM(w);
3247     k = CARRYOUT(w);
3248     ++ix;
3249   }
3250
3251   if(k != 0) {
3252     mp_err  res;
3253
3254     if((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
3255       return res;
3256
3257     DIGIT(mp, ix) = (mp_digit)k;
3258   }
3259
3260   return MP_OKAY;
3261 #else
3262   mp_digit * pmp = MP_DIGITS(mp);
3263   mp_digit sum, mp_i, carry = 0;
3264   mp_err   res = MP_OKAY;
3265   int used = (int)MP_USED(mp);
3266
3267   mp_i = *pmp;
3268   *pmp++ = sum = d + mp_i;
3269   carry = (sum < d);
3270   while (carry && --used > 0) {
3271     mp_i = *pmp;
3272     *pmp++ = sum = carry + mp_i;
3273     carry = !sum;
3274   }
3275   if (carry && !used) {
3276     /* mp is growing */
3277     used = MP_USED(mp);
3278     MP_CHECKOK( s_mp_pad(mp, used + 1) );
3279     MP_DIGIT(mp, used) = carry;
3280   }
3281 CLEANUP:
3282   return res;
3283 #endif
3284 } /* end s_mp_add_d() */
3285
3286 /* }}} */
3287
3288 /* {{{ s_mp_sub_d(mp, d) */
3289
3290 /* Subtract d from |mp| in place, assumes |mp| > d                        */
3291 mp_err   s_mp_sub_d(mp_int *mp, mp_digit d)    /* unsigned digit subtract */
3292 {
3293 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3294   mp_word   w, b = 0;
3295   mp_size   ix = 1;
3296
3297   /* Compute initial subtraction    */
3298   w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
3299   b = CARRYOUT(w) ? 0 : 1;
3300   DIGIT(mp, 0) = ACCUM(w);
3301
3302   /* Propagate borrows leftward     */
3303   while(b && ix < USED(mp)) {
3304     w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
3305     b = CARRYOUT(w) ? 0 : 1;
3306     DIGIT(mp, ix) = ACCUM(w);
3307     ++ix;
3308   }
3309
3310   /* Remove leading zeroes          */
3311   s_mp_clamp(mp);
3312
3313   /* If we have a borrow out, it's a violation of the input invariant */
3314   if(b)
3315     return MP_RANGE;
3316   else
3317     return MP_OKAY;
3318 #else
3319   mp_digit *pmp = MP_DIGITS(mp);
3320   mp_digit mp_i, diff, borrow;
3321   mp_size  used = MP_USED(mp);
3322
3323   mp_i = *pmp;
3324   *pmp++ = diff = mp_i - d;
3325   borrow = (diff > mp_i);
3326   while (borrow && --used) {
3327     mp_i = *pmp;
3328     *pmp++ = diff = mp_i - borrow;
3329     borrow = (diff > mp_i);
3330   }
3331   s_mp_clamp(mp);
3332   return (borrow && !used) ? MP_RANGE : MP_OKAY;
3333 #endif
3334 } /* end s_mp_sub_d() */
3335
3336 /* }}} */
3337
3338 /* {{{ s_mp_mul_d(a, d) */
3339
3340 /* Compute a = a * d, single digit multiplication                         */
3341 mp_err   s_mp_mul_d(mp_int *a, mp_digit d)
3342 {
3343   mp_err  res;
3344   mp_size used;
3345   int     pow;
3346
3347   if (!d) {
3348     mp_zero(a);
3349     return MP_OKAY;
3350   }
3351   if (d == 1)
3352     return MP_OKAY;
3353   if (0 <= (pow = s_mp_ispow2d(d))) {
3354     return s_mp_mul_2d(a, (mp_digit)pow);
3355   }
3356
3357   used = MP_USED(a);
3358   MP_CHECKOK( s_mp_pad(a, used + 1) );
3359
3360   s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a));
3361
3362   s_mp_clamp(a);
3363
3364 CLEANUP:
3365   return res;
3366
3367 } /* end s_mp_mul_d() */
3368
3369 /* }}} */
3370
3371 /* {{{ s_mp_div_d(mp, d, r) */
3372
3373 /*
3374   s_mp_div_d(mp, d, r)
3375
3376   Compute the quotient mp = mp / d and remainder r = mp mod d, for a
3377   single digit d.  If r is null, the remainder will be discarded.
3378  */
3379
3380 mp_err   s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
3381 {
3382 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3383   mp_word   w = 0, q;
3384 #else
3385   mp_digit  w, q;
3386 #endif
3387   int       ix;
3388   mp_err    res;
3389   mp_int    quot;
3390   mp_int    rem;
3391
3392   if(d == 0)
3393     return MP_RANGE;
3394   if (d == 1) {
3395     if (r)
3396       *r = 0;
3397     return MP_OKAY;
3398   }
3399   /* could check for power of 2 here, but mp_div_d does that. */
3400   if (MP_USED(mp) == 1) {
3401     mp_digit n   = MP_DIGIT(mp,0);
3402     mp_digit rem;
3403
3404     q   = n / d;
3405     rem = n % d;
3406     MP_DIGIT(mp,0) = q;
3407     if (r)
3408       *r = rem;
3409     return MP_OKAY;
3410   }
3411
3412   MP_DIGITS(&rem)  = 0;
3413   MP_DIGITS(&quot) = 0;
3414   /* Make room for the quotient */
3415   MP_CHECKOK( mp_init_size(&quot, USED(mp), FLAG(mp)) );
3416
3417 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3418   for(ix = USED(mp) - 1; ix >= 0; ix--) {
3419     w = (w << DIGIT_BIT) | DIGIT(mp, ix);
3420
3421     if(w >= d) {
3422       q = w / d;
3423       w = w % d;
3424     } else {
3425       q = 0;
3426     }
3427
3428     s_mp_lshd(&quot, 1);
3429     DIGIT(&quot, 0) = (mp_digit)q;
3430   }
3431 #else
3432   {
3433     mp_digit p;
3434 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3435     mp_digit norm;
3436 #endif
3437
3438     MP_CHECKOK( mp_init_copy(&rem, mp) );
3439
3440 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3441     MP_DIGIT(&quot, 0) = d;
3442     MP_CHECKOK( s_mp_norm(&rem, &quot, &norm) );
3443     if (norm)
3444       d <<= norm;
3445     MP_DIGIT(&quot, 0) = 0;
3446 #endif
3447
3448     p = 0;
3449     for (ix = USED(&rem) - 1; ix >= 0; ix--) {
3450       w = DIGIT(&rem, ix);
3451
3452       if (p) {
3453         MP_CHECKOK( s_mpv_div_2dx1d(p, w, d, &q, &w) );
3454       } else if (w >= d) {
3455         q = w / d;
3456         w = w % d;
3457       } else {
3458         q = 0;
3459       }
3460
3461       MP_CHECKOK( s_mp_lshd(&quot, 1) );
3462       DIGIT(&quot, 0) = q;
3463       p = w;
3464     }
3465 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3466     if (norm)
3467       w >>= norm;
3468 #endif
3469   }
3470 #endif
3471
3472   /* Deliver the remainder, if desired */
3473   if(r)
3474     *r = (mp_digit)w;
3475
3476   s_mp_clamp(&quot);
3477   mp_exch(&quot, mp);
3478 CLEANUP:
3479   mp_clear(&quot);
3480   mp_clear(&rem);
3481
3482   return res;
3483 } /* end s_mp_div_d() */
3484
3485 /* }}} */
3486
3487
3488 /* }}} */
3489
3490 /* {{{ Primitive full arithmetic */
3491
3492 /* {{{ s_mp_add(a, b) */
3493
3494 /* Compute a = |a| + |b|                                                  */
3495 mp_err   s_mp_add(mp_int *a, const mp_int *b)  /* magnitude addition      */
3496 {
3497 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3498   mp_word   w = 0;
3499 #else
3500   mp_digit  d, sum, carry = 0;
3501 #endif
3502   mp_digit *pa, *pb;
3503   mp_size   ix;
3504   mp_size   used;
3505   mp_err    res;
3506
3507   /* Make sure a has enough precision for the output value */
3508   if((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
3509     return res;
3510
3511   /*
3512     Add up all digits up to the precision of b.  If b had initially
3513     the same precision as a, or greater, we took care of it by the
3514     padding step above, so there is no problem.  If b had initially
3515     less precision, we'll have to make sure the carry out is duly
3516     propagated upward among the higher-order digits of the sum.
3517    */
3518   pa = MP_DIGITS(a);
3519   pb = MP_DIGITS(b);
3520   used = MP_USED(b);
3521   for(ix = 0; ix < used; ix++) {
3522 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3523     w = w + *pa + *pb++;
3524     *pa++ = ACCUM(w);
3525     w = CARRYOUT(w);
3526 #else
3527     d = *pa;
3528     sum = d + *pb++;
3529     d = (sum < d);                      /* detect overflow */
3530     *pa++ = sum += carry;
3531     carry = d + (sum < carry);          /* detect overflow */
3532 #endif
3533   }
3534
3535   /* If we run out of 'b' digits before we're actually done, make
3536      sure the carries get propagated upward...
3537    */
3538   used = MP_USED(a);
3539 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3540   while (w && ix < used) {
3541     w = w + *pa;
3542     *pa++ = ACCUM(w);
3543     w = CARRYOUT(w);
3544     ++ix;
3545   }
3546 #else
3547   while (carry && ix < used) {
3548     sum = carry + *pa;
3549     *pa++ = sum;
3550     carry = !sum;
3551     ++ix;
3552   }
3553 #endif
3554
3555   /* If there's an overall carry out, increase precision and include
3556      it.  We could have done this initially, but why touch the memory
3557      allocator unless we're sure we have to?
3558    */
3559 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3560   if (w) {
3561     if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3562       return res;
3563
3564     DIGIT(a, ix) = (mp_digit)w;
3565   }
3566 #else
3567   if (carry) {
3568     if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3569       return res;
3570
3571     DIGIT(a, used) = carry;
3572   }
3573 #endif
3574
3575   return MP_OKAY;
3576 } /* end s_mp_add() */
3577
3578 /* }}} */
3579
3580 /* Compute c = |a| + |b|         */ /* magnitude addition      */
3581 mp_err   s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3582 {
3583   mp_digit *pa, *pb, *pc;
3584 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3585   mp_word   w = 0;
3586 #else
3587   mp_digit  sum, carry = 0, d;
3588 #endif
3589   mp_size   ix;
3590   mp_size   used;
3591   mp_err    res;
3592
3593   MP_SIGN(c) = MP_SIGN(a);
3594   if (MP_USED(a) < MP_USED(b)) {
3595     const mp_int *xch = a;
3596     a = b;
3597     b = xch;
3598   }
3599
3600   /* Make sure a has enough precision for the output value */
3601   if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3602     return res;
3603
3604   /*
3605     Add up all digits up to the precision of b.  If b had initially
3606     the same precision as a, or greater, we took care of it by the
3607     exchange step above, so there is no problem.  If b had initially
3608     less precision, we'll have to make sure the carry out is duly
3609     propagated upward among the higher-order digits of the sum.
3610    */
3611   pa = MP_DIGITS(a);
3612   pb = MP_DIGITS(b);
3613   pc = MP_DIGITS(c);
3614   used = MP_USED(b);
3615   for (ix = 0; ix < used; ix++) {
3616 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3617     w = w + *pa++ + *pb++;
3618     *pc++ = ACCUM(w);
3619     w = CARRYOUT(w);
3620 #else
3621     d = *pa++;
3622     sum = d + *pb++;
3623     d = (sum < d);                      /* detect overflow */
3624     *pc++ = sum += carry;
3625     carry = d + (sum < carry);          /* detect overflow */
3626 #endif
3627   }
3628
3629   /* If we run out of 'b' digits before we're actually done, make
3630      sure the carries get propagated upward...
3631    */
3632   for (used = MP_USED(a); ix < used; ++ix) {
3633 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3634     w = w + *pa++;
3635     *pc++ = ACCUM(w);
3636     w = CARRYOUT(w);
3637 #else
3638     *pc++ = sum = carry + *pa++;
3639     carry = (sum < carry);
3640 #endif
3641   }
3642
3643   /* If there's an overall carry out, increase precision and include
3644      it.  We could have done this initially, but why touch the memory
3645      allocator unless we're sure we have to?
3646    */
3647 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3648   if (w) {
3649     if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3650       return res;
3651
3652     DIGIT(c, used) = (mp_digit)w;
3653     ++used;
3654   }
3655 #else
3656   if (carry) {
3657     if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3658       return res;
3659
3660     DIGIT(c, used) = carry;
3661     ++used;
3662   }
3663 #endif
3664   MP_USED(c) = used;
3665   return MP_OKAY;
3666 }
3667 /* {{{ s_mp_add_offset(a, b, offset) */
3668
3669 /* Compute a = |a| + ( |b| * (RADIX ** offset) )             */
3670 mp_err   s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
3671 {
3672 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3673   mp_word   w, k = 0;
3674 #else
3675   mp_digit  d, sum, carry = 0;
3676 #endif
3677   mp_size   ib;
3678   mp_size   ia;
3679   mp_size   lim;
3680   mp_err    res;
3681
3682   /* Make sure a has enough precision for the output value */
3683   lim = MP_USED(b) + offset;
3684   if((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
3685     return res;
3686
3687   /*
3688     Add up all digits up to the precision of b.  If b had initially
3689     the same precision as a, or greater, we took care of it by the
3690     padding step above, so there is no problem.  If b had initially
3691     less precision, we'll have to make sure the carry out is duly
3692     propagated upward among the higher-order digits of the sum.
3693    */
3694   lim = USED(b);
3695   for(ib = 0, ia = offset; ib < lim; ib++, ia++) {
3696 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3697     w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
3698     DIGIT(a, ia) = ACCUM(w);
3699     k = CARRYOUT(w);
3700 #else
3701     d = MP_DIGIT(a, ia);
3702     sum = d + MP_DIGIT(b, ib);
3703     d = (sum < d);
3704     MP_DIGIT(a,ia) = sum += carry;
3705     carry = d + (sum < carry);
3706 #endif
3707   }
3708
3709   /* If we run out of 'b' digits before we're actually done, make
3710      sure the carries get propagated upward...
3711    */
3712 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3713   for (lim = MP_USED(a); k && (ia < lim); ++ia) {
3714     w = (mp_word)DIGIT(a, ia) + k;
3715     DIGIT(a, ia) = ACCUM(w);
3716     k = CARRYOUT(w);
3717   }
3718 #else
3719   for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
3720     d = MP_DIGIT(a, ia);
3721     MP_DIGIT(a,ia) = sum = d + carry;
3722     carry = (sum < d);
3723   }
3724 #endif
3725
3726   /* If there's an overall carry out, increase precision and include
3727      it.  We could have done this initially, but why touch the memory
3728      allocator unless we're sure we have to?
3729    */
3730 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3731   if(k) {
3732     if((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
3733       return res;
3734
3735     DIGIT(a, ia) = (mp_digit)k;
3736   }
3737 #else
3738   if (carry) {
3739     if((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
3740       return res;
3741
3742     DIGIT(a, lim) = carry;
3743   }
3744 #endif
3745   s_mp_clamp(a);
3746
3747   return MP_OKAY;
3748
3749 } /* end s_mp_add_offset() */
3750
3751 /* }}} */
3752
3753 /* {{{ s_mp_sub(a, b) */
3754
3755 /* Compute a = |a| - |b|, assumes |a| >= |b|                              */
3756 mp_err   s_mp_sub(mp_int *a, const mp_int *b)  /* magnitude subtract      */
3757 {
3758   mp_digit *pa, *pb, *limit;
3759 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3760   mp_sword  w = 0;
3761 #else
3762   mp_digit  d, diff, borrow = 0;
3763 #endif
3764
3765   /*
3766     Subtract and propagate borrow.  Up to the precision of b, this
3767     accounts for the digits of b; after that, we just make sure the
3768     carries get to the right place.  This saves having to pad b out to
3769     the precision of a just to make the loops work right...
3770    */
3771   pa = MP_DIGITS(a);
3772   pb = MP_DIGITS(b);
3773   limit = pb + MP_USED(b);
3774   while (pb < limit) {
3775 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3776     w = w + *pa - *pb++;
3777     *pa++ = ACCUM(w);
3778     w >>= MP_DIGIT_BIT;
3779 #else
3780     d = *pa;
3781     diff = d - *pb++;
3782     d = (diff > d);                             /* detect borrow */
3783     if (borrow && --diff == MP_DIGIT_MAX)
3784       ++d;
3785     *pa++ = diff;
3786     borrow = d;
3787 #endif
3788   }
3789   limit = MP_DIGITS(a) + MP_USED(a);
3790 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3791   while (w && pa < limit) {
3792     w = w + *pa;
3793     *pa++ = ACCUM(w);
3794     w >>= MP_DIGIT_BIT;
3795   }
3796 #else
3797   while (borrow && pa < limit) {
3798     d = *pa;
3799     *pa++ = diff = d - borrow;
3800     borrow = (diff > d);
3801   }
3802 #endif
3803
3804   /* Clobber any leading zeroes we created    */
3805   s_mp_clamp(a);
3806
3807   /*
3808      If there was a borrow out, then |b| > |a| in violation
3809      of our input invariant.  We've already done the work,
3810      but we'll at least complain about it...
3811    */
3812 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3813   return w ? MP_RANGE : MP_OKAY;
3814 #else
3815   return borrow ? MP_RANGE : MP_OKAY;
3816 #endif
3817 } /* end s_mp_sub() */
3818
3819 /* }}} */
3820
3821 /* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract      */
3822 mp_err   s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3823 {
3824   mp_digit *pa, *pb, *pc;
3825 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3826   mp_sword  w = 0;
3827 #else
3828   mp_digit  d, diff, borrow = 0;
3829 #endif
3830   int       ix, limit;
3831   mp_err    res;
3832
3833   MP_SIGN(c) = MP_SIGN(a);
3834
3835   /* Make sure a has enough precision for the output value */
3836   if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3837     return res;
3838
3839   /*
3840     Subtract and propagate borrow.  Up to the precision of b, this
3841     accounts for the digits of b; after that, we just make sure the
3842     carries get to the right place.  This saves having to pad b out to
3843     the precision of a just to make the loops work right...
3844    */
3845   pa = MP_DIGITS(a);
3846   pb = MP_DIGITS(b);
3847   pc = MP_DIGITS(c);
3848   limit = MP_USED(b);
3849   for (ix = 0; ix < limit; ++ix) {
3850 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3851     w = w + *pa++ - *pb++;
3852     *pc++ = ACCUM(w);
3853     w >>= MP_DIGIT_BIT;
3854 #else
3855     d = *pa++;
3856     diff = d - *pb++;
3857     d = (diff > d);
3858     if (borrow && --diff == MP_DIGIT_MAX)
3859       ++d;
3860     *pc++ = diff;
3861     borrow = d;
3862 #endif
3863   }
3864   for (limit = MP_USED(a); ix < limit; ++ix) {
3865 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3866     w = w + *pa++;
3867     *pc++ = ACCUM(w);
3868     w >>= MP_DIGIT_BIT;
3869 #else
3870     d = *pa++;
3871     *pc++ = diff = d - borrow;
3872     borrow = (diff > d);
3873 #endif
3874   }
3875
3876   /* Clobber any leading zeroes we created    */
3877   MP_USED(c) = ix;
3878   s_mp_clamp(c);
3879
3880   /*
3881      If there was a borrow out, then |b| > |a| in violation
3882      of our input invariant.  We've already done the work,
3883      but we'll at least complain about it...
3884    */
3885 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3886   return w ? MP_RANGE : MP_OKAY;
3887 #else
3888   return borrow ? MP_RANGE : MP_OKAY;
3889 #endif
3890 }
3891 /* {{{ s_mp_mul(a, b) */
3892
3893 /* Compute a = |a| * |b|                                                  */
3894 mp_err   s_mp_mul(mp_int *a, const mp_int *b)
3895 {
3896   return mp_mul(a, b, a);
3897 } /* end s_mp_mul() */
3898
3899 /* }}} */
3900
3901 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
3902 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
3903 #define MP_MUL_DxD(a, b, Phi, Plo) \
3904   { unsigned long long product = (unsigned long long)a * b; \
3905     Plo = (mp_digit)product; \
3906     Phi = (mp_digit)(product >> MP_DIGIT_BIT); }
3907 #elif defined(OSF1)
3908 #define MP_MUL_DxD(a, b, Phi, Plo) \
3909   { Plo = asm ("mulq %a0, %a1, %v0", a, b);\
3910     Phi = asm ("umulh %a0, %a1, %v0", a, b); }
3911 #else
3912 #define MP_MUL_DxD(a, b, Phi, Plo) \
3913   { mp_digit a0b1, a1b0; \
3914     Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \
3915     Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
3916     a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
3917     a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
3918     a1b0 += a0b1; \
3919     Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
3920     if (a1b0 < a0b1)  \
3921       Phi += MP_HALF_RADIX; \
3922     a1b0 <<= MP_HALF_DIGIT_BIT; \
3923     Plo += a1b0; \
3924     if (Plo < a1b0) \
3925       ++Phi; \
3926   }
3927 #endif
3928
3929 #if !defined(MP_ASSEMBLY_MULTIPLY)
3930 /* c = a * b */
3931 void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
3932 {
3933 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3934   mp_digit   d = 0;
3935
3936   /* Inner product:  Digits of a */
3937   while (a_len--) {
3938     mp_word w = ((mp_word)b * *a++) + d;
3939     *c++ = ACCUM(w);
3940     d = CARRYOUT(w);
3941   }
3942   *c = d;
3943 #else
3944   mp_digit carry = 0;
3945   while (a_len--) {
3946     mp_digit a_i = *a++;
3947     mp_digit a0b0, a1b1;
3948
3949     MP_MUL_DxD(a_i, b, a1b1, a0b0);
3950
3951     a0b0 += carry;
3952     if (a0b0 < carry)
3953       ++a1b1;
3954     *c++ = a0b0;
3955     carry = a1b1;
3956   }
3957   *c = carry;
3958 #endif
3959 }
3960
3961 /* c += a * b */
3962 void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
3963                               mp_digit *c)
3964 {
3965 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3966   mp_digit   d = 0;
3967
3968   /* Inner product:  Digits of a */
3969   while (a_len--) {
3970     mp_word w = ((mp_word)b * *a++) + *c + d;
3971     *c++ = ACCUM(w);
3972     d = CARRYOUT(w);
3973   }
3974   *c = d;
3975 #else
3976   mp_digit carry = 0;
3977   while (a_len--) {
3978     mp_digit a_i = *a++;
3979     mp_digit a0b0, a1b1;
3980
3981     MP_MUL_DxD(a_i, b, a1b1, a0b0);
3982
3983     a0b0 += carry;
3984     if (a0b0 < carry)
3985       ++a1b1;
3986     a0b0 += a_i = *c;
3987     if (a0b0 < a_i)
3988       ++a1b1;
3989     *c++ = a0b0;
3990     carry = a1b1;
3991   }
3992   *c = carry;
3993 #endif
3994 }
3995
3996 /* Presently, this is only used by the Montgomery arithmetic code. */
3997 /* c += a * b */
3998 void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
3999 {
4000 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4001   mp_digit   d = 0;
4002
4003   /* Inner product:  Digits of a */
4004   while (a_len--) {
4005     mp_word w = ((mp_word)b * *a++) + *c + d;
4006     *c++ = ACCUM(w);
4007     d = CARRYOUT(w);
4008   }
4009
4010   while (d) {
4011     mp_word w = (mp_word)*c + d;
4012     *c++ = ACCUM(w);
4013     d = CARRYOUT(w);
4014   }
4015 #else
4016   mp_digit carry = 0;
4017   while (a_len--) {
4018     mp_digit a_i = *a++;
4019     mp_digit a0b0, a1b1;
4020
4021     MP_MUL_DxD(a_i, b, a1b1, a0b0);
4022
4023     a0b0 += carry;
4024     if (a0b0 < carry)
4025       ++a1b1;
4026
4027     a0b0 += a_i = *c;
4028     if (a0b0 < a_i)
4029       ++a1b1;
4030
4031     *c++ = a0b0;
4032     carry = a1b1;
4033   }
4034   while (carry) {
4035     mp_digit c_i = *c;
4036     carry += c_i;
4037     *c++ = carry;
4038     carry = carry < c_i;
4039   }
4040 #endif
4041 }
4042 #endif
4043
4044 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
4045 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
4046 #define MP_SQR_D(a, Phi, Plo) \
4047   { unsigned long long square = (unsigned long long)a * a; \
4048     Plo = (mp_digit)square; \
4049     Phi = (mp_digit)(square >> MP_DIGIT_BIT); }
4050 #elif defined(OSF1)
4051 #define MP_SQR_D(a, Phi, Plo) \
4052   { Plo = asm ("mulq  %a0, %a0, %v0", a);\
4053     Phi = asm ("umulh %a0, %a0, %v0", a); }
4054 #else
4055 #define MP_SQR_D(a, Phi, Plo) \
4056   { mp_digit Pmid; \
4057     Plo  = (a  & MP_HALF_DIGIT_MAX) * (a  & MP_HALF_DIGIT_MAX); \
4058     Phi  = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
4059     Pmid = (a  & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
4060     Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1);  \
4061     Pmid <<= (MP_HALF_DIGIT_BIT + 1);  \
4062     Plo += Pmid;  \
4063     if (Plo < Pmid)  \
4064       ++Phi;  \
4065   }
4066 #endif
4067
4068 #if !defined(MP_ASSEMBLY_SQUARE)
4069 /* Add the squares of the digits of a to the digits of b. */
4070 void s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
4071 {
4072 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4073   mp_word  w;
4074   mp_digit d;
4075   mp_size  ix;
4076
4077   w  = 0;
4078 #define ADD_SQUARE(n) \
4079     d = pa[n]; \
4080     w += (d * (mp_word)d) + ps[2*n]; \
4081     ps[2*n] = ACCUM(w); \
4082     w = (w >> DIGIT_BIT) + ps[2*n+1]; \
4083     ps[2*n+1] = ACCUM(w); \
4084     w = (w >> DIGIT_BIT)
4085
4086   for (ix = a_len; ix >= 4; ix -= 4) {
4087     ADD_SQUARE(0);
4088     ADD_SQUARE(1);
4089     ADD_SQUARE(2);
4090     ADD_SQUARE(3);
4091     pa += 4;
4092     ps += 8;
4093   }
4094   if (ix) {
4095     ps += 2*ix;
4096     pa += ix;
4097     switch (ix) {
4098     case 3: ADD_SQUARE(-3); /* FALLTHRU */
4099     case 2: ADD_SQUARE(-2); /* FALLTHRU */
4100     case 1: ADD_SQUARE(-1); /* FALLTHRU */
4101     case 0: break;
4102     }
4103   }
4104   while (w) {
4105     w += *ps;
4106     *ps++ = ACCUM(w);
4107     w = (w >> DIGIT_BIT);
4108   }
4109 #else
4110   mp_digit carry = 0;
4111   while (a_len--) {
4112     mp_digit a_i = *pa++;
4113     mp_digit a0a0, a1a1;
4114
4115     MP_SQR_D(a_i, a1a1, a0a0);
4116
4117     /* here a1a1 and a0a0 constitute a_i ** 2 */
4118     a0a0 += carry;
4119     if (a0a0 < carry)
4120       ++a1a1;
4121
4122     /* now add to ps */
4123     a0a0 += a_i = *ps;
4124     if (a0a0 < a_i)
4125       ++a1a1;
4126     *ps++ = a0a0;
4127     a1a1 += a_i = *ps;
4128     carry = (a1a1 < a_i);
4129     *ps++ = a1a1;
4130   }
4131   while (carry) {
4132     mp_digit s_i = *ps;
4133     carry += s_i;
4134     *ps++ = carry;
4135     carry = carry < s_i;
4136   }
4137 #endif
4138 }
4139 #endif
4140
4141 #if (defined(MP_NO_MP_WORD) || defined(MP_NO_DIV_WORD)) \
4142 && !defined(MP_ASSEMBLY_DIV_2DX1D)
4143 /*
4144 ** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
4145 ** so its high bit is 1.   This code is from NSPR.
4146 */
4147 mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
4148                        mp_digit *qp, mp_digit *rp)
4149 {
4150     mp_digit d1, d0, q1, q0;
4151     mp_digit r1, r0, m;
4152
4153     d1 = divisor >> MP_HALF_DIGIT_BIT;
4154     d0 = divisor & MP_HALF_DIGIT_MAX;
4155     r1 = Nhi % d1;
4156     q1 = Nhi / d1;
4157     m = q1 * d0;
4158     r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
4159     if (r1 < m) {
4160         q1--, r1 += divisor;
4161         if (r1 >= divisor && r1 < m) {
4162             q1--, r1 += divisor;
4163         }
4164     }
4165     r1 -= m;
4166     r0 = r1 % d1;
4167     q0 = r1 / d1;
4168     m = q0 * d0;
4169     r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
4170     if (r0 < m) {
4171         q0--, r0 += divisor;
4172         if (r0 >= divisor && r0 < m) {
4173             q0--, r0 += divisor;
4174         }
4175     }
4176     if (qp)
4177         *qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
4178     if (rp)
4179         *rp = r0 - m;
4180     return MP_OKAY;
4181 }
4182 #endif
4183
4184 #if MP_SQUARE
4185 /* {{{ s_mp_sqr(a) */
4186
4187 mp_err   s_mp_sqr(mp_int *a)
4188 {
4189   mp_err   res;
4190   mp_int   tmp;
4191
4192   if((res = mp_init_size(&tmp, 2 * USED(a), FLAG(a))) != MP_OKAY)
4193     return res;
4194   res = mp_sqr(a, &tmp);
4195   if (res == MP_OKAY) {
4196     s_mp_exch(&tmp, a);
4197   }
4198   mp_clear(&tmp);
4199   return res;
4200 }
4201
4202 /* }}} */
4203 #endif
4204
4205 /* {{{ s_mp_div(a, b) */
4206
4207 /*
4208   s_mp_div(a, b)
4209
4210   Compute a = a / b and b = a mod b.  Assumes b > a.
4211  */
4212
4213 mp_err   s_mp_div(mp_int *rem,  /* i: dividend, o: remainder */
4214                   mp_int *div,  /* i: divisor                */
4215                   mp_int *quot) /* i: 0;        o: quotient  */
4216 {
4217   mp_int   part, t;
4218 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4219   mp_word  q_msd;
4220 #else
4221   mp_digit q_msd;
4222 #endif
4223   mp_err   res;
4224   mp_digit d;
4225   mp_digit div_msd;
4226   int      ix;
4227
4228   if(mp_cmp_z(div) == 0)
4229     return MP_RANGE;
4230
4231   /* Shortcut if divisor is power of two */
4232   if((ix = s_mp_ispow2(div)) >= 0) {
4233     MP_CHECKOK( mp_copy(rem, quot) );
4234     s_mp_div_2d(quot, (mp_digit)ix);
4235     s_mp_mod_2d(rem,  (mp_digit)ix);
4236
4237     return MP_OKAY;
4238   }
4239
4240   DIGITS(&t) = 0;
4241   MP_SIGN(rem) = ZPOS;
4242   MP_SIGN(div) = ZPOS;
4243
4244   /* A working temporary for division     */
4245   MP_CHECKOK( mp_init_size(&t, MP_ALLOC(rem), FLAG(rem)));
4246
4247   /* Normalize to optimize guessing       */
4248   MP_CHECKOK( s_mp_norm(rem, div, &d) );
4249
4250   part = *rem;
4251
4252   /* Perform the division itself...woo!   */
4253   MP_USED(quot) = MP_ALLOC(quot);
4254
4255   /* Find a partial substring of rem which is at least div */
4256   /* If we didn't find one, we're finished dividing    */
4257   while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
4258     int i;
4259     int unusedRem;
4260
4261     unusedRem = MP_USED(rem) - MP_USED(div);
4262     MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
4263     MP_ALLOC(&part)  = MP_ALLOC(rem)  - unusedRem;
4264     MP_USED(&part)   = MP_USED(div);
4265     if (s_mp_cmp(&part, div) < 0) {
4266       -- unusedRem;
4267 #if MP_ARGCHK == 2
4268       assert(unusedRem >= 0);
4269 #endif
4270       -- MP_DIGITS(&part);
4271       ++ MP_USED(&part);
4272       ++ MP_ALLOC(&part);
4273     }
4274
4275     /* Compute a guess for the next quotient digit       */
4276     q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
4277     div_msd = MP_DIGIT(div, MP_USED(div) - 1);
4278     if (q_msd >= div_msd) {
4279       q_msd = 1;
4280     } else if (MP_USED(&part) > 1) {
4281 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4282       q_msd = (q_msd << MP_DIGIT_BIT) | MP_DIGIT(&part, MP_USED(&part) - 2);
4283       q_msd /= div_msd;
4284       if (q_msd == RADIX)
4285         --q_msd;
4286 #else
4287       mp_digit r;
4288       MP_CHECKOK( s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
4289                                   div_msd, &q_msd, &r) );
4290 #endif
4291     } else {
4292       q_msd = 0;
4293     }
4294 #if MP_ARGCHK == 2
4295     assert(q_msd > 0); /* This case should never occur any more. */
4296 #endif
4297     if (q_msd <= 0)
4298       break;
4299
4300     /* See what that multiplies out to                   */
4301     mp_copy(div, &t);
4302     MP_CHECKOK( s_mp_mul_d(&t, (mp_digit)q_msd) );
4303
4304     /*
4305        If it's too big, back it off.  We should not have to do this
4306        more than once, or, in rare cases, twice.  Knuth describes a
4307        method by which this could be reduced to a maximum of once, but
4308        I didn't implement that here.
4309      * When using s_mpv_div_2dx1d, we may have to do this 3 times.
4310      */
4311     for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
4312       --q_msd;
4313       s_mp_sub(&t, div);        /* t -= div */
4314     }
4315     if (i < 0) {
4316       res = MP_RANGE;
4317       goto CLEANUP;
4318     }
4319
4320     /* At this point, q_msd should be the right next digit   */
4321     MP_CHECKOK( s_mp_sub(&part, &t) );  /* part -= t */
4322     s_mp_clamp(rem);
4323
4324     /*
4325       Include the digit in the quotient.  We allocated enough memory
4326       for any quotient we could ever possibly get, so we should not
4327       have to check for failures here
4328      */
4329     MP_DIGIT(quot, unusedRem) = (mp_digit)q_msd;
4330   }
4331
4332   /* Denormalize remainder                */
4333   if (d) {
4334     s_mp_div_2d(rem, d);
4335   }
4336
4337   s_mp_clamp(quot);
4338
4339 CLEANUP:
4340   mp_clear(&t);
4341
4342   return res;
4343
4344 } /* end s_mp_div() */
4345
4346
4347 /* }}} */
4348
4349 /* {{{ s_mp_2expt(a, k) */
4350
4351 mp_err   s_mp_2expt(mp_int *a, mp_digit k)
4352 {
4353   mp_err    res;
4354   mp_size   dig, bit;
4355
4356   dig = k / DIGIT_BIT;
4357   bit = k % DIGIT_BIT;
4358
4359   mp_zero(a);
4360   if((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
4361     return res;
4362
4363   DIGIT(a, dig) |= ((mp_digit)1 << bit);
4364
4365   return MP_OKAY;
4366
4367 } /* end s_mp_2expt() */
4368
4369 /* }}} */
4370
4371 /* {{{ s_mp_reduce(x, m, mu) */
4372
4373 /*
4374   Compute Barrett reduction, x (mod m), given a precomputed value for
4375   mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
4376   faster than straight division, when many reductions by the same
4377   value of m are required (such as in modular exponentiation).  This
4378   can nearly halve the time required to do modular exponentiation,
4379   as compared to using the full integer divide to reduce.
4380
4381   This algorithm was derived from the _Handbook of Applied
4382   Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14,
4383   pp. 603-604.
4384  */
4385
4386 mp_err   s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
4387 {
4388   mp_int   q;
4389   mp_err   res;
4390
4391   if((res = mp_init_copy(&q, x)) != MP_OKAY)
4392     return res;
4393
4394   s_mp_rshd(&q, USED(m) - 1);  /* q1 = x / b^(k-1)  */
4395   s_mp_mul(&q, mu);            /* q2 = q1 * mu      */
4396   s_mp_rshd(&q, USED(m) + 1);  /* q3 = q2 / b^(k+1) */
4397
4398   /* x = x mod b^(k+1), quick (no division) */
4399   s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
4400
4401   /* q = q * m mod b^(k+1), quick (no division) */
4402   s_mp_mul(&q, m);
4403   s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
4404
4405   /* x = x - q */
4406   if((res = mp_sub(x, &q, x)) != MP_OKAY)
4407     goto CLEANUP;
4408
4409   /* If x < 0, add b^(k+1) to it */
4410   if(mp_cmp_z(x) < 0) {
4411     mp_set(&q, 1);
4412     if((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
4413       goto CLEANUP;
4414     if((res = mp_add(x, &q, x)) != MP_OKAY)
4415       goto CLEANUP;
4416   }
4417
4418   /* Back off if it's too big */
4419   while(mp_cmp(x, m) >= 0) {
4420     if((res = s_mp_sub(x, m)) != MP_OKAY)
4421       break;
4422   }
4423
4424  CLEANUP:
4425   mp_clear(&q);
4426
4427   return res;
4428
4429 } /* end s_mp_reduce() */
4430
4431 /* }}} */
4432
4433 /* }}} */
4434
4435 /* {{{ Primitive comparisons */
4436
4437 /* {{{ s_mp_cmp(a, b) */
4438
4439 /* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b           */
4440 int      s_mp_cmp(const mp_int *a, const mp_int *b)
4441 {
4442   mp_size used_a = MP_USED(a);
4443   {
4444     mp_size used_b = MP_USED(b);
4445
4446     if (used_a > used_b)
4447       goto IS_GT;
4448     if (used_a < used_b)
4449       goto IS_LT;
4450   }
4451   {
4452     mp_digit *pa, *pb;
4453     mp_digit da = 0, db = 0;
4454
4455 #define CMP_AB(n) if ((da = pa[n]) != (db = pb[n])) goto done
4456
4457     pa = MP_DIGITS(a) + used_a;
4458     pb = MP_DIGITS(b) + used_a;
4459     while (used_a >= 4) {
4460       pa     -= 4;
4461       pb     -= 4;
4462       used_a -= 4;
4463       CMP_AB(3);
4464       CMP_AB(2);
4465       CMP_AB(1);
4466       CMP_AB(0);
4467     }
4468     while (used_a-- > 0 && ((da = *--pa) == (db = *--pb)))
4469       /* do nothing */;
4470 done:
4471     if (da > db)
4472       goto IS_GT;
4473     if (da < db)
4474       goto IS_LT;
4475   }
4476   return MP_EQ;
4477 IS_LT:
4478   return MP_LT;
4479 IS_GT:
4480   return MP_GT;
4481 } /* end s_mp_cmp() */
4482
4483 /* }}} */
4484
4485 /* {{{ s_mp_cmp_d(a, d) */
4486
4487 /* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d             */
4488 int      s_mp_cmp_d(const mp_int *a, mp_digit d)
4489 {
4490   if(USED(a) > 1)
4491     return MP_GT;
4492
4493   if(DIGIT(a, 0) < d)
4494     return MP_LT;
4495   else if(DIGIT(a, 0) > d)
4496     return MP_GT;
4497   else
4498     return MP_EQ;
4499
4500 } /* end s_mp_cmp_d() */
4501
4502 /* }}} */
4503
4504 /* {{{ s_mp_ispow2(v) */
4505
4506 /*
4507   Returns -1 if the value is not a power of two; otherwise, it returns
4508   k such that v = 2^k, i.e. lg(v).
4509  */
4510 int      s_mp_ispow2(const mp_int *v)
4511 {
4512   mp_digit d;
4513   int      extra = 0, ix;
4514
4515   ix = MP_USED(v) - 1;
4516   d = MP_DIGIT(v, ix); /* most significant digit of v */
4517
4518   extra = s_mp_ispow2d(d);
4519   if (extra < 0 || ix == 0)
4520     return extra;
4521
4522   while (--ix >= 0) {
4523     if (DIGIT(v, ix) != 0)
4524       return -1; /* not a power of two */
4525     extra += MP_DIGIT_BIT;
4526   }
4527
4528   return extra;
4529
4530 } /* end s_mp_ispow2() */
4531
4532 /* }}} */
4533
4534 /* {{{ s_mp_ispow2d(d) */
4535
4536 int      s_mp_ispow2d(mp_digit d)
4537 {
4538   if ((d != 0) && ((d & (d-1)) == 0)) { /* d is a power of 2 */
4539     int pow = 0;
4540 #if defined (MP_USE_UINT_DIGIT)
4541     if (d & 0xffff0000U)
4542       pow += 16;
4543     if (d & 0xff00ff00U)
4544       pow += 8;
4545     if (d & 0xf0f0f0f0U)
4546       pow += 4;
4547     if (d & 0xccccccccU)
4548       pow += 2;
4549     if (d & 0xaaaaaaaaU)
4550       pow += 1;
4551 #elif defined(MP_USE_LONG_LONG_DIGIT)
4552     if (d & 0xffffffff00000000ULL)
4553       pow += 32;
4554     if (d & 0xffff0000ffff0000ULL)
4555       pow += 16;
4556     if (d & 0xff00ff00ff00ff00ULL)
4557       pow += 8;
4558     if (d & 0xf0f0f0f0f0f0f0f0ULL)
4559       pow += 4;
4560     if (d & 0xccccccccccccccccULL)
4561       pow += 2;
4562     if (d & 0xaaaaaaaaaaaaaaaaULL)
4563       pow += 1;
4564 #elif defined(MP_USE_LONG_DIGIT)
4565     if (d & 0xffffffff00000000UL)
4566       pow += 32;
4567     if (d & 0xffff0000ffff0000UL)
4568       pow += 16;
4569     if (d & 0xff00ff00ff00ff00UL)
4570       pow += 8;
4571     if (d & 0xf0f0f0f0f0f0f0f0UL)
4572       pow += 4;
4573     if (d & 0xccccccccccccccccUL)
4574       pow += 2;
4575     if (d & 0xaaaaaaaaaaaaaaaaUL)
4576       pow += 1;
4577 #else
4578 #error "unknown type for mp_digit"
4579 #endif
4580     return pow;
4581   }
4582   return -1;
4583
4584 } /* end s_mp_ispow2d() */
4585
4586 /* }}} */
4587
4588 /* }}} */
4589
4590 /* {{{ Primitive I/O helpers */
4591
4592 /* {{{ s_mp_tovalue(ch, r) */
4593
4594 /*
4595   Convert the given character to its digit value, in the given radix.
4596   If the given character is not understood in the given radix, -1 is
4597   returned.  Otherwise the digit's numeric value is returned.
4598
4599   The results will be odd if you use a radix < 2 or > 62, you are
4600   expected to know what you're up to.
4601  */
4602 int      s_mp_tovalue(char ch, int r)
4603 {
4604   int    val, xch;
4605
4606   if(r > 36)
4607     xch = ch;
4608   else
4609     xch = toupper(ch);
4610
4611   if(isdigit(xch))
4612     val = xch - '0';
4613   else if(isupper(xch))
4614     val = xch - 'A' + 10;
4615   else if(islower(xch))
4616     val = xch - 'a' + 36;
4617   else if(xch == '+')
4618     val = 62;
4619   else if(xch == '/')
4620     val = 63;
4621   else
4622     return -1;
4623
4624   if(val < 0 || val >= r)
4625     return -1;
4626
4627   return val;
4628
4629 } /* end s_mp_tovalue() */
4630
4631 /* }}} */
4632
4633 /* {{{ s_mp_todigit(val, r, low) */
4634
4635 /*
4636   Convert val to a radix-r digit, if possible.  If val is out of range
4637   for r, returns zero.  Otherwise, returns an ASCII character denoting
4638   the value in the given radix.
4639
4640   The results may be odd if you use a radix < 2 or > 64, you are
4641   expected to know what you're doing.
4642  */
4643
4644 char     s_mp_todigit(mp_digit val, int r, int low)
4645 {
4646   char   ch;
4647
4648   if(val >= r)
4649     return 0;
4650
4651   ch = s_dmap_1[val];
4652
4653   if(r <= 36 && low)
4654     ch = tolower(ch);
4655
4656   return ch;
4657
4658 } /* end s_mp_todigit() */
4659
4660 /* }}} */
4661
4662 /* {{{ s_mp_outlen(bits, radix) */
4663
4664 /*
4665    Return an estimate for how long a string is needed to hold a radix
4666    r representation of a number with 'bits' significant bits, plus an
4667    extra for a zero terminator (assuming C style strings here)
4668  */
4669 int      s_mp_outlen(int bits, int r)
4670 {
4671   return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
4672
4673 } /* end s_mp_outlen() */
4674
4675 /* }}} */
4676
4677 /* }}} */
4678
4679 /* {{{ mp_read_unsigned_octets(mp, str, len) */
4680 /* mp_read_unsigned_octets(mp, str, len)
4681    Read in a raw value (base 256) into the given mp_int
4682    No sign bit, number is positive.  Leading zeros ignored.
4683  */
4684
4685 mp_err
4686 mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
4687 {
4688   int            count;
4689   mp_err         res;
4690   mp_digit       d;
4691
4692   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
4693
4694   mp_zero(mp);
4695
4696   count = len % sizeof(mp_digit);
4697   if (count) {
4698     for (d = 0; count-- > 0; --len) {
4699       d = (d << 8) | *str++;
4700     }
4701     MP_DIGIT(mp, 0) = d;
4702   }
4703
4704   /* Read the rest of the digits */
4705   for(; len > 0; len -= sizeof(mp_digit)) {
4706     for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
4707       d = (d << 8) | *str++;
4708     }
4709     if (MP_EQ == mp_cmp_z(mp)) {
4710       if (!d)
4711         continue;
4712     } else {
4713       if((res = s_mp_lshd(mp, 1)) != MP_OKAY)
4714         return res;
4715     }
4716     MP_DIGIT(mp, 0) = d;
4717   }
4718   return MP_OKAY;
4719 } /* end mp_read_unsigned_octets() */
4720 /* }}} */
4721
4722 /* {{{ mp_unsigned_octet_size(mp) */
4723 int
4724 mp_unsigned_octet_size(const mp_int *mp)
4725 {
4726   int  bytes;
4727   int  ix;
4728   mp_digit  d = 0;
4729
4730   ARGCHK(mp != NULL, MP_BADARG);
4731   ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
4732
4733   bytes = (USED(mp) * sizeof(mp_digit));
4734
4735   /* subtract leading zeros. */
4736   /* Iterate over each digit... */
4737   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4738     d = DIGIT(mp, ix);
4739     if (d)
4740         break;
4741     bytes -= sizeof(d);
4742   }
4743   if (!bytes)
4744     return 1;
4745
4746   /* Have MSD, check digit bytes, high order first */
4747   for(ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
4748     unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
4749     if (x)
4750         break;
4751     --bytes;
4752   }
4753   return bytes;
4754 } /* end mp_unsigned_octet_size() */
4755 /* }}} */
4756
4757 /* {{{ mp_to_unsigned_octets(mp, str) */
4758 /* output a buffer of big endian octets no longer than specified. */
4759 mp_err
4760 mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4761 {
4762   int  ix, pos = 0;
4763   int  bytes;
4764
4765   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4766
4767   bytes = mp_unsigned_octet_size(mp);
4768   ARGCHK(bytes <= maxlen, MP_BADARG);
4769
4770   /* Iterate over each digit... */
4771   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4772     mp_digit  d = DIGIT(mp, ix);
4773     int       jx;
4774
4775     /* Unpack digit bytes, high order first */
4776     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4777       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4778       if (!pos && !x)   /* suppress leading zeros */
4779         continue;
4780       str[pos++] = x;
4781     }
4782   }
4783   if (!pos)
4784     str[pos++] = 0;
4785   return pos;
4786 } /* end mp_to_unsigned_octets() */
4787 /* }}} */
4788
4789 /* {{{ mp_to_signed_octets(mp, str) */
4790 /* output a buffer of big endian octets no longer than specified. */
4791 mp_err
4792 mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4793 {
4794   int  ix, pos = 0;
4795   int  bytes;
4796
4797   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4798
4799   bytes = mp_unsigned_octet_size(mp);
4800   ARGCHK(bytes <= maxlen, MP_BADARG);
4801
4802   /* Iterate over each digit... */
4803   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4804     mp_digit  d = DIGIT(mp, ix);
4805     int       jx;
4806
4807     /* Unpack digit bytes, high order first */
4808     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4809       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4810       if (!pos) {
4811         if (!x)         /* suppress leading zeros */
4812           continue;
4813         if (x & 0x80) { /* add one leading zero to make output positive.  */
4814           ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
4815           if (bytes + 1 > maxlen)
4816             return MP_BADARG;
4817           str[pos++] = 0;
4818         }
4819       }
4820       str[pos++] = x;
4821     }
4822   }
4823   if (!pos)
4824     str[pos++] = 0;
4825   return pos;
4826 } /* end mp_to_signed_octets() */
4827 /* }}} */
4828
4829 /* {{{ mp_to_fixlen_octets(mp, str) */
4830 /* output a buffer of big endian octets exactly as long as requested. */
4831 mp_err
4832 mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
4833 {
4834   int  ix, pos = 0;
4835   int  bytes;
4836
4837   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4838
4839   bytes = mp_unsigned_octet_size(mp);
4840   ARGCHK(bytes <= length, MP_BADARG);
4841
4842   /* place any needed leading zeros */
4843   for (;length > bytes; --length) {
4844         *str++ = 0;
4845   }
4846
4847   /* Iterate over each digit... */
4848   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4849     mp_digit  d = DIGIT(mp, ix);
4850     int       jx;
4851
4852     /* Unpack digit bytes, high order first */
4853     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4854       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4855       if (!pos && !x)   /* suppress leading zeros */
4856         continue;
4857       str[pos++] = x;
4858     }
4859   }
4860   if (!pos)
4861     str[pos++] = 0;
4862   return MP_OKAY;
4863 } /* end mp_to_fixlen_octets() */
4864 /* }}} */
4865
4866
4867 /*------------------------------------------------------------------------*/
4868 /* HERE THERE BE DRAGONS                                                  */
4869 /* END CSTYLED */