usr/src/common/crypto/ecc/ecp_192.c

   1 /*
   2  * ***** BEGIN LICENSE BLOCK *****
   3  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   4  *
   5  * The contents of this file are subject to the Mozilla Public License Version
   6  * 1.1 (the "License"); you may not use this file except in compliance with
   7  * the License. You may obtain a copy of the License at
   8  * http://www.mozilla.org/MPL/
   9  *
  10  * Software distributed under the License is distributed on an "AS IS" basis,
  11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12  * for the specific language governing rights and limitations under the
  13  * License.
  14  *
  15  * The Original Code is the elliptic curve math library for prime field curves.
  16  *
  17  * The Initial Developer of the Original Code is
  18  * Sun Microsystems, Inc.
  19  * Portions created by the Initial Developer are Copyright (C) 2003
  20  * the Initial Developer. All Rights Reserved.
  21  *
  22  * Contributor(s):
  23  *   Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories
  24  *
  25  * Alternatively, the contents of this file may be used under the terms of
  26  * either the GNU General Public License Version 2 or later (the "GPL"), or
  27  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  28  * in which case the provisions of the GPL or the LGPL are applicable instead
  29  * of those above. If you wish to allow use of your version of this file only
  30  * under the terms of either the GPL or the LGPL, and not to allow others to
  31  * use your version of this file under the terms of the MPL, indicate your
  32  * decision by deleting the provisions above and replace them with the notice
  33  * and other provisions required by the GPL or the LGPL. If you do not delete
  34  * the provisions above, a recipient may use your version of this file under
  35  * the terms of any one of the MPL, the GPL or the LGPL.
  36  *
  37  * ***** END LICENSE BLOCK ***** */
  38 /*
  39  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  40  * Use is subject to license terms.
  41  *
  42  * Sun elects to use this software under the MPL license.
  43  */
  44
  45 #include "ecp.h"
  46 #include "mpi.h"
  47 #include "mplogic.h"
  48 #include "mpi-priv.h"
  49 #ifndef _KERNEL
  50 #include <stdlib.h>
  51 #endif
  52
  53 #define ECP192_DIGITS ECL_CURVE_DIGITS(192)
  54
  55 /* Fast modular reduction for p192 = 2^192 - 2^64 - 1.  a can be r. Uses
  56  * algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software
  57  * Implementation of the NIST Elliptic Curves over Prime Fields. */
  58 mp_err
  59 ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
  60 {
  61         mp_err res = MP_OKAY;
  62         mp_size a_used = MP_USED(a);
  63         mp_digit r3;
  64 #ifndef MPI_AMD64_ADD
  65         mp_digit carry;
  66 #endif
  67 #ifdef ECL_THIRTY_TWO_BIT
  68         mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0;
  69         mp_digit r0a, r0b, r1a, r1b, r2a, r2b;
  70 #else
  71         mp_digit a5 = 0, a4 = 0, a3 = 0;
  72         mp_digit r0, r1, r2;
  73 #endif
  74
  75         /* reduction not needed if a is not larger than field size */
  76         if (a_used < ECP192_DIGITS) {
  77                 if (a == r) {
  78                         return MP_OKAY;
  79                 }
  80                 return mp_copy(a, r);
  81         }
  82
  83         /* for polynomials larger than twice the field size, use regular
  84          * reduction */
  85         if (a_used > ECP192_DIGITS*2) {
  86                 MP_CHECKOK(mp_mod(a, &meth->irr, r));
  87         } else {
  88                 /* copy out upper words of a */
  89
  90 #ifdef ECL_THIRTY_TWO_BIT
  91
  92                 /* in all the math below,
  93                  * nXb is most signifiant, nXa is least significant */
  94                 switch (a_used) {
  95                 case 12:
  96                         a5b = MP_DIGIT(a, 11);
  97                         /* FALLTHROUGH */
  98                 case 11:
  99                         a5a = MP_DIGIT(a, 10);
 100                         /* FALLTHROUGH */
 101                 case 10:
 102                         a4b = MP_DIGIT(a, 9);
 103                         /* FALLTHROUGH */
 104                 case 9:
 105                         a4a = MP_DIGIT(a, 8);
 106                         /* FALLTHROUGH */
 107                 case 8:
 108                         a3b = MP_DIGIT(a, 7);
 109                         /* FALLTHROUGH */
 110                 case 7:
 111                         a3a = MP_DIGIT(a, 6);
 112                 }
 113
 114
 115                 r2b= MP_DIGIT(a, 5);
 116                 r2a= MP_DIGIT(a, 4);
 117                 r1b = MP_DIGIT(a, 3);
 118                 r1a = MP_DIGIT(a, 2);
 119                 r0b = MP_DIGIT(a, 1);
 120                 r0a = MP_DIGIT(a, 0);
 121
 122                 /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
 123                 MP_ADD_CARRY(r0a, a3a, r0a, 0,    carry);
 124                 MP_ADD_CARRY(r0b, a3b, r0b, carry, carry);
 125                 MP_ADD_CARRY(r1a, a3a, r1a, carry, carry);
 126                 MP_ADD_CARRY(r1b, a3b, r1b, carry, carry);
 127                 MP_ADD_CARRY(r2a, a4a, r2a, carry, carry);
 128                 MP_ADD_CARRY(r2b, a4b, r2b, carry, carry);
 129                 r3 = carry; carry = 0;
 130                 MP_ADD_CARRY(r0a, a5a, r0a, 0,     carry);
 131                 MP_ADD_CARRY(r0b, a5b, r0b, carry, carry);
 132                 MP_ADD_CARRY(r1a, a5a, r1a, carry, carry);
 133                 MP_ADD_CARRY(r1b, a5b, r1b, carry, carry);
 134                 MP_ADD_CARRY(r2a, a5a, r2a, carry, carry);
 135                 MP_ADD_CARRY(r2b, a5b, r2b, carry, carry);
 136                 r3 += carry;
 137                 MP_ADD_CARRY(r1a, a4a, r1a, 0,     carry);
 138                 MP_ADD_CARRY(r1b, a4b, r1b, carry, carry);
 139                 MP_ADD_CARRY(r2a,   0, r2a, carry, carry);
 140                 MP_ADD_CARRY(r2b,   0, r2b, carry, carry);
 141                 r3 += carry;
 142
 143                 /* reduce out the carry */
 144                 while (r3) {
 145                         MP_ADD_CARRY(r0a, r3, r0a, 0,     carry);
 146                         MP_ADD_CARRY(r0b,  0, r0b, carry, carry);
 147                         MP_ADD_CARRY(r1a, r3, r1a, carry, carry);
 148                         MP_ADD_CARRY(r1b,  0, r1b, carry, carry);
 149                         MP_ADD_CARRY(r2a,  0, r2a, carry, carry);
 150                         MP_ADD_CARRY(r2b,  0, r2b, carry, carry);
 151                         r3 = carry;
 152                 }
 153
 154                 /* check for final reduction */
 155                 /*
 156                  * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
 157                  * 0xffffffffffffffff. That means we can only be over and need
 158                  * one more reduction
 159                  *  if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
 160                  *     and
 161                  *     r1 == 0xffffffffffffffffff   or
 162                  *     r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
 163                  * In all cases, we subtract the field (or add the 2's
 164                  * complement value (1,1,0)).  (r0, r1, r2)
 165                  */
 166                 if (((r2b == 0xffffffff) && (r2a == 0xffffffff)
 167                         && (r1b == 0xffffffff) ) &&
 168                            ((r1a == 0xffffffff) ||
 169                             (r1a == 0xfffffffe) && (r0a == 0xffffffff) &&
 170                                         (r0b == 0xffffffff)) ) {
 171                         /* do a quick subtract */
 172                         MP_ADD_CARRY(r0a, 1, r0a, 0, carry);
 173                         r0b += carry;
 174                         r1a = r1b = r2a = r2b = 0;
 175                 }
 176
 177                 /* set the lower words of r */
 178                 if (a != r) {
 179                         MP_CHECKOK(s_mp_pad(r, 6));
 180                 }
 181                 MP_DIGIT(r, 5) = r2b;
 182                 MP_DIGIT(r, 4) = r2a;
 183                 MP_DIGIT(r, 3) = r1b;
 184                 MP_DIGIT(r, 2) = r1a;
 185                 MP_DIGIT(r, 1) = r0b;
 186                 MP_DIGIT(r, 0) = r0a;
 187                 MP_USED(r) = 6;
 188 #else
 189                 switch (a_used) {
 190                 case 6:
 191                         a5 = MP_DIGIT(a, 5);
 192                         /* FALLTHROUGH */
 193                 case 5:
 194                         a4 = MP_DIGIT(a, 4);
 195                         /* FALLTHROUGH */
 196                 case 4:
 197                         a3 = MP_DIGIT(a, 3);
 198                 }
 199
 200                 r2 = MP_DIGIT(a, 2);
 201                 r1 = MP_DIGIT(a, 1);
 202                 r0 = MP_DIGIT(a, 0);
 203
 204                 /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
 205 #ifndef MPI_AMD64_ADD
 206                 MP_ADD_CARRY(r0, a3, r0, 0,     carry);
 207                 MP_ADD_CARRY(r1, a3, r1, carry, carry);
 208                 MP_ADD_CARRY(r2, a4, r2, carry, carry);
 209                 r3 = carry;
 210                 MP_ADD_CARRY(r0, a5, r0, 0,     carry);
 211                 MP_ADD_CARRY(r1, a5, r1, carry, carry);
 212                 MP_ADD_CARRY(r2, a5, r2, carry, carry);
 213                 r3 += carry;
 214                 MP_ADD_CARRY(r1, a4, r1, 0,     carry);
 215                 MP_ADD_CARRY(r2,  0, r2, carry, carry);
 216                 r3 += carry;
 217
 218 #else
 219                 r2 = MP_DIGIT(a, 2);
 220                 r1 = MP_DIGIT(a, 1);
 221                 r0 = MP_DIGIT(a, 0);
 222
 223                 /* set the lower words of r */
 224                 __asm__ (
 225                 "xorq   %3,%3           \n\t"
 226                 "addq   %4,%0           \n\t"
 227                 "adcq   %4,%1           \n\t"
 228                 "adcq   %5,%2           \n\t"
 229                 "adcq   $0,%3           \n\t"
 230                 "addq   %6,%0           \n\t"
 231                 "adcq   %6,%1           \n\t"
 232                 "adcq   %6,%2           \n\t"
 233                 "adcq   $0,%3           \n\t"
 234                 "addq   %5,%1           \n\t"
 235                 "adcq   $0,%2           \n\t"
 236                 "adcq   $0,%3           \n\t"
 237                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3),
 238                   "=r"(a4), "=r"(a5)
 239                 : "0" (r0), "1" (r1), "2" (r2), "3" (r3),
 240                   "4" (a3), "5" (a4), "6"(a5)
 241                 : "%cc" );
 242 #endif
 243
 244                 /* reduce out the carry */
 245                 while (r3) {
 246 #ifndef MPI_AMD64_ADD
 247                         MP_ADD_CARRY(r0, r3, r0, 0,     carry);
 248                         MP_ADD_CARRY(r1, r3, r1, carry, carry);
 249                         MP_ADD_CARRY(r2,  0, r2, carry, carry);
 250                         r3 = carry;
 251 #else
 252                         a3=r3;
 253                         __asm__ (
 254                         "xorq   %3,%3           \n\t"
 255                         "addq   %4,%0           \n\t"
 256                         "adcq   %4,%1           \n\t"
 257                         "adcq   $0,%2           \n\t"
 258                         "adcq   $0,%3           \n\t"
 259                         : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3)
 260                         : "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3)
 261                         : "%cc" );
 262 #endif
 263                 }
 264
 265                 /* check for final reduction */
 266                 /*
 267                  * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
 268                  * 0xffffffffffffffff. That means we can only be over and need
 269                  * one more reduction
 270                  *  if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
 271                  *     and
 272                  *     r1 == 0xffffffffffffffffff   or
 273                  *     r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
 274                  * In all cases, we subtract the field (or add the 2's
 275                  * complement value (1,1,0)).  (r0, r1, r2)
 276                  */
 277                 if (r3 || ((r2 == MP_DIGIT_MAX) &&
 278                       ((r1 == MP_DIGIT_MAX) ||
 279                         ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
 280                         /* do a quick subtract */
 281                         r0++;
 282                         r1 = r2 = 0;
 283                 }
 284                 /* set the lower words of r */
 285                 if (a != r) {
 286                         MP_CHECKOK(s_mp_pad(r, 3));
 287                 }
 288                 MP_DIGIT(r, 2) = r2;
 289                 MP_DIGIT(r, 1) = r1;
 290                 MP_DIGIT(r, 0) = r0;
 291                 MP_USED(r) = 3;
 292 #endif
 293         }
 294
 295   CLEANUP:
 296         return res;
 297 }
 298
 299 #ifndef ECL_THIRTY_TWO_BIT
 300 /* Compute the sum of 192 bit curves. Do the work in-line since the
 301  * number of words are so small, we don't want to overhead of mp function
 302  * calls.  Uses optimized modular reduction for p192.
 303  */
 304 mp_err
 305 ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r,
 306                         const GFMethod *meth)
 307 {
 308         mp_err res = MP_OKAY;
 309         mp_digit a0 = 0, a1 = 0, a2 = 0;
 310         mp_digit r0 = 0, r1 = 0, r2 = 0;
 311         mp_digit carry;
 312
 313         switch(MP_USED(a)) {
 314         case 3:
 315                 a2 = MP_DIGIT(a,2);
 316                 /* FALLTHROUGH */
 317         case 2:
 318                 a1 = MP_DIGIT(a,1);
 319                 /* FALLTHROUGH */
 320         case 1:
 321                 a0 = MP_DIGIT(a,0);
 322         }
 323         switch(MP_USED(b)) {
 324         case 3:
 325                 r2 = MP_DIGIT(b,2);
 326                 /* FALLTHROUGH */
 327         case 2:
 328                 r1 = MP_DIGIT(b,1);
 329                 /* FALLTHROUGH */
 330         case 1:
 331                 r0 = MP_DIGIT(b,0);
 332         }
 333
 334 #ifndef MPI_AMD64_ADD
 335         MP_ADD_CARRY(a0, r0, r0, 0,     carry);
 336         MP_ADD_CARRY(a1, r1, r1, carry, carry);
 337         MP_ADD_CARRY(a2, r2, r2, carry, carry);
 338 #else
 339         __asm__ (
 340                 "xorq   %3,%3           \n\t"
 341                 "addq   %4,%0           \n\t"
 342                 "adcq   %5,%1           \n\t"
 343                 "adcq   %6,%2           \n\t"
 344                 "adcq   $0,%3           \n\t"
 345                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
 346                 : "r" (a0), "r" (a1), "r" (a2), "0" (r0),
 347                   "1" (r1), "2" (r2)
 348                 : "%cc" );
 349 #endif
 350
 351         /* Do quick 'subract' if we've gone over
 352          * (add the 2's complement of the curve field) */
 353         if (carry || ((r2 == MP_DIGIT_MAX) &&
 354                       ((r1 == MP_DIGIT_MAX) ||
 355                         ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
 356 #ifndef MPI_AMD64_ADD
 357                 MP_ADD_CARRY(r0, 1, r0, 0,     carry);
 358                 MP_ADD_CARRY(r1, 1, r1, carry, carry);
 359                 MP_ADD_CARRY(r2, 0, r2, carry, carry);
 360 #else
 361                 __asm__ (
 362                         "addq   $1,%0           \n\t"
 363                         "adcq   $1,%1           \n\t"
 364                         "adcq   $0,%2           \n\t"
 365                         : "=r"(r0), "=r"(r1), "=r"(r2)
 366                         : "0" (r0), "1" (r1), "2" (r2)
 367                         : "%cc" );
 368 #endif
 369         }
 370
 371
 372         MP_CHECKOK(s_mp_pad(r, 3));
 373         MP_DIGIT(r, 2) = r2;
 374         MP_DIGIT(r, 1) = r1;
 375         MP_DIGIT(r, 0) = r0;
 376         MP_SIGN(r) = MP_ZPOS;
 377         MP_USED(r) = 3;
 378         s_mp_clamp(r);
 379
 380
 381   CLEANUP:
 382         return res;
 383 }
 384
 385 /* Compute the diff of 192 bit curves. Do the work in-line since the
 386  * number of words are so small, we don't want to overhead of mp function
 387  * calls.  Uses optimized modular reduction for p192.
 388  */
 389 mp_err
 390 ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r,
 391                         const GFMethod *meth)
 392 {
 393         mp_err res = MP_OKAY;
 394         mp_digit b0 = 0, b1 = 0, b2 = 0;
 395         mp_digit r0 = 0, r1 = 0, r2 = 0;
 396         mp_digit borrow;
 397
 398         switch(MP_USED(a)) {
 399         case 3:
 400                 r2 = MP_DIGIT(a,2);
 401                 /* FALLTHROUGH */
 402         case 2:
 403                 r1 = MP_DIGIT(a,1);
 404                 /* FALLTHROUGH */
 405         case 1:
 406                 r0 = MP_DIGIT(a,0);
 407         }
 408
 409         switch(MP_USED(b)) {
 410         case 3:
 411                 b2 = MP_DIGIT(b,2);
 412                 /* FALLTHROUGH */
 413         case 2:
 414                 b1 = MP_DIGIT(b,1);
 415                 /* FALLTHROUGH */
 416         case 1:
 417                 b0 = MP_DIGIT(b,0);
 418         }
 419
 420 #ifndef MPI_AMD64_ADD
 421         MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
 422         MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
 423         MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
 424 #else
 425         __asm__ (
 426                 "xorq   %3,%3           \n\t"
 427                 "subq   %4,%0           \n\t"
 428                 "sbbq   %5,%1           \n\t"
 429                 "sbbq   %6,%2           \n\t"
 430                 "adcq   $0,%3           \n\t"
 431                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow)
 432                 : "r" (b0), "r" (b1), "r" (b2), "0" (r0),
 433                   "1" (r1), "2" (r2)
 434                 : "%cc" );
 435 #endif
 436
 437         /* Do quick 'add' if we've gone under 0
 438          * (subtract the 2's complement of the curve field) */
 439         if (borrow) {
 440 #ifndef MPI_AMD64_ADD
 441                 MP_SUB_BORROW(r0, 1, r0, 0,     borrow);
 442                 MP_SUB_BORROW(r1, 1, r1, borrow, borrow);
 443                 MP_SUB_BORROW(r2,  0, r2, borrow, borrow);
 444 #else
 445                 __asm__ (
 446                         "subq   $1,%0           \n\t"
 447                         "sbbq   $1,%1           \n\t"
 448                         "sbbq   $0,%2           \n\t"
 449                         : "=r"(r0), "=r"(r1), "=r"(r2)
 450                         : "0" (r0), "1" (r1), "2" (r2)
 451                         : "%cc" );
 452 #endif
 453         }
 454
 455         MP_CHECKOK(s_mp_pad(r, 3));
 456         MP_DIGIT(r, 2) = r2;
 457         MP_DIGIT(r, 1) = r1;
 458         MP_DIGIT(r, 0) = r0;
 459         MP_SIGN(r) = MP_ZPOS;
 460         MP_USED(r) = 3;
 461         s_mp_clamp(r);
 462
 463   CLEANUP:
 464         return res;
 465 }
 466
 467 #endif
 468
 469 /* Compute the square of polynomial a, reduce modulo p192. Store the
 470  * result in r.  r could be a.  Uses optimized modular reduction for p192.
 471  */
 472 mp_err
 473 ec_GFp_nistp192_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
 474 {
 475         mp_err res = MP_OKAY;
 476
 477         MP_CHECKOK(mp_sqr(a, r));
 478         MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
 479   CLEANUP:
 480         return res;
 481 }
 482
 483 /* Compute the product of two polynomials a and b, reduce modulo p192.
 484  * Store the result in r.  r could be a or b; a could be b.  Uses
 485  * optimized modular reduction for p192. */
 486 mp_err
 487 ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r,
 488                                         const GFMethod *meth)
 489 {
 490         mp_err res = MP_OKAY;
 491
 492         MP_CHECKOK(mp_mul(a, b, r));
 493         MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
 494   CLEANUP:
 495         return res;
 496 }
 497
 498 /* Divides two field elements. If a is NULL, then returns the inverse of
 499  * b. */
 500 mp_err
 501 ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r,
 502                    const GFMethod *meth)
 503 {
 504         mp_err res = MP_OKAY;
 505         mp_int t;
 506
 507         /* If a is NULL, then return the inverse of b, otherwise return a/b. */
 508         if (a == NULL) {
 509                 return  mp_invmod(b, &meth->irr, r);
 510         } else {
 511                 /* MPI doesn't support divmod, so we implement it using invmod and
 512                  * mulmod. */
 513                 MP_CHECKOK(mp_init(&t, FLAG(b)));
 514                 MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
 515                 MP_CHECKOK(mp_mul(a, &t, r));
 516                 MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
 517           CLEANUP:
 518                 mp_clear(&t);
 519                 return res;
 520         }
 521 }
 522
 523 /* Wire in fast field arithmetic and precomputation of base point for
 524  * named curves. */
 525 mp_err
 526 ec_group_set_gfp192(ECGroup *group, ECCurveName name)
 527 {
 528         if (name == ECCurve_NIST_P192) {
 529                 group->meth->field_mod = &ec_GFp_nistp192_mod;
 530                 group->meth->field_mul = &ec_GFp_nistp192_mul;
 531                 group->meth->field_sqr = &ec_GFp_nistp192_sqr;
 532                 group->meth->field_div = &ec_GFp_nistp192_div;
 533 #ifndef ECL_THIRTY_TWO_BIT
 534                 group->meth->field_add = &ec_GFp_nistp192_add;
 535                 group->meth->field_sub = &ec_GFp_nistp192_sub;
 536 #endif
 537         }
 538         return MP_OKAY;
 539 }