1 /* $OpenBSD: ecp_nistp521.c,v 1.18 2017/01/29 17:49:23 beck Exp $ */
3 * Written by Adam Langley (Google) for the OpenSSL project
6 * Copyright (c) 2011 Google Inc.
8 * Permission to use, copy, modify, and distribute this software for any
9 * purpose with or without fee is hereby granted, provided that the above
10 * copyright notice and this permission notice appear in all copies.
12 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22 * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication
24 * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25 * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26 * work which got its smarts from Daniel J. Bernstein's work on the same.
32 #include <openssl/opensslconf.h>
34 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
36 #include <openssl/err.h>
39 #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
40 /* even with gcc, the typedef won't work for 32-bit platforms */
41 typedef __uint128_t uint128_t
; /* nonstandard; implemented by gcc on 64-bit platforms */
43 #error "Need GCC 3.1 or later to define type uint128_t"
50 /* The underlying field.
52 * P521 operates over GF(2^521-1). We can serialise an element of this field
53 * into 66 bytes where the most significant byte contains only a single bit. We
54 * call this an felem_bytearray. */
56 typedef u8 felem_bytearray
[66];
58 /* These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5.
59 * These values are big-endian. */
60 static const felem_bytearray nistp521_curve_params
[5] =
62 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */
63 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
64 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
65 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
66 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
68 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
69 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
71 {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */
72 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
73 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
74 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
75 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
77 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
80 {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */
81 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
82 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
83 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
84 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
85 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
86 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
87 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
89 {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */
90 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
91 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
92 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
93 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
94 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
95 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
96 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
98 {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */
99 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
100 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
101 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
102 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
103 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
104 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
105 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
109 /* The representation of field elements.
110 * ------------------------------------
112 * We represent field elements with nine values. These values are either 64 or
113 * 128 bits and the field element represented is:
114 * v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p)
115 * Each of the nine values is called a 'limb'. Since the limbs are spaced only
116 * 58 bits apart, but are greater than 58 bits in length, the most significant
117 * bits of each limb overlap with the least significant bits of the next.
119 * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
124 typedef uint64_t limb
;
125 typedef limb felem
[NLIMBS
];
126 typedef uint128_t largefelem
[NLIMBS
];
128 static const limb bottom57bits
= 0x1ffffffffffffff;
129 static const limb bottom58bits
= 0x3ffffffffffffff;
131 /* bin66_to_felem takes a little-endian byte array and converts it into felem
132 * form. This assumes that the CPU is little-endian. */
134 bin66_to_felem(felem out
, const u8 in
[66])
136 out
[0] = (*((limb
*) & in
[0])) & bottom58bits
;
137 out
[1] = (*((limb
*) & in
[7]) >> 2) & bottom58bits
;
138 out
[2] = (*((limb
*) & in
[14]) >> 4) & bottom58bits
;
139 out
[3] = (*((limb
*) & in
[21]) >> 6) & bottom58bits
;
140 out
[4] = (*((limb
*) & in
[29])) & bottom58bits
;
141 out
[5] = (*((limb
*) & in
[36]) >> 2) & bottom58bits
;
142 out
[6] = (*((limb
*) & in
[43]) >> 4) & bottom58bits
;
143 out
[7] = (*((limb
*) & in
[50]) >> 6) & bottom58bits
;
144 out
[8] = (*((limb
*) & in
[58])) & bottom57bits
;
147 /* felem_to_bin66 takes an felem and serialises into a little endian, 66 byte
148 * array. This assumes that the CPU is little-endian. */
150 felem_to_bin66(u8 out
[66], const felem in
)
153 (*((limb
*) & out
[0])) = in
[0];
154 (*((limb
*) & out
[7])) |= in
[1] << 2;
155 (*((limb
*) & out
[14])) |= in
[2] << 4;
156 (*((limb
*) & out
[21])) |= in
[3] << 6;
157 (*((limb
*) & out
[29])) = in
[4];
158 (*((limb
*) & out
[36])) |= in
[5] << 2;
159 (*((limb
*) & out
[43])) |= in
[6] << 4;
160 (*((limb
*) & out
[50])) |= in
[7] << 6;
161 (*((limb
*) & out
[58])) = in
[8];
164 /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
166 flip_endian(u8
* out
, const u8
* in
, unsigned len
)
169 for (i
= 0; i
< len
; ++i
)
170 out
[i
] = in
[len
- 1 - i
];
173 /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
175 BN_to_felem(felem out
, const BIGNUM
* bn
)
177 felem_bytearray b_in
;
178 felem_bytearray b_out
;
181 /* BN_bn2bin eats leading zeroes */
182 memset(b_out
, 0, sizeof b_out
);
183 num_bytes
= BN_num_bytes(bn
);
184 if (num_bytes
> sizeof b_out
) {
185 ECerror(EC_R_BIGNUM_OUT_OF_RANGE
);
188 if (BN_is_negative(bn
)) {
189 ECerror(EC_R_BIGNUM_OUT_OF_RANGE
);
192 num_bytes
= BN_bn2bin(bn
, b_in
);
193 flip_endian(b_out
, b_in
, num_bytes
);
194 bin66_to_felem(out
, b_out
);
198 /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
200 felem_to_BN(BIGNUM
* out
, const felem in
)
202 felem_bytearray b_in
, b_out
;
203 felem_to_bin66(b_in
, in
);
204 flip_endian(b_out
, b_in
, sizeof b_out
);
205 return BN_bin2bn(b_out
, sizeof b_out
, out
);
210 * ---------------- */
227 felem_assign(felem out
, const felem in
)
240 /* felem_sum64 sets out = out + in. */
242 felem_sum64(felem out
, const felem in
)
255 /* felem_scalar sets out = in * scalar */
257 felem_scalar(felem out
, const felem in
, limb scalar
)
259 out
[0] = in
[0] * scalar
;
260 out
[1] = in
[1] * scalar
;
261 out
[2] = in
[2] * scalar
;
262 out
[3] = in
[3] * scalar
;
263 out
[4] = in
[4] * scalar
;
264 out
[5] = in
[5] * scalar
;
265 out
[6] = in
[6] * scalar
;
266 out
[7] = in
[7] * scalar
;
267 out
[8] = in
[8] * scalar
;
270 /* felem_scalar64 sets out = out * scalar */
272 felem_scalar64(felem out
, limb scalar
)
285 /* felem_scalar128 sets out = out * scalar */
287 felem_scalar128(largefelem out
, limb scalar
)
300 /* felem_neg sets |out| to |-in|
302 * in[i] < 2^59 + 2^14
307 felem_neg(felem out
, const felem in
)
309 /* In order to prevent underflow, we subtract from 0 mod p. */
310 static const limb two62m3
= (((limb
) 1) << 62) - (((limb
) 1) << 5);
311 static const limb two62m2
= (((limb
) 1) << 62) - (((limb
) 1) << 4);
313 out
[0] = two62m3
- in
[0];
314 out
[1] = two62m2
- in
[1];
315 out
[2] = two62m2
- in
[2];
316 out
[3] = two62m2
- in
[3];
317 out
[4] = two62m2
- in
[4];
318 out
[5] = two62m2
- in
[5];
319 out
[6] = two62m2
- in
[6];
320 out
[7] = two62m2
- in
[7];
321 out
[8] = two62m2
- in
[8];
324 /* felem_diff64 subtracts |in| from |out|
326 * in[i] < 2^59 + 2^14
328 * out[i] < out[i] + 2^62
331 felem_diff64(felem out
, const felem in
)
333 /* In order to prevent underflow, we add 0 mod p before subtracting. */
334 static const limb two62m3
= (((limb
) 1) << 62) - (((limb
) 1) << 5);
335 static const limb two62m2
= (((limb
) 1) << 62) - (((limb
) 1) << 4);
337 out
[0] += two62m3
- in
[0];
338 out
[1] += two62m2
- in
[1];
339 out
[2] += two62m2
- in
[2];
340 out
[3] += two62m2
- in
[3];
341 out
[4] += two62m2
- in
[4];
342 out
[5] += two62m2
- in
[5];
343 out
[6] += two62m2
- in
[6];
344 out
[7] += two62m2
- in
[7];
345 out
[8] += two62m2
- in
[8];
348 /* felem_diff_128_64 subtracts |in| from |out|
350 * in[i] < 2^62 + 2^17
352 * out[i] < out[i] + 2^63
355 felem_diff_128_64(largefelem out
, const felem in
)
357 /* In order to prevent underflow, we add 0 mod p before subtracting. */
358 static const limb two63m6
= (((limb
) 1) << 62) - (((limb
) 1) << 5);
359 static const limb two63m5
= (((limb
) 1) << 62) - (((limb
) 1) << 4);
361 out
[0] += two63m6
- in
[0];
362 out
[1] += two63m5
- in
[1];
363 out
[2] += two63m5
- in
[2];
364 out
[3] += two63m5
- in
[3];
365 out
[4] += two63m5
- in
[4];
366 out
[5] += two63m5
- in
[5];
367 out
[6] += two63m5
- in
[6];
368 out
[7] += two63m5
- in
[7];
369 out
[8] += two63m5
- in
[8];
372 /* felem_diff_128_64 subtracts |in| from |out|
376 * out[i] < out[i] + 2^127 - 2^69
379 felem_diff128(largefelem out
, const largefelem in
)
381 /* In order to prevent underflow, we add 0 mod p before subtracting. */
382 static const uint128_t two127m70
= (((uint128_t
) 1) << 127) - (((uint128_t
) 1) << 70);
383 static const uint128_t two127m69
= (((uint128_t
) 1) << 127) - (((uint128_t
) 1) << 69);
385 out
[0] += (two127m70
- in
[0]);
386 out
[1] += (two127m69
- in
[1]);
387 out
[2] += (two127m69
- in
[2]);
388 out
[3] += (two127m69
- in
[3]);
389 out
[4] += (two127m69
- in
[4]);
390 out
[5] += (two127m69
- in
[5]);
391 out
[6] += (two127m69
- in
[6]);
392 out
[7] += (two127m69
- in
[7]);
393 out
[8] += (two127m69
- in
[8]);
396 /* felem_square sets |out| = |in|^2
400 * out[i] < 17 * max(in[i]) * max(in[i])
403 felem_square(largefelem out
, const felem in
)
406 felem_scalar(inx2
, in
, 2);
407 felem_scalar(inx4
, in
, 4);
410 * We have many cases were we want to do in[x] * in[y] + in[y] *
411 * in[x] This is obviously just 2 * in[x] * in[y] However, rather
412 * than do the doubling on the 128 bit result, we double one of the
413 * inputs to the multiplication by reading from |inx2|
416 out
[0] = ((uint128_t
) in
[0]) * in
[0];
417 out
[1] = ((uint128_t
) in
[0]) * inx2
[1];
418 out
[2] = ((uint128_t
) in
[0]) * inx2
[2] +
419 ((uint128_t
) in
[1]) * in
[1];
420 out
[3] = ((uint128_t
) in
[0]) * inx2
[3] +
421 ((uint128_t
) in
[1]) * inx2
[2];
422 out
[4] = ((uint128_t
) in
[0]) * inx2
[4] +
423 ((uint128_t
) in
[1]) * inx2
[3] +
424 ((uint128_t
) in
[2]) * in
[2];
425 out
[5] = ((uint128_t
) in
[0]) * inx2
[5] +
426 ((uint128_t
) in
[1]) * inx2
[4] +
427 ((uint128_t
) in
[2]) * inx2
[3];
428 out
[6] = ((uint128_t
) in
[0]) * inx2
[6] +
429 ((uint128_t
) in
[1]) * inx2
[5] +
430 ((uint128_t
) in
[2]) * inx2
[4] +
431 ((uint128_t
) in
[3]) * in
[3];
432 out
[7] = ((uint128_t
) in
[0]) * inx2
[7] +
433 ((uint128_t
) in
[1]) * inx2
[6] +
434 ((uint128_t
) in
[2]) * inx2
[5] +
435 ((uint128_t
) in
[3]) * inx2
[4];
436 out
[8] = ((uint128_t
) in
[0]) * inx2
[8] +
437 ((uint128_t
) in
[1]) * inx2
[7] +
438 ((uint128_t
) in
[2]) * inx2
[6] +
439 ((uint128_t
) in
[3]) * inx2
[5] +
440 ((uint128_t
) in
[4]) * in
[4];
443 * The remaining limbs fall above 2^521, with the first falling at
444 * 2^522. They correspond to locations one bit up from the limbs
445 * produced above so we would have to multiply by two to align them.
446 * Again, rather than operate on the 128-bit result, we double one of
447 * the inputs to the multiplication. If we want to double for both
448 * this reason, and the reason above, then we end up multiplying by
453 out
[0] += ((uint128_t
) in
[1]) * inx4
[8] +
454 ((uint128_t
) in
[2]) * inx4
[7] +
455 ((uint128_t
) in
[3]) * inx4
[6] +
456 ((uint128_t
) in
[4]) * inx4
[5];
459 out
[1] += ((uint128_t
) in
[2]) * inx4
[8] +
460 ((uint128_t
) in
[3]) * inx4
[7] +
461 ((uint128_t
) in
[4]) * inx4
[6] +
462 ((uint128_t
) in
[5]) * inx2
[5];
465 out
[2] += ((uint128_t
) in
[3]) * inx4
[8] +
466 ((uint128_t
) in
[4]) * inx4
[7] +
467 ((uint128_t
) in
[5]) * inx4
[6];
470 out
[3] += ((uint128_t
) in
[4]) * inx4
[8] +
471 ((uint128_t
) in
[5]) * inx4
[7] +
472 ((uint128_t
) in
[6]) * inx2
[6];
475 out
[4] += ((uint128_t
) in
[5]) * inx4
[8] +
476 ((uint128_t
) in
[6]) * inx4
[7];
479 out
[5] += ((uint128_t
) in
[6]) * inx4
[8] +
480 ((uint128_t
) in
[7]) * inx2
[7];
483 out
[6] += ((uint128_t
) in
[7]) * inx4
[8];
486 out
[7] += ((uint128_t
) in
[8]) * inx2
[8];
489 /* felem_mul sets |out| = |in1| * |in2|
494 * out[i] < 17 * max(in1[i]) * max(in2[i])
497 felem_mul(largefelem out
, const felem in1
, const felem in2
)
500 felem_scalar(in2x2
, in2
, 2);
502 out
[0] = ((uint128_t
) in1
[0]) * in2
[0];
504 out
[1] = ((uint128_t
) in1
[0]) * in2
[1] +
505 ((uint128_t
) in1
[1]) * in2
[0];
507 out
[2] = ((uint128_t
) in1
[0]) * in2
[2] +
508 ((uint128_t
) in1
[1]) * in2
[1] +
509 ((uint128_t
) in1
[2]) * in2
[0];
511 out
[3] = ((uint128_t
) in1
[0]) * in2
[3] +
512 ((uint128_t
) in1
[1]) * in2
[2] +
513 ((uint128_t
) in1
[2]) * in2
[1] +
514 ((uint128_t
) in1
[3]) * in2
[0];
516 out
[4] = ((uint128_t
) in1
[0]) * in2
[4] +
517 ((uint128_t
) in1
[1]) * in2
[3] +
518 ((uint128_t
) in1
[2]) * in2
[2] +
519 ((uint128_t
) in1
[3]) * in2
[1] +
520 ((uint128_t
) in1
[4]) * in2
[0];
522 out
[5] = ((uint128_t
) in1
[0]) * in2
[5] +
523 ((uint128_t
) in1
[1]) * in2
[4] +
524 ((uint128_t
) in1
[2]) * in2
[3] +
525 ((uint128_t
) in1
[3]) * in2
[2] +
526 ((uint128_t
) in1
[4]) * in2
[1] +
527 ((uint128_t
) in1
[5]) * in2
[0];
529 out
[6] = ((uint128_t
) in1
[0]) * in2
[6] +
530 ((uint128_t
) in1
[1]) * in2
[5] +
531 ((uint128_t
) in1
[2]) * in2
[4] +
532 ((uint128_t
) in1
[3]) * in2
[3] +
533 ((uint128_t
) in1
[4]) * in2
[2] +
534 ((uint128_t
) in1
[5]) * in2
[1] +
535 ((uint128_t
) in1
[6]) * in2
[0];
537 out
[7] = ((uint128_t
) in1
[0]) * in2
[7] +
538 ((uint128_t
) in1
[1]) * in2
[6] +
539 ((uint128_t
) in1
[2]) * in2
[5] +
540 ((uint128_t
) in1
[3]) * in2
[4] +
541 ((uint128_t
) in1
[4]) * in2
[3] +
542 ((uint128_t
) in1
[5]) * in2
[2] +
543 ((uint128_t
) in1
[6]) * in2
[1] +
544 ((uint128_t
) in1
[7]) * in2
[0];
546 out
[8] = ((uint128_t
) in1
[0]) * in2
[8] +
547 ((uint128_t
) in1
[1]) * in2
[7] +
548 ((uint128_t
) in1
[2]) * in2
[6] +
549 ((uint128_t
) in1
[3]) * in2
[5] +
550 ((uint128_t
) in1
[4]) * in2
[4] +
551 ((uint128_t
) in1
[5]) * in2
[3] +
552 ((uint128_t
) in1
[6]) * in2
[2] +
553 ((uint128_t
) in1
[7]) * in2
[1] +
554 ((uint128_t
) in1
[8]) * in2
[0];
556 /* See comment in felem_square about the use of in2x2 here */
558 out
[0] += ((uint128_t
) in1
[1]) * in2x2
[8] +
559 ((uint128_t
) in1
[2]) * in2x2
[7] +
560 ((uint128_t
) in1
[3]) * in2x2
[6] +
561 ((uint128_t
) in1
[4]) * in2x2
[5] +
562 ((uint128_t
) in1
[5]) * in2x2
[4] +
563 ((uint128_t
) in1
[6]) * in2x2
[3] +
564 ((uint128_t
) in1
[7]) * in2x2
[2] +
565 ((uint128_t
) in1
[8]) * in2x2
[1];
567 out
[1] += ((uint128_t
) in1
[2]) * in2x2
[8] +
568 ((uint128_t
) in1
[3]) * in2x2
[7] +
569 ((uint128_t
) in1
[4]) * in2x2
[6] +
570 ((uint128_t
) in1
[5]) * in2x2
[5] +
571 ((uint128_t
) in1
[6]) * in2x2
[4] +
572 ((uint128_t
) in1
[7]) * in2x2
[3] +
573 ((uint128_t
) in1
[8]) * in2x2
[2];
575 out
[2] += ((uint128_t
) in1
[3]) * in2x2
[8] +
576 ((uint128_t
) in1
[4]) * in2x2
[7] +
577 ((uint128_t
) in1
[5]) * in2x2
[6] +
578 ((uint128_t
) in1
[6]) * in2x2
[5] +
579 ((uint128_t
) in1
[7]) * in2x2
[4] +
580 ((uint128_t
) in1
[8]) * in2x2
[3];
582 out
[3] += ((uint128_t
) in1
[4]) * in2x2
[8] +
583 ((uint128_t
) in1
[5]) * in2x2
[7] +
584 ((uint128_t
) in1
[6]) * in2x2
[6] +
585 ((uint128_t
) in1
[7]) * in2x2
[5] +
586 ((uint128_t
) in1
[8]) * in2x2
[4];
588 out
[4] += ((uint128_t
) in1
[5]) * in2x2
[8] +
589 ((uint128_t
) in1
[6]) * in2x2
[7] +
590 ((uint128_t
) in1
[7]) * in2x2
[6] +
591 ((uint128_t
) in1
[8]) * in2x2
[5];
593 out
[5] += ((uint128_t
) in1
[6]) * in2x2
[8] +
594 ((uint128_t
) in1
[7]) * in2x2
[7] +
595 ((uint128_t
) in1
[8]) * in2x2
[6];
597 out
[6] += ((uint128_t
) in1
[7]) * in2x2
[8] +
598 ((uint128_t
) in1
[8]) * in2x2
[7];
600 out
[7] += ((uint128_t
) in1
[8]) * in2x2
[8];
603 static const limb bottom52bits
= 0xfffffffffffff;
605 /* felem_reduce converts a largefelem to an felem.
609 * out[i] < 2^59 + 2^14
612 felem_reduce(felem out
, const largefelem in
)
614 u64 overflow1
, overflow2
;
616 out
[0] = ((limb
) in
[0]) & bottom58bits
;
617 out
[1] = ((limb
) in
[1]) & bottom58bits
;
618 out
[2] = ((limb
) in
[2]) & bottom58bits
;
619 out
[3] = ((limb
) in
[3]) & bottom58bits
;
620 out
[4] = ((limb
) in
[4]) & bottom58bits
;
621 out
[5] = ((limb
) in
[5]) & bottom58bits
;
622 out
[6] = ((limb
) in
[6]) & bottom58bits
;
623 out
[7] = ((limb
) in
[7]) & bottom58bits
;
624 out
[8] = ((limb
) in
[8]) & bottom58bits
;
628 out
[1] += ((limb
) in
[0]) >> 58;
629 out
[1] += (((limb
) (in
[0] >> 64)) & bottom52bits
) << 6;
631 * out[1] < 2^58 + 2^6 + 2^58 = 2^59 + 2^6
633 out
[2] += ((limb
) (in
[0] >> 64)) >> 52;
635 out
[2] += ((limb
) in
[1]) >> 58;
636 out
[2] += (((limb
) (in
[1] >> 64)) & bottom52bits
) << 6;
637 out
[3] += ((limb
) (in
[1] >> 64)) >> 52;
639 out
[3] += ((limb
) in
[2]) >> 58;
640 out
[3] += (((limb
) (in
[2] >> 64)) & bottom52bits
) << 6;
641 out
[4] += ((limb
) (in
[2] >> 64)) >> 52;
643 out
[4] += ((limb
) in
[3]) >> 58;
644 out
[4] += (((limb
) (in
[3] >> 64)) & bottom52bits
) << 6;
645 out
[5] += ((limb
) (in
[3] >> 64)) >> 52;
647 out
[5] += ((limb
) in
[4]) >> 58;
648 out
[5] += (((limb
) (in
[4] >> 64)) & bottom52bits
) << 6;
649 out
[6] += ((limb
) (in
[4] >> 64)) >> 52;
651 out
[6] += ((limb
) in
[5]) >> 58;
652 out
[6] += (((limb
) (in
[5] >> 64)) & bottom52bits
) << 6;
653 out
[7] += ((limb
) (in
[5] >> 64)) >> 52;
655 out
[7] += ((limb
) in
[6]) >> 58;
656 out
[7] += (((limb
) (in
[6] >> 64)) & bottom52bits
) << 6;
657 out
[8] += ((limb
) (in
[6] >> 64)) >> 52;
659 out
[8] += ((limb
) in
[7]) >> 58;
660 out
[8] += (((limb
) (in
[7] >> 64)) & bottom52bits
) << 6;
662 * out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12 < 2^59 + 2^13
664 overflow1
= ((limb
) (in
[7] >> 64)) >> 52;
666 overflow1
+= ((limb
) in
[8]) >> 58;
667 overflow1
+= (((limb
) (in
[8] >> 64)) & bottom52bits
) << 6;
668 overflow2
= ((limb
) (in
[8] >> 64)) >> 52;
670 overflow1
<<= 1; /* overflow1 < 2^13 + 2^7 + 2^59 */
671 overflow2
<<= 1; /* overflow2 < 2^13 */
673 out
[0] += overflow1
; /* out[0] < 2^60 */
674 out
[1] += overflow2
; /* out[1] < 2^59 + 2^6 + 2^13 */
676 out
[1] += out
[0] >> 58;
677 out
[0] &= bottom58bits
;
679 * out[0] < 2^58 out[1] < 2^59 + 2^6 + 2^13 + 2^2 < 2^59 + 2^14
684 felem_square_reduce(felem out
, const felem in
)
687 felem_square(tmp
, in
);
688 felem_reduce(out
, tmp
);
692 felem_mul_reduce(felem out
, const felem in1
, const felem in2
)
695 felem_mul(tmp
, in1
, in2
);
696 felem_reduce(out
, tmp
);
699 /* felem_inv calculates |out| = |in|^{-1}
701 * Based on Fermat's Little Theorem:
703 * a^{p-1} = 1 (mod p)
704 * a^{p-2} = a^{-1} (mod p)
707 felem_inv(felem out
, const felem in
)
709 felem ftmp
, ftmp2
, ftmp3
, ftmp4
;
713 felem_square(tmp
, in
);
714 felem_reduce(ftmp
, tmp
);/* 2^1 */
715 felem_mul(tmp
, in
, ftmp
);
716 felem_reduce(ftmp
, tmp
);/* 2^2 - 2^0 */
717 felem_assign(ftmp2
, ftmp
);
718 felem_square(tmp
, ftmp
);
719 felem_reduce(ftmp
, tmp
);/* 2^3 - 2^1 */
720 felem_mul(tmp
, in
, ftmp
);
721 felem_reduce(ftmp
, tmp
);/* 2^3 - 2^0 */
722 felem_square(tmp
, ftmp
);
723 felem_reduce(ftmp
, tmp
);/* 2^4 - 2^1 */
725 felem_square(tmp
, ftmp2
);
726 felem_reduce(ftmp3
, tmp
); /* 2^3 - 2^1 */
727 felem_square(tmp
, ftmp3
);
728 felem_reduce(ftmp3
, tmp
); /* 2^4 - 2^2 */
729 felem_mul(tmp
, ftmp3
, ftmp2
);
730 felem_reduce(ftmp3
, tmp
); /* 2^4 - 2^0 */
732 felem_assign(ftmp2
, ftmp3
);
733 felem_square(tmp
, ftmp3
);
734 felem_reduce(ftmp3
, tmp
); /* 2^5 - 2^1 */
735 felem_square(tmp
, ftmp3
);
736 felem_reduce(ftmp3
, tmp
); /* 2^6 - 2^2 */
737 felem_square(tmp
, ftmp3
);
738 felem_reduce(ftmp3
, tmp
); /* 2^7 - 2^3 */
739 felem_square(tmp
, ftmp3
);
740 felem_reduce(ftmp3
, tmp
); /* 2^8 - 2^4 */
741 felem_assign(ftmp4
, ftmp3
);
742 felem_mul(tmp
, ftmp3
, ftmp
);
743 felem_reduce(ftmp4
, tmp
); /* 2^8 - 2^1 */
744 felem_square(tmp
, ftmp4
);
745 felem_reduce(ftmp4
, tmp
); /* 2^9 - 2^2 */
746 felem_mul(tmp
, ftmp3
, ftmp2
);
747 felem_reduce(ftmp3
, tmp
); /* 2^8 - 2^0 */
748 felem_assign(ftmp2
, ftmp3
);
750 for (i
= 0; i
< 8; i
++) {
751 felem_square(tmp
, ftmp3
);
752 felem_reduce(ftmp3
, tmp
); /* 2^16 - 2^8 */
754 felem_mul(tmp
, ftmp3
, ftmp2
);
755 felem_reduce(ftmp3
, tmp
); /* 2^16 - 2^0 */
756 felem_assign(ftmp2
, ftmp3
);
758 for (i
= 0; i
< 16; i
++) {
759 felem_square(tmp
, ftmp3
);
760 felem_reduce(ftmp3
, tmp
); /* 2^32 - 2^16 */
762 felem_mul(tmp
, ftmp3
, ftmp2
);
763 felem_reduce(ftmp3
, tmp
); /* 2^32 - 2^0 */
764 felem_assign(ftmp2
, ftmp3
);
766 for (i
= 0; i
< 32; i
++) {
767 felem_square(tmp
, ftmp3
);
768 felem_reduce(ftmp3
, tmp
); /* 2^64 - 2^32 */
770 felem_mul(tmp
, ftmp3
, ftmp2
);
771 felem_reduce(ftmp3
, tmp
); /* 2^64 - 2^0 */
772 felem_assign(ftmp2
, ftmp3
);
774 for (i
= 0; i
< 64; i
++) {
775 felem_square(tmp
, ftmp3
);
776 felem_reduce(ftmp3
, tmp
); /* 2^128 - 2^64 */
778 felem_mul(tmp
, ftmp3
, ftmp2
);
779 felem_reduce(ftmp3
, tmp
); /* 2^128 - 2^0 */
780 felem_assign(ftmp2
, ftmp3
);
782 for (i
= 0; i
< 128; i
++) {
783 felem_square(tmp
, ftmp3
);
784 felem_reduce(ftmp3
, tmp
); /* 2^256 - 2^128 */
786 felem_mul(tmp
, ftmp3
, ftmp2
);
787 felem_reduce(ftmp3
, tmp
); /* 2^256 - 2^0 */
788 felem_assign(ftmp2
, ftmp3
);
790 for (i
= 0; i
< 256; i
++) {
791 felem_square(tmp
, ftmp3
);
792 felem_reduce(ftmp3
, tmp
); /* 2^512 - 2^256 */
794 felem_mul(tmp
, ftmp3
, ftmp2
);
795 felem_reduce(ftmp3
, tmp
); /* 2^512 - 2^0 */
797 for (i
= 0; i
< 9; i
++) {
798 felem_square(tmp
, ftmp3
);
799 felem_reduce(ftmp3
, tmp
); /* 2^521 - 2^9 */
801 felem_mul(tmp
, ftmp3
, ftmp4
);
802 felem_reduce(ftmp3
, tmp
); /* 2^512 - 2^2 */
803 felem_mul(tmp
, ftmp3
, in
);
804 felem_reduce(out
, tmp
); /* 2^512 - 3 */
807 /* This is 2^521-1, expressed as an felem */
808 static const felem kPrime
=
810 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
811 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
812 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff
815 /* felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
818 * in[i] < 2^59 + 2^14
821 felem_is_zero(const felem in
)
825 felem_assign(ftmp
, in
);
827 ftmp
[0] += ftmp
[8] >> 57;
828 ftmp
[8] &= bottom57bits
;
830 ftmp
[1] += ftmp
[0] >> 58;
831 ftmp
[0] &= bottom58bits
;
832 ftmp
[2] += ftmp
[1] >> 58;
833 ftmp
[1] &= bottom58bits
;
834 ftmp
[3] += ftmp
[2] >> 58;
835 ftmp
[2] &= bottom58bits
;
836 ftmp
[4] += ftmp
[3] >> 58;
837 ftmp
[3] &= bottom58bits
;
838 ftmp
[5] += ftmp
[4] >> 58;
839 ftmp
[4] &= bottom58bits
;
840 ftmp
[6] += ftmp
[5] >> 58;
841 ftmp
[5] &= bottom58bits
;
842 ftmp
[7] += ftmp
[6] >> 58;
843 ftmp
[6] &= bottom58bits
;
844 ftmp
[8] += ftmp
[7] >> 58;
845 ftmp
[7] &= bottom58bits
;
846 /* ftmp[8] < 2^57 + 4 */
849 * The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is
850 * greater than our bound for ftmp[8]. Therefore we only have to
851 * check if the zero is zero or 2^521-1.
867 * We know that ftmp[i] < 2^63, therefore the only way that the top
868 * bit can be set is if is_zero was 0 before the decrement.
870 is_zero
= ((s64
) is_zero
) >> 63;
872 is_p
= ftmp
[0] ^ kPrime
[0];
873 is_p
|= ftmp
[1] ^ kPrime
[1];
874 is_p
|= ftmp
[2] ^ kPrime
[2];
875 is_p
|= ftmp
[3] ^ kPrime
[3];
876 is_p
|= ftmp
[4] ^ kPrime
[4];
877 is_p
|= ftmp
[5] ^ kPrime
[5];
878 is_p
|= ftmp
[6] ^ kPrime
[6];
879 is_p
|= ftmp
[7] ^ kPrime
[7];
880 is_p
|= ftmp
[8] ^ kPrime
[8];
883 is_p
= ((s64
) is_p
) >> 63;
890 felem_is_zero_int(const felem in
)
892 return (int) (felem_is_zero(in
) & ((limb
) 1));
895 /* felem_contract converts |in| to its unique, minimal representation.
897 * in[i] < 2^59 + 2^14
900 felem_contract(felem out
, const felem in
)
902 limb is_p
, is_greater
, sign
;
903 static const limb two58
= ((limb
) 1) << 58;
905 felem_assign(out
, in
);
907 out
[0] += out
[8] >> 57;
908 out
[8] &= bottom57bits
;
910 out
[1] += out
[0] >> 58;
911 out
[0] &= bottom58bits
;
912 out
[2] += out
[1] >> 58;
913 out
[1] &= bottom58bits
;
914 out
[3] += out
[2] >> 58;
915 out
[2] &= bottom58bits
;
916 out
[4] += out
[3] >> 58;
917 out
[3] &= bottom58bits
;
918 out
[5] += out
[4] >> 58;
919 out
[4] &= bottom58bits
;
920 out
[6] += out
[5] >> 58;
921 out
[5] &= bottom58bits
;
922 out
[7] += out
[6] >> 58;
923 out
[6] &= bottom58bits
;
924 out
[8] += out
[7] >> 58;
925 out
[7] &= bottom58bits
;
926 /* out[8] < 2^57 + 4 */
929 * If the value is greater than 2^521-1 then we have to subtract
930 * 2^521-1 out. See the comments in felem_is_zero regarding why we
931 * don't test for other multiples of the prime.
935 * First, if |out| is equal to 2^521-1, we subtract it out to get
939 is_p
= out
[0] ^ kPrime
[0];
940 is_p
|= out
[1] ^ kPrime
[1];
941 is_p
|= out
[2] ^ kPrime
[2];
942 is_p
|= out
[3] ^ kPrime
[3];
943 is_p
|= out
[4] ^ kPrime
[4];
944 is_p
|= out
[5] ^ kPrime
[5];
945 is_p
|= out
[6] ^ kPrime
[6];
946 is_p
|= out
[7] ^ kPrime
[7];
947 is_p
|= out
[8] ^ kPrime
[8];
956 is_p
= ((s64
) is_p
) >> 63;
959 /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */
972 * In order to test that |out| >= 2^521-1 we need only test if out[8]
973 * >> 57 is greater than zero as (2^521-1) + x >= 2^522
975 is_greater
= out
[8] >> 57;
976 is_greater
|= is_greater
<< 32;
977 is_greater
|= is_greater
<< 16;
978 is_greater
|= is_greater
<< 8;
979 is_greater
|= is_greater
<< 4;
980 is_greater
|= is_greater
<< 2;
981 is_greater
|= is_greater
<< 1;
982 is_greater
= ((s64
) is_greater
) >> 63;
984 out
[0] -= kPrime
[0] & is_greater
;
985 out
[1] -= kPrime
[1] & is_greater
;
986 out
[2] -= kPrime
[2] & is_greater
;
987 out
[3] -= kPrime
[3] & is_greater
;
988 out
[4] -= kPrime
[4] & is_greater
;
989 out
[5] -= kPrime
[5] & is_greater
;
990 out
[6] -= kPrime
[6] & is_greater
;
991 out
[7] -= kPrime
[7] & is_greater
;
992 out
[8] -= kPrime
[8] & is_greater
;
994 /* Eliminate negative coefficients */
995 sign
= -(out
[0] >> 63);
996 out
[0] += (two58
& sign
);
997 out
[1] -= (1 & sign
);
998 sign
= -(out
[1] >> 63);
999 out
[1] += (two58
& sign
);
1000 out
[2] -= (1 & sign
);
1001 sign
= -(out
[2] >> 63);
1002 out
[2] += (two58
& sign
);
1003 out
[3] -= (1 & sign
);
1004 sign
= -(out
[3] >> 63);
1005 out
[3] += (two58
& sign
);
1006 out
[4] -= (1 & sign
);
1007 sign
= -(out
[4] >> 63);
1008 out
[4] += (two58
& sign
);
1009 out
[5] -= (1 & sign
);
1010 sign
= -(out
[0] >> 63);
1011 out
[5] += (two58
& sign
);
1012 out
[6] -= (1 & sign
);
1013 sign
= -(out
[6] >> 63);
1014 out
[6] += (two58
& sign
);
1015 out
[7] -= (1 & sign
);
1016 sign
= -(out
[7] >> 63);
1017 out
[7] += (two58
& sign
);
1018 out
[8] -= (1 & sign
);
1019 sign
= -(out
[5] >> 63);
1020 out
[5] += (two58
& sign
);
1021 out
[6] -= (1 & sign
);
1022 sign
= -(out
[6] >> 63);
1023 out
[6] += (two58
& sign
);
1024 out
[7] -= (1 & sign
);
1025 sign
= -(out
[7] >> 63);
1026 out
[7] += (two58
& sign
);
1027 out
[8] -= (1 & sign
);
1033 * Building on top of the field operations we have the operations on the
1034 * elliptic curve group itself. Points on the curve are represented in Jacobian
1037 /* point_double calcuates 2*(x_in, y_in, z_in)
1039 * The method is taken from:
1040 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1042 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1043 * while x_out == y_in is not (maybe this works, but it's not tested). */
1045 point_double(felem x_out
, felem y_out
, felem z_out
,
1046 const felem x_in
, const felem y_in
, const felem z_in
)
1048 largefelem tmp
, tmp2
;
1049 felem delta
, gamma
, beta
, alpha
, ftmp
, ftmp2
;
1051 felem_assign(ftmp
, x_in
);
1052 felem_assign(ftmp2
, x_in
);
1055 felem_square(tmp
, z_in
);
1056 felem_reduce(delta
, tmp
); /* delta[i] < 2^59 + 2^14 */
1059 felem_square(tmp
, y_in
);
1060 felem_reduce(gamma
, tmp
); /* gamma[i] < 2^59 + 2^14 */
1062 /* beta = x*gamma */
1063 felem_mul(tmp
, x_in
, gamma
);
1064 felem_reduce(beta
, tmp
);/* beta[i] < 2^59 + 2^14 */
1066 /* alpha = 3*(x-delta)*(x+delta) */
1067 felem_diff64(ftmp
, delta
);
1068 /* ftmp[i] < 2^61 */
1069 felem_sum64(ftmp2
, delta
);
1070 /* ftmp2[i] < 2^60 + 2^15 */
1071 felem_scalar64(ftmp2
, 3);
1072 /* ftmp2[i] < 3*2^60 + 3*2^15 */
1073 felem_mul(tmp
, ftmp
, ftmp2
);
1075 * tmp[i] < 17(3*2^121 + 3*2^76) = 61*2^121 + 61*2^76 < 64*2^121 +
1076 * 64*2^76 = 2^127 + 2^82 < 2^128
1078 felem_reduce(alpha
, tmp
);
1080 /* x' = alpha^2 - 8*beta */
1081 felem_square(tmp
, alpha
);
1083 * tmp[i] < 17*2^120 < 2^125
1085 felem_assign(ftmp
, beta
);
1086 felem_scalar64(ftmp
, 8);
1087 /* ftmp[i] < 2^62 + 2^17 */
1088 felem_diff_128_64(tmp
, ftmp
);
1089 /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */
1090 felem_reduce(x_out
, tmp
);
1092 /* z' = (y + z)^2 - gamma - delta */
1093 felem_sum64(delta
, gamma
);
1094 /* delta[i] < 2^60 + 2^15 */
1095 felem_assign(ftmp
, y_in
);
1096 felem_sum64(ftmp
, z_in
);
1097 /* ftmp[i] < 2^60 + 2^15 */
1098 felem_square(tmp
, ftmp
);
1100 * tmp[i] < 17(2^122) < 2^127
1102 felem_diff_128_64(tmp
, delta
);
1103 /* tmp[i] < 2^127 + 2^63 */
1104 felem_reduce(z_out
, tmp
);
1106 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1107 felem_scalar64(beta
, 4);
1108 /* beta[i] < 2^61 + 2^16 */
1109 felem_diff64(beta
, x_out
);
1110 /* beta[i] < 2^61 + 2^60 + 2^16 */
1111 felem_mul(tmp
, alpha
, beta
);
1113 * tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16)) = 17*(2^120 + 2^75
1114 * + 2^119 + 2^74 + 2^75 + 2^30) = 17*(2^120 + 2^119 + 2^76 + 2^74 +
1117 felem_square(tmp2
, gamma
);
1119 * tmp2[i] < 17*(2^59 + 2^14)^2 = 17*(2^118 + 2^74 + 2^28)
1121 felem_scalar128(tmp2
, 8);
1123 * tmp2[i] < 8*17*(2^118 + 2^74 + 2^28) = 2^125 + 2^121 + 2^81 + 2^77
1124 * + 2^35 + 2^31 < 2^126
1126 felem_diff128(tmp
, tmp2
);
1128 * tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30) =
1129 * 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 + 2^74
1130 * + 2^69 + 2^34 + 2^30 < 2^128
1132 felem_reduce(y_out
, tmp
);
1135 /* copy_conditional copies in to out iff mask is all ones. */
1137 copy_conditional(felem out
, const felem in
, limb mask
)
1140 for (i
= 0; i
< NLIMBS
; ++i
) {
1141 const limb tmp
= mask
& (in
[i
] ^ out
[i
]);
1146 /* point_add calcuates (x1, y1, z1) + (x2, y2, z2)
1148 * The method is taken from
1149 * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1150 * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1152 * This function includes a branch for checking whether the two input points
1153 * are equal (while not equal to the point at infinity). This case never
1154 * happens during single point multiplication, so there is no timing leak for
1155 * ECDH or ECDSA signing. */
1157 point_add(felem x3
, felem y3
, felem z3
,
1158 const felem x1
, const felem y1
, const felem z1
,
1159 const int mixed
, const felem x2
, const felem y2
, const felem z2
)
1161 felem ftmp
, ftmp2
, ftmp3
, ftmp4
, ftmp5
, ftmp6
, x_out
, y_out
, z_out
;
1162 largefelem tmp
, tmp2
;
1163 limb x_equal
, y_equal
, z1_is_zero
, z2_is_zero
;
1165 z1_is_zero
= felem_is_zero(z1
);
1166 z2_is_zero
= felem_is_zero(z2
);
1168 /* ftmp = z1z1 = z1**2 */
1169 felem_square(tmp
, z1
);
1170 felem_reduce(ftmp
, tmp
);
1173 /* ftmp2 = z2z2 = z2**2 */
1174 felem_square(tmp
, z2
);
1175 felem_reduce(ftmp2
, tmp
);
1177 /* u1 = ftmp3 = x1*z2z2 */
1178 felem_mul(tmp
, x1
, ftmp2
);
1179 felem_reduce(ftmp3
, tmp
);
1181 /* ftmp5 = z1 + z2 */
1182 felem_assign(ftmp5
, z1
);
1183 felem_sum64(ftmp5
, z2
);
1184 /* ftmp5[i] < 2^61 */
1186 /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
1187 felem_square(tmp
, ftmp5
);
1188 /* tmp[i] < 17*2^122 */
1189 felem_diff_128_64(tmp
, ftmp
);
1190 /* tmp[i] < 17*2^122 + 2^63 */
1191 felem_diff_128_64(tmp
, ftmp2
);
1192 /* tmp[i] < 17*2^122 + 2^64 */
1193 felem_reduce(ftmp5
, tmp
);
1195 /* ftmp2 = z2 * z2z2 */
1196 felem_mul(tmp
, ftmp2
, z2
);
1197 felem_reduce(ftmp2
, tmp
);
1199 /* s1 = ftmp6 = y1 * z2**3 */
1200 felem_mul(tmp
, y1
, ftmp2
);
1201 felem_reduce(ftmp6
, tmp
);
1203 /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */
1205 /* u1 = ftmp3 = x1*z2z2 */
1206 felem_assign(ftmp3
, x1
);
1208 /* ftmp5 = 2*z1z2 */
1209 felem_scalar(ftmp5
, z1
, 2);
1211 /* s1 = ftmp6 = y1 * z2**3 */
1212 felem_assign(ftmp6
, y1
);
1216 felem_mul(tmp
, x2
, ftmp
);
1217 /* tmp[i] < 17*2^120 */
1219 /* h = ftmp4 = u2 - u1 */
1220 felem_diff_128_64(tmp
, ftmp3
);
1221 /* tmp[i] < 17*2^120 + 2^63 */
1222 felem_reduce(ftmp4
, tmp
);
1224 x_equal
= felem_is_zero(ftmp4
);
1226 /* z_out = ftmp5 * h */
1227 felem_mul(tmp
, ftmp5
, ftmp4
);
1228 felem_reduce(z_out
, tmp
);
1230 /* ftmp = z1 * z1z1 */
1231 felem_mul(tmp
, ftmp
, z1
);
1232 felem_reduce(ftmp
, tmp
);
1234 /* s2 = tmp = y2 * z1**3 */
1235 felem_mul(tmp
, y2
, ftmp
);
1236 /* tmp[i] < 17*2^120 */
1238 /* r = ftmp5 = (s2 - s1)*2 */
1239 felem_diff_128_64(tmp
, ftmp6
);
1240 /* tmp[i] < 17*2^120 + 2^63 */
1241 felem_reduce(ftmp5
, tmp
);
1242 y_equal
= felem_is_zero(ftmp5
);
1243 felem_scalar64(ftmp5
, 2);
1244 /* ftmp5[i] < 2^61 */
1246 if (x_equal
&& y_equal
&& !z1_is_zero
&& !z2_is_zero
) {
1247 point_double(x3
, y3
, z3
, x1
, y1
, z1
);
1250 /* I = ftmp = (2h)**2 */
1251 felem_assign(ftmp
, ftmp4
);
1252 felem_scalar64(ftmp
, 2);
1253 /* ftmp[i] < 2^61 */
1254 felem_square(tmp
, ftmp
);
1255 /* tmp[i] < 17*2^122 */
1256 felem_reduce(ftmp
, tmp
);
1258 /* J = ftmp2 = h * I */
1259 felem_mul(tmp
, ftmp4
, ftmp
);
1260 felem_reduce(ftmp2
, tmp
);
1262 /* V = ftmp4 = U1 * I */
1263 felem_mul(tmp
, ftmp3
, ftmp
);
1264 felem_reduce(ftmp4
, tmp
);
1266 /* x_out = r**2 - J - 2V */
1267 felem_square(tmp
, ftmp5
);
1268 /* tmp[i] < 17*2^122 */
1269 felem_diff_128_64(tmp
, ftmp2
);
1270 /* tmp[i] < 17*2^122 + 2^63 */
1271 felem_assign(ftmp3
, ftmp4
);
1272 felem_scalar64(ftmp4
, 2);
1273 /* ftmp4[i] < 2^61 */
1274 felem_diff_128_64(tmp
, ftmp4
);
1275 /* tmp[i] < 17*2^122 + 2^64 */
1276 felem_reduce(x_out
, tmp
);
1278 /* y_out = r(V-x_out) - 2 * s1 * J */
1279 felem_diff64(ftmp3
, x_out
);
1281 * ftmp3[i] < 2^60 + 2^60 = 2^61
1283 felem_mul(tmp
, ftmp5
, ftmp3
);
1284 /* tmp[i] < 17*2^122 */
1285 felem_mul(tmp2
, ftmp6
, ftmp2
);
1286 /* tmp2[i] < 17*2^120 */
1287 felem_scalar128(tmp2
, 2);
1288 /* tmp2[i] < 17*2^121 */
1289 felem_diff128(tmp
, tmp2
);
1291 * tmp[i] < 2^127 - 2^69 + 17*2^122 = 2^126 - 2^122 - 2^6 - 2^2 - 1 <
1294 felem_reduce(y_out
, tmp
);
1296 copy_conditional(x_out
, x2
, z1_is_zero
);
1297 copy_conditional(x_out
, x1
, z2_is_zero
);
1298 copy_conditional(y_out
, y2
, z1_is_zero
);
1299 copy_conditional(y_out
, y1
, z2_is_zero
);
1300 copy_conditional(z_out
, z2
, z1_is_zero
);
1301 copy_conditional(z_out
, z1
, z2_is_zero
);
1302 felem_assign(x3
, x_out
);
1303 felem_assign(y3
, y_out
);
1304 felem_assign(z3
, z_out
);
1307 /* Base point pre computation
1308 * --------------------------
1310 * Two different sorts of precomputed tables are used in the following code.
1311 * Each contain various points on the curve, where each point is three field
1312 * elements (x, y, z).
1314 * For the base point table, z is usually 1 (0 for the point at infinity).
1315 * This table has 16 elements:
1316 * index | bits | point
 * ------+---------+------------------------------
 *   0   | 0 0 0 0 | the point at infinity
 *   1   | 0 0 0 1 | 1G
 *   2   | 0 0 1 0 | 2^130G
1321 * 3 | 0 0 1 1 | (2^130 + 1)G
1322 * 4 | 0 1 0 0 | 2^260G
1323 * 5 | 0 1 0 1 | (2^260 + 1)G
1324 * 6 | 0 1 1 0 | (2^260 + 2^130)G
1325 * 7 | 0 1 1 1 | (2^260 + 2^130 + 1)G
1326 * 8 | 1 0 0 0 | 2^390G
1327 * 9 | 1 0 0 1 | (2^390 + 1)G
1328 * 10 | 1 0 1 0 | (2^390 + 2^130)G
1329 * 11 | 1 0 1 1 | (2^390 + 2^130 + 1)G
1330 * 12 | 1 1 0 0 | (2^390 + 2^260)G
1331 * 13 | 1 1 0 1 | (2^390 + 2^260 + 1)G
1332 * 14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G
1333 * 15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G
1335 * The reason for this is so that we can clock bits into four different
1336 * locations when doing simple scalar multiplies against the base point.
1338 * Tables for other points have table[i] = iG for i in 0 .. 16. */
/* gmul is the table of precomputed base points: entry i holds the point
 * described in the table above as three felems (x, y, z). Entry 0 is the
 * point at infinity (all limbs zero); every other entry is affine, with
 * z = 1 in limb representation. */
static const felem gmul[16][3] =
{{{0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0}},
{{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},
{0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},
{1, 0, 0, 0, 0, 0, 0, 0, 0}},
{{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},
{0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},
{1, 0, 0, 0, 0, 0, 0, 0, 0}},
{{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},
{0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},
{1, 0, 0, 0, 0, 0, 0, 0, 0}},
{{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},
{0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},
{1, 0, 0, 0, 0, 0, 0, 0, 0}},
{{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},
{0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},
{1, 0, 0, 0, 0, 0, 0, 0, 0}},
{{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},
{0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},
{1, 0, 0, 0, 0, 0, 0, 0, 0}},
{{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},
{0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},
{1, 0, 0, 0, 0, 0, 0, 0, 0}},
{{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},
{0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},
{1, 0, 0, 0, 0, 0, 0, 0, 0}},
{{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},
{0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},
{1, 0, 0, 0, 0, 0, 0, 0, 0}},
{{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},
{0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},
{1, 0, 0, 0, 0, 0, 0, 0, 0}},
{{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},
{0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},
{1, 0, 0, 0, 0, 0, 0, 0, 0}},
{{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},
{0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},
{1, 0, 0, 0, 0, 0, 0, 0, 0}},
{{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},
{0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},
{1, 0, 0, 0, 0, 0, 0, 0, 0}},
{{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},
{0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},
{1, 0, 0, 0, 0, 0, 0, 0, 0}},
{{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},
{0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},
{1, 0, 0, 0, 0, 0, 0, 0, 0}}};
1451 /* select_point selects the |idx|th point from a precomputation table and
1452 * copies it to out. */
1454 select_point(const limb idx
, unsigned int size
, const felem pre_comp
[ /* size */ ][3],
1458 limb
*outlimbs
= &out
[0][0];
1459 memset(outlimbs
, 0, 3 * sizeof(felem
));
1461 for (i
= 0; i
< size
; i
++) {
1462 const limb
*inlimbs
= &pre_comp
[i
][0][0];
1463 limb mask
= i
^ idx
;
1469 for (j
= 0; j
< NLIMBS
* 3; j
++)
1470 outlimbs
[j
] |= inlimbs
[j
] & mask
;
1474 /* get_bit returns the |i|th bit in |in| */
1476 get_bit(const felem_bytearray in
, int i
)
1480 return (in
[i
>> 3] >> (i
& 7)) & 1;
1483 /* Interleaved point multiplication using precomputed point multiples:
1484 * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[],
1485 * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple
1486 * of the generator, using certain (large) precomputed multiples in g_pre_comp.
1487 * Output point (X, Y, Z) is stored in x_out, y_out, z_out */
1489 batch_mul(felem x_out
, felem y_out
, felem z_out
,
1490 const felem_bytearray scalars
[], const unsigned num_points
, const u8
* g_scalar
,
1491 const int mixed
, const felem pre_comp
[][17][3], const felem g_pre_comp
[16][3])
1494 unsigned num
, gen_mul
= (g_scalar
!= NULL
);
1495 felem nq
[3], tmp
[4];
1499 /* set nq to the point at infinity */
1500 memset(nq
, 0, 3 * sizeof(felem
));
1503 * Loop over all scalars msb-to-lsb, interleaving additions of
1504 * multiples of the generator (last quarter of rounds) and additions
1505 * of other points multiples (every 5th round).
1507 skip
= 1; /* save two point operations in the first
1509 for (i
= (num_points
? 520 : 130); i
>= 0; --i
) {
1512 point_double(nq
[0], nq
[1], nq
[2], nq
[0], nq
[1], nq
[2]);
1514 /* add multiples of the generator */
1515 if (gen_mul
&& (i
<= 130)) {
1516 bits
= get_bit(g_scalar
, i
+ 390) << 3;
1518 bits
|= get_bit(g_scalar
, i
+ 260) << 2;
1519 bits
|= get_bit(g_scalar
, i
+ 130) << 1;
1520 bits
|= get_bit(g_scalar
, i
);
1522 /* select the point to add, in constant time */
1523 select_point(bits
, 16, g_pre_comp
, tmp
);
1525 point_add(nq
[0], nq
[1], nq
[2],
1526 nq
[0], nq
[1], nq
[2],
1527 1 /* mixed */ , tmp
[0], tmp
[1], tmp
[2]);
1529 memcpy(nq
, tmp
, 3 * sizeof(felem
));
1533 /* do other additions every 5 doublings */
1534 if (num_points
&& (i
% 5 == 0)) {
1535 /* loop over all scalars */
1536 for (num
= 0; num
< num_points
; ++num
) {
1537 bits
= get_bit(scalars
[num
], i
+ 4) << 5;
1538 bits
|= get_bit(scalars
[num
], i
+ 3) << 4;
1539 bits
|= get_bit(scalars
[num
], i
+ 2) << 3;
1540 bits
|= get_bit(scalars
[num
], i
+ 1) << 2;
1541 bits
|= get_bit(scalars
[num
], i
) << 1;
1542 bits
|= get_bit(scalars
[num
], i
- 1);
1543 ec_GFp_nistp_recode_scalar_bits(&sign
, &digit
, bits
);
1546 * select the point to add or subtract, in
1549 select_point(digit
, 17, pre_comp
[num
], tmp
);
1550 felem_neg(tmp
[3], tmp
[1]); /* (X, -Y, Z) is the
1552 copy_conditional(tmp
[1], tmp
[3], (-(limb
) sign
));
1555 point_add(nq
[0], nq
[1], nq
[2],
1556 nq
[0], nq
[1], nq
[2],
1557 mixed
, tmp
[0], tmp
[1], tmp
[2]);
1559 memcpy(nq
, tmp
, 3 * sizeof(felem
));
1565 felem_assign(x_out
, nq
[0]);
1566 felem_assign(y_out
, nq
[1]);
1567 felem_assign(z_out
, nq
[2]);
1571 /* Precomputation for the group generator. */
1573 felem g_pre_comp
[16][3];
1575 } NISTP521_PRE_COMP
;
1578 EC_GFp_nistp521_method(void)
1580 static const EC_METHOD ret
= {
1581 .flags
= EC_FLAGS_DEFAULT_OCT
,
1582 .field_type
= NID_X9_62_prime_field
,
1583 .group_init
= ec_GFp_nistp521_group_init
,
1584 .group_finish
= ec_GFp_simple_group_finish
,
1585 .group_clear_finish
= ec_GFp_simple_group_clear_finish
,
1586 .group_copy
= ec_GFp_nist_group_copy
,
1587 .group_set_curve
= ec_GFp_nistp521_group_set_curve
,
1588 .group_get_curve
= ec_GFp_simple_group_get_curve
,
1589 .group_get_degree
= ec_GFp_simple_group_get_degree
,
1590 .group_check_discriminant
=
1591 ec_GFp_simple_group_check_discriminant
,
1592 .point_init
= ec_GFp_simple_point_init
,
1593 .point_finish
= ec_GFp_simple_point_finish
,
1594 .point_clear_finish
= ec_GFp_simple_point_clear_finish
,
1595 .point_copy
= ec_GFp_simple_point_copy
,
1596 .point_set_to_infinity
= ec_GFp_simple_point_set_to_infinity
,
1597 .point_set_Jprojective_coordinates_GFp
=
1598 ec_GFp_simple_set_Jprojective_coordinates_GFp
,
1599 .point_get_Jprojective_coordinates_GFp
=
1600 ec_GFp_simple_get_Jprojective_coordinates_GFp
,
1601 .point_set_affine_coordinates
=
1602 ec_GFp_simple_point_set_affine_coordinates
,
1603 .point_get_affine_coordinates
=
1604 ec_GFp_nistp521_point_get_affine_coordinates
,
1605 .add
= ec_GFp_simple_add
,
1606 .dbl
= ec_GFp_simple_dbl
,
1607 .invert
= ec_GFp_simple_invert
,
1608 .is_at_infinity
= ec_GFp_simple_is_at_infinity
,
1609 .is_on_curve
= ec_GFp_simple_is_on_curve
,
1610 .point_cmp
= ec_GFp_simple_cmp
,
1611 .make_affine
= ec_GFp_simple_make_affine
,
1612 .points_make_affine
= ec_GFp_simple_points_make_affine
,
1613 .mul
= ec_GFp_nistp521_points_mul
,
1614 .precompute_mult
= ec_GFp_nistp521_precompute_mult
,
1615 .have_precompute_mult
= ec_GFp_nistp521_have_precompute_mult
,
1616 .field_mul
= ec_GFp_nist_field_mul
,
1617 .field_sqr
= ec_GFp_nist_field_sqr
1624 /******************************************************************************/
1625 /* FUNCTIONS TO MANAGE PRECOMPUTATION
1628 static NISTP521_PRE_COMP
*
1629 nistp521_pre_comp_new()
1631 NISTP521_PRE_COMP
*ret
= NULL
;
1632 ret
= malloc(sizeof(NISTP521_PRE_COMP
));
1634 ECerror(ERR_R_MALLOC_FAILURE
);
1637 memset(ret
->g_pre_comp
, 0, sizeof(ret
->g_pre_comp
));
1638 ret
->references
= 1;
1643 nistp521_pre_comp_dup(void *src_
)
1645 NISTP521_PRE_COMP
*src
= src_
;
1647 /* no need to actually copy, these objects never change! */
1648 CRYPTO_add(&src
->references
, 1, CRYPTO_LOCK_EC_PRE_COMP
);
1654 nistp521_pre_comp_free(void *pre_
)
1657 NISTP521_PRE_COMP
*pre
= pre_
;
1662 i
= CRYPTO_add(&pre
->references
, -1, CRYPTO_LOCK_EC_PRE_COMP
);
1670 nistp521_pre_comp_clear_free(void *pre_
)
1673 NISTP521_PRE_COMP
*pre
= pre_
;
1678 i
= CRYPTO_add(&pre
->references
, -1, CRYPTO_LOCK_EC_PRE_COMP
);
1682 explicit_bzero(pre
, sizeof(*pre
));
1686 /******************************************************************************/
1687 /* OPENSSL EC_METHOD FUNCTIONS
1691 ec_GFp_nistp521_group_init(EC_GROUP
* group
)
1694 ret
= ec_GFp_simple_group_init(group
);
1695 group
->a_is_minus3
= 1;
1700 ec_GFp_nistp521_group_set_curve(EC_GROUP
* group
, const BIGNUM
* p
,
1701 const BIGNUM
* a
, const BIGNUM
* b
, BN_CTX
* ctx
)
1704 BN_CTX
*new_ctx
= NULL
;
1705 BIGNUM
*curve_p
, *curve_a
, *curve_b
;
1708 if ((ctx
= new_ctx
= BN_CTX_new()) == NULL
)
1711 if (((curve_p
= BN_CTX_get(ctx
)) == NULL
) ||
1712 ((curve_a
= BN_CTX_get(ctx
)) == NULL
) ||
1713 ((curve_b
= BN_CTX_get(ctx
)) == NULL
))
1715 BN_bin2bn(nistp521_curve_params
[0], sizeof(felem_bytearray
), curve_p
);
1716 BN_bin2bn(nistp521_curve_params
[1], sizeof(felem_bytearray
), curve_a
);
1717 BN_bin2bn(nistp521_curve_params
[2], sizeof(felem_bytearray
), curve_b
);
1718 if ((BN_cmp(curve_p
, p
)) || (BN_cmp(curve_a
, a
)) ||
1719 (BN_cmp(curve_b
, b
))) {
1720 ECerror(EC_R_WRONG_CURVE_PARAMETERS
);
1723 group
->field_mod_func
= BN_nist_mod_521
;
1724 ret
= ec_GFp_simple_group_set_curve(group
, p
, a
, b
, ctx
);
1727 BN_CTX_free(new_ctx
);
1731 /* Takes the Jacobian coordinates (X, Y, Z) of a point and returns
1732 * (X', Y') = (X/Z^2, Y/Z^3) */
1734 ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP
* group
,
1735 const EC_POINT
* point
, BIGNUM
* x
, BIGNUM
* y
, BN_CTX
* ctx
)
1737 felem z1
, z2
, x_in
, y_in
, x_out
, y_out
;
1740 if (EC_POINT_is_at_infinity(group
, point
) > 0) {
1741 ECerror(EC_R_POINT_AT_INFINITY
);
1744 if ((!BN_to_felem(x_in
, &point
->X
)) || (!BN_to_felem(y_in
, &point
->Y
)) ||
1745 (!BN_to_felem(z1
, &point
->Z
)))
1748 felem_square(tmp
, z2
);
1749 felem_reduce(z1
, tmp
);
1750 felem_mul(tmp
, x_in
, z1
);
1751 felem_reduce(x_in
, tmp
);
1752 felem_contract(x_out
, x_in
);
1754 if (!felem_to_BN(x
, x_out
)) {
1755 ECerror(ERR_R_BN_LIB
);
1759 felem_mul(tmp
, z1
, z2
);
1760 felem_reduce(z1
, tmp
);
1761 felem_mul(tmp
, y_in
, z1
);
1762 felem_reduce(y_in
, tmp
);
1763 felem_contract(y_out
, y_in
);
1765 if (!felem_to_BN(y
, y_out
)) {
1766 ECerror(ERR_R_BN_LIB
);
1774 make_points_affine(size_t num
, felem points
[ /* num */ ][3], felem tmp_felems
[ /* num+1 */ ])
1777 * Runs in constant time, unless an input is the point at infinity
1778 * (which normally shouldn't happen).
1780 ec_GFp_nistp_points_make_affine_internal(
1785 (void (*) (void *)) felem_one
,
1786 (int (*) (const void *)) felem_is_zero_int
,
1787 (void (*) (void *, const void *)) felem_assign
,
1788 (void (*) (void *, const void *)) felem_square_reduce
,
1789 (void (*) (void *, const void *, const void *)) felem_mul_reduce
,
1790 (void (*) (void *, const void *)) felem_inv
,
1791 (void (*) (void *, const void *)) felem_contract
);
1794 /* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
1795 * Result is stored in r (r can equal one of the inputs). */
1797 ec_GFp_nistp521_points_mul(const EC_GROUP
* group
, EC_POINT
* r
,
1798 const BIGNUM
* scalar
, size_t num
, const EC_POINT
* points
[],
1799 const BIGNUM
* scalars
[], BN_CTX
* ctx
)
1804 BN_CTX
*new_ctx
= NULL
;
1805 BIGNUM
*x
, *y
, *z
, *tmp_scalar
;
1806 felem_bytearray g_secret
;
1807 felem_bytearray
*secrets
= NULL
;
1808 felem(*pre_comp
)[17][3] = NULL
;
1809 felem
*tmp_felems
= NULL
;
1810 felem_bytearray tmp
;
1811 unsigned i
, num_bytes
;
1812 int have_pre_comp
= 0;
1813 size_t num_points
= num
;
1814 felem x_in
, y_in
, z_in
, x_out
, y_out
, z_out
;
1815 NISTP521_PRE_COMP
*pre
= NULL
;
1816 felem(*g_pre_comp
)[3] = NULL
;
1817 EC_POINT
*generator
= NULL
;
1818 const EC_POINT
*p
= NULL
;
1819 const BIGNUM
*p_scalar
= NULL
;
1822 if ((ctx
= new_ctx
= BN_CTX_new()) == NULL
)
1825 if (((x
= BN_CTX_get(ctx
)) == NULL
) ||
1826 ((y
= BN_CTX_get(ctx
)) == NULL
) ||
1827 ((z
= BN_CTX_get(ctx
)) == NULL
) ||
1828 ((tmp_scalar
= BN_CTX_get(ctx
)) == NULL
))
1831 if (scalar
!= NULL
) {
1832 pre
= EC_EX_DATA_get_data(group
->extra_data
,
1833 nistp521_pre_comp_dup
, nistp521_pre_comp_free
,
1834 nistp521_pre_comp_clear_free
);
1836 /* we have precomputation, try to use it */
1837 g_pre_comp
= &pre
->g_pre_comp
[0];
1839 /* try to use the standard precomputation */
1840 g_pre_comp
= (felem(*)[3]) gmul
;
1841 generator
= EC_POINT_new(group
);
1842 if (generator
== NULL
)
1844 /* get the generator from precomputation */
1845 if (!felem_to_BN(x
, g_pre_comp
[1][0]) ||
1846 !felem_to_BN(y
, g_pre_comp
[1][1]) ||
1847 !felem_to_BN(z
, g_pre_comp
[1][2])) {
1848 ECerror(ERR_R_BN_LIB
);
1851 if (!EC_POINT_set_Jprojective_coordinates_GFp(group
,
1852 generator
, x
, y
, z
, ctx
))
1854 if (0 == EC_POINT_cmp(group
, generator
, group
->generator
, ctx
))
1855 /* precomputation matches generator */
1859 * we don't have valid precomputation: treat the
1860 * generator as a random point
1864 if (num_points
> 0) {
1865 if (num_points
>= 2) {
1867 * unless we precompute multiples for just one point,
1868 * converting those into affine form is time well
1873 secrets
= calloc(num_points
, sizeof(felem_bytearray
));
1874 pre_comp
= calloc(num_points
, 17 * 3 * sizeof(felem
));
1876 /* XXX should do more int overflow checking */
1877 tmp_felems
= reallocarray(NULL
,
1878 (num_points
* 17 + 1), sizeof(felem
));
1880 if ((secrets
== NULL
) || (pre_comp
== NULL
) || (mixed
&& (tmp_felems
== NULL
))) {
1881 ECerror(ERR_R_MALLOC_FAILURE
);
1885 * we treat NULL scalars as 0, and NULL points as points at
1886 * infinity, i.e., they contribute nothing to the linear
1889 for (i
= 0; i
< num_points
; ++i
) {
1892 * we didn't have a valid precomputation, so
1893 * we pick the generator
1896 p
= EC_GROUP_get0_generator(group
);
1899 /* the i^th point */
1902 p_scalar
= scalars
[i
];
1904 if ((p_scalar
!= NULL
) && (p
!= NULL
)) {
1905 /* reduce scalar to 0 <= scalar < 2^521 */
1906 if ((BN_num_bits(p_scalar
) > 521) || (BN_is_negative(p_scalar
))) {
1908 * this is an unusual input, and we
1909 * don't guarantee constant-timeness
1911 if (!BN_nnmod(tmp_scalar
, p_scalar
, &group
->order
, ctx
)) {
1912 ECerror(ERR_R_BN_LIB
);
1915 num_bytes
= BN_bn2bin(tmp_scalar
, tmp
);
1917 num_bytes
= BN_bn2bin(p_scalar
, tmp
);
1918 flip_endian(secrets
[i
], tmp
, num_bytes
);
1919 /* precompute multiples */
1920 if ((!BN_to_felem(x_out
, &p
->X
)) ||
1921 (!BN_to_felem(y_out
, &p
->Y
)) ||
1922 (!BN_to_felem(z_out
, &p
->Z
)))
1924 memcpy(pre_comp
[i
][1][0], x_out
, sizeof(felem
));
1925 memcpy(pre_comp
[i
][1][1], y_out
, sizeof(felem
));
1926 memcpy(pre_comp
[i
][1][2], z_out
, sizeof(felem
));
1927 for (j
= 2; j
<= 16; ++j
) {
1930 pre_comp
[i
][j
][0], pre_comp
[i
][j
][1], pre_comp
[i
][j
][2],
1931 pre_comp
[i
][1][0], pre_comp
[i
][1][1], pre_comp
[i
][1][2],
1932 0, pre_comp
[i
][j
- 1][0], pre_comp
[i
][j
- 1][1], pre_comp
[i
][j
- 1][2]);
1935 pre_comp
[i
][j
][0], pre_comp
[i
][j
][1], pre_comp
[i
][j
][2],
1936 pre_comp
[i
][j
/ 2][0], pre_comp
[i
][j
/ 2][1], pre_comp
[i
][j
/ 2][2]);
1942 make_points_affine(num_points
* 17, pre_comp
[0], tmp_felems
);
1944 /* the scalar for the generator */
1945 if ((scalar
!= NULL
) && (have_pre_comp
)) {
1946 memset(g_secret
, 0, sizeof(g_secret
));
1947 /* reduce scalar to 0 <= scalar < 2^521 */
1948 if ((BN_num_bits(scalar
) > 521) || (BN_is_negative(scalar
))) {
1950 * this is an unusual input, and we don't guarantee
1953 if (!BN_nnmod(tmp_scalar
, scalar
, &group
->order
, ctx
)) {
1954 ECerror(ERR_R_BN_LIB
);
1957 num_bytes
= BN_bn2bin(tmp_scalar
, tmp
);
1959 num_bytes
= BN_bn2bin(scalar
, tmp
);
1960 flip_endian(g_secret
, tmp
, num_bytes
);
1961 /* do the multiplication with generator precomputation */
1962 batch_mul(x_out
, y_out
, z_out
,
1963 (const felem_bytearray(*)) secrets
, num_points
,
1965 mixed
, (const felem(*)[17][3]) pre_comp
,
1966 (const felem(*)[3]) g_pre_comp
);
1968 /* do the multiplication without generator precomputation */
1969 batch_mul(x_out
, y_out
, z_out
,
1970 (const felem_bytearray(*)) secrets
, num_points
,
1971 NULL
, mixed
, (const felem(*)[17][3]) pre_comp
, NULL
);
1972 /* reduce the output to its unique minimal representation */
1973 felem_contract(x_in
, x_out
);
1974 felem_contract(y_in
, y_out
);
1975 felem_contract(z_in
, z_out
);
1976 if ((!felem_to_BN(x
, x_in
)) || (!felem_to_BN(y
, y_in
)) ||
1977 (!felem_to_BN(z
, z_in
))) {
1978 ECerror(ERR_R_BN_LIB
);
1981 ret
= EC_POINT_set_Jprojective_coordinates_GFp(group
, r
, x
, y
, z
, ctx
);
1985 EC_POINT_free(generator
);
1986 BN_CTX_free(new_ctx
);
1994 ec_GFp_nistp521_precompute_mult(EC_GROUP
* group
, BN_CTX
* ctx
)
1997 NISTP521_PRE_COMP
*pre
= NULL
;
1999 BN_CTX
*new_ctx
= NULL
;
2001 EC_POINT
*generator
= NULL
;
2002 felem tmp_felems
[16];
2004 /* throw away old precomputation */
2005 EC_EX_DATA_free_data(&group
->extra_data
, nistp521_pre_comp_dup
,
2006 nistp521_pre_comp_free
, nistp521_pre_comp_clear_free
);
2008 if ((ctx
= new_ctx
= BN_CTX_new()) == NULL
)
2011 if (((x
= BN_CTX_get(ctx
)) == NULL
) ||
2012 ((y
= BN_CTX_get(ctx
)) == NULL
))
2014 /* get the generator */
2015 if (group
->generator
== NULL
)
2017 generator
= EC_POINT_new(group
);
2018 if (generator
== NULL
)
2020 BN_bin2bn(nistp521_curve_params
[3], sizeof(felem_bytearray
), x
);
2021 BN_bin2bn(nistp521_curve_params
[4], sizeof(felem_bytearray
), y
);
2022 if (!EC_POINT_set_affine_coordinates_GFp(group
, generator
, x
, y
, ctx
))
2024 if ((pre
= nistp521_pre_comp_new()) == NULL
)
2026 /* if the generator is the standard one, use built-in precomputation */
2027 if (0 == EC_POINT_cmp(group
, generator
, group
->generator
, ctx
)) {
2028 memcpy(pre
->g_pre_comp
, gmul
, sizeof(pre
->g_pre_comp
));
2032 if ((!BN_to_felem(pre
->g_pre_comp
[1][0], &group
->generator
->X
)) ||
2033 (!BN_to_felem(pre
->g_pre_comp
[1][1], &group
->generator
->Y
)) ||
2034 (!BN_to_felem(pre
->g_pre_comp
[1][2], &group
->generator
->Z
)))
2036 /* compute 2^130*G, 2^260*G, 2^390*G */
2037 for (i
= 1; i
<= 4; i
<<= 1) {
2038 point_double(pre
->g_pre_comp
[2 * i
][0], pre
->g_pre_comp
[2 * i
][1],
2039 pre
->g_pre_comp
[2 * i
][2], pre
->g_pre_comp
[i
][0],
2040 pre
->g_pre_comp
[i
][1], pre
->g_pre_comp
[i
][2]);
2041 for (j
= 0; j
< 129; ++j
) {
2042 point_double(pre
->g_pre_comp
[2 * i
][0],
2043 pre
->g_pre_comp
[2 * i
][1],
2044 pre
->g_pre_comp
[2 * i
][2],
2045 pre
->g_pre_comp
[2 * i
][0],
2046 pre
->g_pre_comp
[2 * i
][1],
2047 pre
->g_pre_comp
[2 * i
][2]);
2050 /* g_pre_comp[0] is the point at infinity */
2051 memset(pre
->g_pre_comp
[0], 0, sizeof(pre
->g_pre_comp
[0]));
2052 /* the remaining multiples */
2053 /* 2^130*G + 2^260*G */
2054 point_add(pre
->g_pre_comp
[6][0], pre
->g_pre_comp
[6][1],
2055 pre
->g_pre_comp
[6][2], pre
->g_pre_comp
[4][0],
2056 pre
->g_pre_comp
[4][1], pre
->g_pre_comp
[4][2],
2057 0, pre
->g_pre_comp
[2][0], pre
->g_pre_comp
[2][1],
2058 pre
->g_pre_comp
[2][2]);
2059 /* 2^130*G + 2^390*G */
2060 point_add(pre
->g_pre_comp
[10][0], pre
->g_pre_comp
[10][1],
2061 pre
->g_pre_comp
[10][2], pre
->g_pre_comp
[8][0],
2062 pre
->g_pre_comp
[8][1], pre
->g_pre_comp
[8][2],
2063 0, pre
->g_pre_comp
[2][0], pre
->g_pre_comp
[2][1],
2064 pre
->g_pre_comp
[2][2]);
2065 /* 2^260*G + 2^390*G */
2066 point_add(pre
->g_pre_comp
[12][0], pre
->g_pre_comp
[12][1],
2067 pre
->g_pre_comp
[12][2], pre
->g_pre_comp
[8][0],
2068 pre
->g_pre_comp
[8][1], pre
->g_pre_comp
[8][2],
2069 0, pre
->g_pre_comp
[4][0], pre
->g_pre_comp
[4][1],
2070 pre
->g_pre_comp
[4][2]);
2071 /* 2^130*G + 2^260*G + 2^390*G */
2072 point_add(pre
->g_pre_comp
[14][0], pre
->g_pre_comp
[14][1],
2073 pre
->g_pre_comp
[14][2], pre
->g_pre_comp
[12][0],
2074 pre
->g_pre_comp
[12][1], pre
->g_pre_comp
[12][2],
2075 0, pre
->g_pre_comp
[2][0], pre
->g_pre_comp
[2][1],
2076 pre
->g_pre_comp
[2][2]);
2077 for (i
= 1; i
< 8; ++i
) {
2078 /* odd multiples: add G */
2079 point_add(pre
->g_pre_comp
[2 * i
+ 1][0], pre
->g_pre_comp
[2 * i
+ 1][1],
2080 pre
->g_pre_comp
[2 * i
+ 1][2], pre
->g_pre_comp
[2 * i
][0],
2081 pre
->g_pre_comp
[2 * i
][1], pre
->g_pre_comp
[2 * i
][2],
2082 0, pre
->g_pre_comp
[1][0], pre
->g_pre_comp
[1][1],
2083 pre
->g_pre_comp
[1][2]);
2085 make_points_affine(15, &(pre
->g_pre_comp
[1]), tmp_felems
);
2087 if (!EC_EX_DATA_set_data(&group
->extra_data
, pre
, nistp521_pre_comp_dup
,
2088 nistp521_pre_comp_free
, nistp521_pre_comp_clear_free
))
2094 EC_POINT_free(generator
);
2095 BN_CTX_free(new_ctx
);
2096 nistp521_pre_comp_free(pre
);
2101 ec_GFp_nistp521_have_precompute_mult(const EC_GROUP
* group
)
2103 if (EC_EX_DATA_get_data(group
->extra_data
, nistp521_pre_comp_dup
,
2104 nistp521_pre_comp_free
, nistp521_pre_comp_clear_free
)