1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // This is an implementation of the P224 elliptic curve group. It's written to
6 // be short and simple rather than fast, although it's still constant-time.
8 // See http://www.imperialviolet.org/2010/12/04/ecc.html ([1]) for background.
#include "crypto/p224.h"

#include <string.h>

#include "base/sys_byteorder.h"
17 // Allow htonl/ntohl to be called without requiring ws2_32.dll to be loaded,
18 // which isn't available in Chrome's sandbox. See crbug.com/116591.
19 // TODO(wez): Replace these calls with base::htonl() etc when available.
20 #define ntohl(x) _byteswap_ulong(x)
21 #define htonl(x) _byteswap_ulong(x)
26 // Field element functions.
28 // The field that we're dealing with is ℤ/pℤ where p = 2**224 - 2**96 + 1.
30 // Field elements are represented by a FieldElement, which is a typedef to an
31 // array of 8 uint32's. The value of a FieldElement, a, is:
//   a[0] + 2**28·a[1] + 2**56·a[2] + ... + 2**196·a[7]
34 // Using 28-bit limbs means that there's only 4 bits of headroom, which is less
35 // than we would really like. But it has the useful feature that we hit 2**224
36 // exactly, making the reflections during a reduce much nicer.
38 using crypto::p224::FieldElement
;
40 // Add computes *out = a+b
42 // a[i] + b[i] < 2**32
43 void Add(FieldElement
* out
, const FieldElement
& a
, const FieldElement
& b
) {
44 for (int i
= 0; i
< 8; i
++) {
45 (*out
)[i
] = a
[i
] + b
[i
];
49 static const uint32 kTwo31p3
= (1u<<31) + (1u<<3);
50 static const uint32 kTwo31m3
= (1u<<31) - (1u<<3);
51 static const uint32 kTwo31m15m3
= (1u<<31) - (1u<<15) - (1u<<3);
52 // kZero31ModP is 0 mod p where bit 31 is set in all limbs so that we can
53 // subtract smaller amounts without underflow. See the section "Subtraction" in
55 static const FieldElement kZero31ModP
= {
56 kTwo31p3
, kTwo31m3
, kTwo31m3
, kTwo31m15m3
,
57 kTwo31m3
, kTwo31m3
, kTwo31m3
, kTwo31m3
60 // Subtract computes *out = a-b
64 void Subtract(FieldElement
* out
, const FieldElement
& a
, const FieldElement
& b
) {
65 for (int i
= 0; i
< 8; i
++) {
66 // See the section on "Subtraction" in [1] for details.
67 (*out
)[i
] = a
[i
] + kZero31ModP
[i
] - b
[i
];
71 static const uint64 kTwo63p35
= (1ull<<63) + (1ull<<35);
72 static const uint64 kTwo63m35
= (1ull<<63) - (1ull<<35);
73 static const uint64 kTwo63m35m19
= (1ull<<63) - (1ull<<35) - (1ull<<19);
74 // kZero63ModP is 0 mod p where bit 63 is set in all limbs. See the section
75 // "Subtraction" in [1] for why.
76 static const uint64 kZero63ModP
[8] = {
77 kTwo63p35
, kTwo63m35
, kTwo63m35
, kTwo63m35
,
78 kTwo63m35m19
, kTwo63m35
, kTwo63m35
, kTwo63m35
,
81 static const uint32 kBottom28Bits
= 0xfffffff;
83 // LargeFieldElement also represents an element of the field. The limbs are
84 // still spaced 28-bits apart and in little-endian order. So the limbs are at
85 // 0, 28, 56, ..., 392 bits, each 64-bits wide.
86 typedef uint64 LargeFieldElement
[15];
88 // ReduceLarge converts a LargeFieldElement to a FieldElement.
91 void ReduceLarge(FieldElement
* out
, LargeFieldElement
* inptr
) {
92 LargeFieldElement
& in(*inptr
);
94 for (int i
= 0; i
< 8; i
++) {
95 in
[i
] += kZero63ModP
[i
];
98 // Eliminate the coefficients at 2**224 and greater while maintaining the
100 for (int i
= 14; i
>= 8; i
--) {
101 in
[i
-8] -= in
[i
]; // reflection off the "+1" term of p.
102 in
[i
-5] += (in
[i
] & 0xffff) << 12; // part of the "-2**96" reflection.
103 in
[i
-4] += in
[i
] >> 16; // the rest of the "-2**96" reflection.
108 // As the values become small enough, we start to store them in |out| and use
109 // 32-bit operations.
110 for (int i
= 1; i
< 8; i
++) {
111 in
[i
+1] += in
[i
] >> 28;
112 (*out
)[i
] = static_cast<uint32
>(in
[i
] & kBottom28Bits
);
114 // Eliminate the term at 2*224 that we introduced while keeping the same
116 in
[0] -= in
[8]; // reflection off the "+1" term of p.
117 (*out
)[3] += static_cast<uint32
>(in
[8] & 0xffff) << 12; // "-2**96" term
118 (*out
)[4] += static_cast<uint32
>(in
[8] >> 16); // rest of "-2**96" term
122 // out[1,2,5..7] < 2**28
124 (*out
)[0] = static_cast<uint32
>(in
[0] & kBottom28Bits
);
125 (*out
)[1] += static_cast<uint32
>((in
[0] >> 28) & kBottom28Bits
);
126 (*out
)[2] += static_cast<uint32
>(in
[0] >> 56);
132 // Mul computes *out = a*b
134 // a[i] < 2**29, b[i] < 2**30 (or vice versa)
136 void Mul(FieldElement
* out
, const FieldElement
& a
, const FieldElement
& b
) {
137 LargeFieldElement tmp
;
138 memset(&tmp
, 0, sizeof(tmp
));
140 for (int i
= 0; i
< 8; i
++) {
141 for (int j
= 0; j
< 8; j
++) {
142 tmp
[i
+j
] += static_cast<uint64
>(a
[i
]) * static_cast<uint64
>(b
[j
]);
146 ReduceLarge(out
, &tmp
);
149 // Square computes *out = a*a
153 void Square(FieldElement
* out
, const FieldElement
& a
) {
154 LargeFieldElement tmp
;
155 memset(&tmp
, 0, sizeof(tmp
));
157 for (int i
= 0; i
< 8; i
++) {
158 for (int j
= 0; j
<= i
; j
++) {
159 uint64 r
= static_cast<uint64
>(a
[i
]) * static_cast<uint64
>(a
[j
]);
168 ReduceLarge(out
, &tmp
);
171 // Reduce reduces the coefficients of in_out to smaller bounds.
173 // On entry: a[i] < 2**31 + 2**30
174 // On exit: a[i] < 2**29
175 void Reduce(FieldElement
* in_out
) {
176 FieldElement
& a
= *in_out
;
178 for (int i
= 0; i
< 7; i
++) {
179 a
[i
+1] += a
[i
] >> 28;
180 a
[i
] &= kBottom28Bits
;
182 uint32 top
= a
[7] >> 28;
183 a
[7] &= kBottom28Bits
;
186 // Constant-time: mask = (top != 0) ? 0xffffffff : 0
191 mask
= static_cast<uint32
>(static_cast<int32
>(mask
) >> 31);
193 // Eliminate top while maintaining the same value mod p.
197 // We may have just made a[0] negative but, if we did, then we must
198 // have added something to a[3], thus it's > 2**12. Therefore we can
199 // carry down to a[0].
201 a
[2] += mask
& ((1<<28) - 1);
202 a
[1] += mask
& ((1<<28) - 1);
203 a
[0] += mask
& (1<<28);
206 // Invert calcuates *out = in**-1 by computing in**(2**224 - 2**96 - 1), i.e.
207 // Fermat's little theorem.
208 void Invert(FieldElement
* out
, const FieldElement
& in
) {
209 FieldElement f1
, f2
, f3
, f4
;
211 Square(&f1
, in
); // 2
212 Mul(&f1
, f1
, in
); // 2**2 - 1
213 Square(&f1
, f1
); // 2**3 - 2
214 Mul(&f1
, f1
, in
); // 2**3 - 1
215 Square(&f2
, f1
); // 2**4 - 2
216 Square(&f2
, f2
); // 2**5 - 4
217 Square(&f2
, f2
); // 2**6 - 8
218 Mul(&f1
, f1
, f2
); // 2**6 - 1
219 Square(&f2
, f1
); // 2**7 - 2
220 for (int i
= 0; i
< 5; i
++) { // 2**12 - 2**6
223 Mul(&f2
, f2
, f1
); // 2**12 - 1
224 Square(&f3
, f2
); // 2**13 - 2
225 for (int i
= 0; i
< 11; i
++) { // 2**24 - 2**12
228 Mul(&f2
, f3
, f2
); // 2**24 - 1
229 Square(&f3
, f2
); // 2**25 - 2
230 for (int i
= 0; i
< 23; i
++) { // 2**48 - 2**24
233 Mul(&f3
, f3
, f2
); // 2**48 - 1
234 Square(&f4
, f3
); // 2**49 - 2
235 for (int i
= 0; i
< 47; i
++) { // 2**96 - 2**48
238 Mul(&f3
, f3
, f4
); // 2**96 - 1
239 Square(&f4
, f3
); // 2**97 - 2
240 for (int i
= 0; i
< 23; i
++) { // 2**120 - 2**24
243 Mul(&f2
, f4
, f2
); // 2**120 - 1
244 for (int i
= 0; i
< 6; i
++) { // 2**126 - 2**6
247 Mul(&f1
, f1
, f2
); // 2**126 - 1
248 Square(&f1
, f1
); // 2**127 - 2
249 Mul(&f1
, f1
, in
); // 2**127 - 1
250 for (int i
= 0; i
< 97; i
++) { // 2**224 - 2**97
253 Mul(out
, f1
, f3
); // 2**224 - 2**96 - 1
256 // Contract converts a FieldElement to its minimal, distinguished form.
258 // On entry, in[i] < 2**29
259 // On exit, in[i] < 2**28
260 void Contract(FieldElement
* inout
) {
261 FieldElement
& out
= *inout
;
263 // Reduce the coefficients to < 2**28.
264 for (int i
= 0; i
< 7; i
++) {
265 out
[i
+1] += out
[i
] >> 28;
266 out
[i
] &= kBottom28Bits
;
268 uint32 top
= out
[7] >> 28;
269 out
[7] &= kBottom28Bits
;
271 // Eliminate top while maintaining the same value mod p.
275 // We may just have made out[0] negative. So we carry down. If we made
276 // out[0] negative then we know that out[3] is sufficiently positive
277 // because we just added to it.
278 for (int i
= 0; i
< 3; i
++) {
279 uint32 mask
= static_cast<uint32
>(static_cast<int32
>(out
[i
]) >> 31);
280 out
[i
] += (1 << 28) & mask
;
281 out
[i
+1] -= 1 & mask
;
284 // We might have pushed out[3] over 2**28 so we perform another, partial
286 for (int i
= 3; i
< 7; i
++) {
287 out
[i
+1] += out
[i
] >> 28;
288 out
[i
] &= kBottom28Bits
;
291 out
[7] &= kBottom28Bits
;
293 // Eliminate top while maintaining the same value mod p.
297 // There are two cases to consider for out[3]:
298 // 1) The first time that we eliminated top, we didn't push out[3] over
299 // 2**28. In this case, the partial carry chain didn't change any values
301 // 2) We did push out[3] over 2**28 the first time that we eliminated top.
302 // The first value of top was in [0..16), therefore, prior to eliminating
303 // the first top, 0xfff1000 <= out[3] <= 0xfffffff. Therefore, after
304 // overflowing and being reduced by the second carry chain, out[3] <=
305 // 0xf000. Thus it cannot have overflowed when we eliminated top for the
308 // Again, we may just have made out[0] negative, so do the same carry down.
309 // As before, if we made out[0] negative then we know that out[3] is
310 // sufficiently positive.
311 for (int i
= 0; i
< 3; i
++) {
312 uint32 mask
= static_cast<uint32
>(static_cast<int32
>(out
[i
]) >> 31);
313 out
[i
] += (1 << 28) & mask
;
314 out
[i
+1] -= 1 & mask
;
317 // The value is < 2**224, but maybe greater than p. In order to reduce to a
318 // unique, minimal value we see if the value is >= p and, if so, subtract p.
320 // First we build a mask from the top four limbs, which must all be
321 // equal to bottom28Bits if the whole value is >= p. If top4AllOnes
322 // ends up with any zero bits in the bottom 28 bits, then this wasn't
324 uint32 top4AllOnes
= 0xffffffffu
;
325 for (int i
= 4; i
< 8; i
++) {
326 top4AllOnes
&= (out
[i
] & kBottom28Bits
) - 1;
328 top4AllOnes
|= 0xf0000000;
329 // Now we replicate any zero bits to all the bits in top4AllOnes.
330 top4AllOnes
&= top4AllOnes
>> 16;
331 top4AllOnes
&= top4AllOnes
>> 8;
332 top4AllOnes
&= top4AllOnes
>> 4;
333 top4AllOnes
&= top4AllOnes
>> 2;
334 top4AllOnes
&= top4AllOnes
>> 1;
336 static_cast<uint32
>(static_cast<int32
>(top4AllOnes
<< 31) >> 31);
338 // Now we test whether the bottom three limbs are non-zero.
339 uint32 bottom3NonZero
= out
[0] | out
[1] | out
[2];
340 bottom3NonZero
|= bottom3NonZero
>> 16;
341 bottom3NonZero
|= bottom3NonZero
>> 8;
342 bottom3NonZero
|= bottom3NonZero
>> 4;
343 bottom3NonZero
|= bottom3NonZero
>> 2;
344 bottom3NonZero
|= bottom3NonZero
>> 1;
346 static_cast<uint32
>(static_cast<int32
>(bottom3NonZero
<< 31) >> 31);
348 // Everything depends on the value of out[3].
349 // If it's > 0xffff000 and top4AllOnes != 0 then the whole value is >= p
350 // If it's = 0xffff000 and top4AllOnes != 0 and bottom3NonZero != 0,
351 // then the whole value is >= p
352 // If it's < 0xffff000, then the whole value is < p
353 uint32 n
= out
[3] - 0xffff000;
354 uint32 out3Equal
= n
;
355 out3Equal
|= out3Equal
>> 16;
356 out3Equal
|= out3Equal
>> 8;
357 out3Equal
|= out3Equal
>> 4;
358 out3Equal
|= out3Equal
>> 2;
359 out3Equal
|= out3Equal
>> 1;
361 ~static_cast<uint32
>(static_cast<int32
>(out3Equal
<< 31) >> 31);
363 // If out[3] > 0xffff000 then n's MSB will be zero.
364 uint32 out3GT
= ~static_cast<uint32
>(static_cast<int32
>(n
<< 31) >> 31);
366 uint32 mask
= top4AllOnes
& ((out3Equal
& bottom3NonZero
) | out3GT
);
368 out
[3] -= 0xffff000 & mask
;
369 out
[4] -= 0xfffffff & mask
;
370 out
[5] -= 0xfffffff & mask
;
371 out
[6] -= 0xfffffff & mask
;
372 out
[7] -= 0xfffffff & mask
;
376 // Group element functions.
378 // These functions deal with group elements. The group is an elliptic curve
379 // group with a = -3 defined in FIPS 186-3, section D.2.2.
381 using crypto::p224::Point
;
383 // kP is the P224 prime.
384 const FieldElement kP
= {
386 268435455, 268435455, 268435455, 268435455,
389 // kB is parameter of the elliptic curve.
390 const FieldElement kB
= {
391 55967668, 11768882, 265861671, 185302395,
392 39211076, 180311059, 84673715, 188764328,
395 // AddJacobian computes *out = a+b where a != b.
396 void AddJacobian(Point
*out
,
399 // See http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl
400 FieldElement z1z1
, z2z2
, u1
, u2
, s1
, s2
, h
, i
, j
, r
, v
;
423 Subtract(&h
, u2
, u1
);
427 for (int j
= 0; j
< 8; j
++) {
436 Subtract(&r
, s2
, s1
);
438 for (int i
= 0; i
< 8; i
++) {
446 // Z3 = ((Z1+Z2)²-Z1Z1-Z2Z2)*H
447 Add(&z1z1
, z1z1
, z2z2
);
448 Add(&z2z2
, a
.z
, b
.z
);
451 Subtract(&out
->z
, z2z2
, z1z1
);
453 Mul(&out
->z
, out
->z
, h
);
456 for (int i
= 0; i
< 8; i
++) {
462 Subtract(&out
->x
, out
->x
, z1z1
);
465 // Y3 = r*(V-X3)-2*S1*J
466 for (int i
= 0; i
< 8; i
++) {
470 Subtract(&z1z1
, v
, out
->x
);
473 Subtract(&out
->y
, z1z1
, s1
);
477 // DoubleJacobian computes *out = a+a.
478 void DoubleJacobian(Point
* out
, const Point
& a
) {
479 // See http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
480 FieldElement delta
, gamma
, beta
, alpha
, t
;
484 Mul(&beta
, a
.x
, gamma
);
486 // alpha = 3*(X1-delta)*(X1+delta)
488 for (int i
= 0; i
< 8; i
++) {
492 Subtract(&alpha
, a
.x
, delta
);
494 Mul(&alpha
, alpha
, t
);
496 // Z3 = (Y1+Z1)²-gamma-delta
497 Add(&out
->z
, a
.y
, a
.z
);
499 Square(&out
->z
, out
->z
);
500 Subtract(&out
->z
, out
->z
, gamma
);
502 Subtract(&out
->z
, out
->z
, delta
);
505 // X3 = alpha²-8*beta
506 for (int i
= 0; i
< 8; i
++) {
507 delta
[i
] = beta
[i
] << 3;
510 Square(&out
->x
, alpha
);
511 Subtract(&out
->x
, out
->x
, delta
);
514 // Y3 = alpha*(4*beta-X3)-8*gamma²
515 for (int i
= 0; i
< 8; i
++) {
519 Subtract(&beta
, beta
, out
->x
);
521 Square(&gamma
, gamma
);
522 for (int i
= 0; i
< 8; i
++) {
526 Mul(&out
->y
, alpha
, beta
);
527 Subtract(&out
->y
, out
->y
, gamma
);
531 // CopyConditional sets *out=a if mask is 0xffffffff. mask must be either 0 of
533 void CopyConditional(Point
* out
,
536 for (int i
= 0; i
< 8; i
++) {
537 out
->x
[i
] ^= mask
& (a
.x
[i
] ^ out
->x
[i
]);
538 out
->y
[i
] ^= mask
& (a
.y
[i
] ^ out
->y
[i
]);
539 out
->z
[i
] ^= mask
& (a
.z
[i
] ^ out
->z
[i
]);
543 // ScalarMult calculates *out = a*scalar where scalar is a big-endian number of
544 // length scalar_len and != 0.
545 void ScalarMult(Point
* out
, const Point
& a
,
546 const uint8
* scalar
, size_t scalar_len
) {
547 memset(out
, 0, sizeof(*out
));
550 uint32 first_bit
= 0xffffffff;
551 for (size_t i
= 0; i
< scalar_len
; i
++) {
552 for (unsigned int bit_num
= 0; bit_num
< 8; bit_num
++) {
553 DoubleJacobian(out
, *out
);
554 uint32 bit
= static_cast<uint32
>(static_cast<int32
>(
555 (((scalar
[i
] >> (7 - bit_num
)) & 1) << 31) >> 31));
556 AddJacobian(&tmp
, a
, *out
);
557 CopyConditional(out
, a
, first_bit
& bit
);
558 CopyConditional(out
, tmp
, ~first_bit
& bit
);
559 first_bit
= first_bit
& ~bit
;
564 // Get224Bits reads 7 words from in and scatters their contents in
565 // little-endian form into 8 words at out, 28 bits per output word.
566 void Get224Bits(uint32
* out
, const uint32
* in
) {
567 out
[0] = ntohl(in
[6]) & kBottom28Bits
;
568 out
[1] = ((ntohl(in
[5]) << 4) | (ntohl(in
[6]) >> 28)) & kBottom28Bits
;
569 out
[2] = ((ntohl(in
[4]) << 8) | (ntohl(in
[5]) >> 24)) & kBottom28Bits
;
570 out
[3] = ((ntohl(in
[3]) << 12) | (ntohl(in
[4]) >> 20)) & kBottom28Bits
;
571 out
[4] = ((ntohl(in
[2]) << 16) | (ntohl(in
[3]) >> 16)) & kBottom28Bits
;
572 out
[5] = ((ntohl(in
[1]) << 20) | (ntohl(in
[2]) >> 12)) & kBottom28Bits
;
573 out
[6] = ((ntohl(in
[0]) << 24) | (ntohl(in
[1]) >> 8)) & kBottom28Bits
;
574 out
[7] = (ntohl(in
[0]) >> 4) & kBottom28Bits
;
577 // Put224Bits performs the inverse operation to Get224Bits: taking 28 bits from
578 // each of 8 input words and writing them in big-endian order to 7 words at
580 void Put224Bits(uint32
* out
, const uint32
* in
) {
581 out
[6] = htonl((in
[0] >> 0) | (in
[1] << 28));
582 out
[5] = htonl((in
[1] >> 4) | (in
[2] << 24));
583 out
[4] = htonl((in
[2] >> 8) | (in
[3] << 20));
584 out
[3] = htonl((in
[3] >> 12) | (in
[4] << 16));
585 out
[2] = htonl((in
[4] >> 16) | (in
[5] << 12));
586 out
[1] = htonl((in
[5] >> 20) | (in
[6] << 8));
587 out
[0] = htonl((in
[6] >> 24) | (in
[7] << 4));
590 } // anonymous namespace
596 bool Point::SetFromString(const base::StringPiece
& in
) {
597 if (in
.size() != 2*28)
599 const uint32
* inwords
= reinterpret_cast<const uint32
*>(in
.data());
600 Get224Bits(x
, inwords
);
601 Get224Bits(y
, inwords
+ 7);
602 memset(&z
, 0, sizeof(z
));
605 // Check that the point is on the curve, i.e. that y² = x³ - 3x + b.
614 FieldElement three_x
;
615 for (int i
= 0; i
< 8; i
++) {
616 three_x
[i
] = x
[i
] * 3;
619 Subtract(&rhs
, rhs
, three_x
);
622 ::Add(&rhs
, rhs
, kB
);
624 return memcmp(&lhs
, &rhs
, sizeof(lhs
)) == 0;
627 std::string
Point::ToString() const {
628 FieldElement zinv
, zinv_sq
, x
, y
;
630 Invert(&zinv
, this->z
);
631 Square(&zinv_sq
, zinv
);
632 Mul(&x
, this->x
, zinv_sq
);
633 Mul(&zinv_sq
, zinv_sq
, zinv
);
634 Mul(&y
, this->y
, zinv_sq
);
640 Put224Bits(outwords
, x
);
641 Put224Bits(outwords
+ 7, y
);
642 return std::string(reinterpret_cast<const char*>(outwords
), sizeof(outwords
));
645 void ScalarMult(const Point
& in
, const uint8
* scalar
, Point
* out
) {
646 ::ScalarMult(out
, in
, scalar
, 28);
649 // kBasePoint is the base point (generator) of the elliptic curve group.
650 static const Point kBasePoint
= {
651 {22813985, 52956513, 34677300, 203240812,
652 12143107, 133374265, 225162431, 191946955},
653 {83918388, 223877528, 122119236, 123340192,
654 266784067, 263504429, 146143011, 198407736},
655 {1, 0, 0, 0, 0, 0, 0, 0},
658 void ScalarBaseMult(const uint8
* scalar
, Point
* out
) {
659 ::ScalarMult(out
, kBasePoint
, scalar
, 28);
662 void Add(const Point
& a
, const Point
& b
, Point
* out
) {
663 AddJacobian(out
, a
, b
);
666 void Negate(const Point
& in
, Point
* out
) {
667 // Guide to elliptic curve cryptography, page 89 suggests that (X : X+Y : Z)
668 // is the negative in Jacobian coordinates, but it doesn't actually appear to
669 // be true in testing so this performs the negation in affine coordinates.
670 FieldElement zinv
, zinv_sq
, y
;
672 Square(&zinv_sq
, zinv
);
673 Mul(&out
->x
, in
.x
, zinv_sq
);
674 Mul(&zinv_sq
, zinv_sq
, zinv
);
675 Mul(&y
, in
.y
, zinv_sq
);
677 Subtract(&out
->y
, kP
, y
);
680 memset(&out
->z
, 0, sizeof(out
->z
));
686 } // namespace crypto