1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // This file contains the Go wrapper for the constant-time, 64-bit assembly
6 // implementation of P256. The optimizations performed here are described in
8 // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
10 // http://link.springer.com/article/10.1007%2Fs13389-014-0090-x
11 // https://eprint.iacr.org/2013/816.pdf
35 p256Precomputed
*[37][64 * 8]uint64
36 precomputeOnce sync
.Once
40 // See FIPS 186-3, section D.2.3
41 p256
.CurveParams
= &CurveParams
{Name
: "P-256"}
42 p256
.P
, _
= new(big
.Int
).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
43 p256
.N
, _
= new(big
.Int
).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
44 p256
.B
, _
= new(big
.Int
).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
45 p256
.Gx
, _
= new(big
.Int
).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
46 p256
.Gy
, _
= new(big
.Int
).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
50 func (curve p256Curve
) Params() *CurveParams
{
51 return curve
.CurveParams
54 // Functions implemented in p256_asm_amd64.s
55 // Montgomery multiplication modulo P256
56 func p256Mul(res
, in1
, in2
[]uint64)
58 // Montgomery square modulo P256
59 func p256Sqr(res
, in
[]uint64)
61 // Montgomery multiplication by 1
62 func p256FromMont(res
, in
[]uint64)
64 // iff cond == 1 val <- -val
65 func p256NegCond(val
[]uint64, cond
int)
67 // if cond == 0 res <- b; else res <- a
68 func p256MovCond(res
, a
, b
[]uint64, cond
int)
71 func p256BigToLittle(res
[]uint64, in
[]byte)
72 func p256LittleToBig(res
[]byte, in
[]uint64)
74 // Constant time table access
75 func p256Select(point
, table
[]uint64, idx
int)
76 func p256SelectBase(point
, table
[]uint64, idx
int)
78 // Montgomery multiplication modulo Ord(G)
79 func p256OrdMul(res
, in1
, in2
[]uint64)
81 // Montgomery square modulo Ord(G), repeated n times
82 func p256OrdSqr(res
, in
[]uint64, n
int)
84 // Point add with in2 being affine point
85 // If sign == 1 -> in2 = -in2
86 // If sel == 0 -> res = in1
87 // if zero == 0 -> res = in2
88 func p256PointAddAffineAsm(res
, in1
, in2
[]uint64, sign
, sel
, zero
int)
91 func p256PointAddAsm(res
, in1
, in2
[]uint64)
94 func p256PointDoubleAsm(res
, in
[]uint64)
96 func (curve p256Curve
) Inverse(k
*big
.Int
) *big
.Int
{
98 // This should never happen.
99 k
= new(big
.Int
).Neg(k
)
102 if k
.Cmp(p256
.N
) >= 0 {
103 // This should never happen.
104 k
= new(big
.Int
).Mod(k
, p256
.N
)
107 // table will store precomputed powers of x. The four words at index
108 // 4×i store x^(i+1).
109 var table
[4 * 15]uint64
111 x
:= make([]uint64, 4)
113 // This code operates in the Montgomery domain where R = 2^256 mod n
114 // and n is the order of the scalar field. (See initP256 for the
115 // value.) Elements in the Montgomery domain take the form a×R and
116 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
117 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
118 // i.e. converts x into the Montgomery domain.
119 RR
:= []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}
120 p256OrdMul(table
[:4], x
, RR
)
122 // Prepare the table, no need in constant time access, because the
123 // power is not a secret. (Entry 0 is never used.)
124 for i
:= 2; i
< 16; i
+= 2 {
125 p256OrdSqr(table
[4*(i
-1):], table
[4*((i
/2)-1):], 1)
126 p256OrdMul(table
[4*i
:], table
[4*(i
-1):], table
[:4])
129 x
[0] = table
[4*14+0] // f
135 p256OrdMul(x
, x
, table
[4*14:4*14+4]) // ff
136 t
:= make([]uint64, 4, 4)
143 p256OrdMul(x
, x
, t
) // ffff
150 p256OrdMul(x
, x
, t
) // ffffffff
156 p256OrdSqr(x
, x
, 64) // ffffffff0000000000000000
157 p256OrdMul(x
, x
, t
) // ffffffff00000000ffffffff
158 p256OrdSqr(x
, x
, 32) // ffffffff00000000ffffffff00000000
159 p256OrdMul(x
, x
, t
) // ffffffff00000000ffffffffffffffff
161 // Remaining 32 windows
162 expLo
:= [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4, 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
163 for i
:= 0; i
< 32; i
++ {
165 p256OrdMul(x
, x
, table
[4*(expLo
[i
]-1):])
168 // Multiplying by one in the Montgomery domain converts a Montgomery
169 // value out of the domain.
170 one
:= []uint64{1, 0, 0, 0}
171 p256OrdMul(x
, x
, one
)
173 xOut
:= make([]byte, 32)
174 p256LittleToBig(xOut
, x
)
175 return new(big
.Int
).SetBytes(xOut
)
178 // fromBig converts a *big.Int into a format used by this code.
179 func fromBig(out
[]uint64, big
*big
.Int
) {
184 for i
, v
:= range big
.Bits() {
189 // p256GetScalar endian-swaps the big-endian scalar value from in and writes it
190 // to out. If the scalar is equal or greater than the order of the group, it's
191 // reduced modulo that order.
192 func p256GetScalar(out
[]uint64, in
[]byte) {
193 n
:= new(big
.Int
).SetBytes(in
)
195 if n
.Cmp(p256
.N
) >= 0 {
201 // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the
202 // underlying field of the curve. (See initP256 for the value.) Thus rr here is
203 // R×R mod p. See comment in Inverse about how this is used.
204 var rr
= []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd}
206 func maybeReduceModP(in
*big
.Int
) *big
.Int
{
207 if in
.Cmp(p256
.P
) < 0 {
210 return new(big
.Int
).Mod(in
, p256
.P
)
213 func (curve p256Curve
) CombinedMult(bigX
, bigY
*big
.Int
, baseScalar
, scalar
[]byte) (x
, y
*big
.Int
) {
214 scalarReversed
:= make([]uint64, 4)
216 p256GetScalar(scalarReversed
, baseScalar
)
217 r1
.p256BaseMult(scalarReversed
)
219 p256GetScalar(scalarReversed
, scalar
)
220 fromBig(r2
.xyz
[0:4], maybeReduceModP(bigX
))
221 fromBig(r2
.xyz
[4:8], maybeReduceModP(bigY
))
222 p256Mul(r2
.xyz
[0:4], r2
.xyz
[0:4], rr
[:])
223 p256Mul(r2
.xyz
[4:8], r2
.xyz
[4:8], rr
[:])
225 // This sets r2's Z value to 1, in the Montgomery domain.
226 r2
.xyz
[8] = 0x0000000000000001
227 r2
.xyz
[9] = 0xffffffff00000000
228 r2
.xyz
[10] = 0xffffffffffffffff
229 r2
.xyz
[11] = 0x00000000fffffffe
231 r2
.p256ScalarMult(scalarReversed
)
232 p256PointAddAsm(r1
.xyz
[:], r1
.xyz
[:], r2
.xyz
[:])
233 return r1
.p256PointToAffine()
236 func (curve p256Curve
) ScalarBaseMult(scalar
[]byte) (x
, y
*big
.Int
) {
237 scalarReversed
:= make([]uint64, 4)
238 p256GetScalar(scalarReversed
, scalar
)
241 r
.p256BaseMult(scalarReversed
)
242 return r
.p256PointToAffine()
245 func (curve p256Curve
) ScalarMult(bigX
, bigY
*big
.Int
, scalar
[]byte) (x
, y
*big
.Int
) {
246 scalarReversed
:= make([]uint64, 4)
247 p256GetScalar(scalarReversed
, scalar
)
250 fromBig(r
.xyz
[0:4], maybeReduceModP(bigX
))
251 fromBig(r
.xyz
[4:8], maybeReduceModP(bigY
))
252 p256Mul(r
.xyz
[0:4], r
.xyz
[0:4], rr
[:])
253 p256Mul(r
.xyz
[4:8], r
.xyz
[4:8], rr
[:])
254 // This sets r2's Z value to 1, in the Montgomery domain.
255 r
.xyz
[8] = 0x0000000000000001
256 r
.xyz
[9] = 0xffffffff00000000
257 r
.xyz
[10] = 0xffffffffffffffff
258 r
.xyz
[11] = 0x00000000fffffffe
260 r
.p256ScalarMult(scalarReversed
)
261 return r
.p256PointToAffine()
264 func (p
*p256Point
) p256PointToAffine() (x
, y
*big
.Int
) {
265 zInv
:= make([]uint64, 4)
266 zInvSq
:= make([]uint64, 4)
267 p256Inverse(zInv
, p
.xyz
[8:12])
268 p256Sqr(zInvSq
, zInv
)
269 p256Mul(zInv
, zInv
, zInvSq
)
271 p256Mul(zInvSq
, p
.xyz
[0:4], zInvSq
)
272 p256Mul(zInv
, p
.xyz
[4:8], zInv
)
274 p256FromMont(zInvSq
, zInvSq
)
275 p256FromMont(zInv
, zInv
)
277 xOut
:= make([]byte, 32)
278 yOut
:= make([]byte, 32)
279 p256LittleToBig(xOut
, zInvSq
)
280 p256LittleToBig(yOut
, zInv
)
282 return new(big
.Int
).SetBytes(xOut
), new(big
.Int
).SetBytes(yOut
)
285 // p256Inverse sets out to in^-1 mod p.
286 func p256Inverse(out
, in
[]uint64) {
287 var stack
[6 * 4]uint64
288 p2
:= stack
[4*0 : 4*0+4]
289 p4
:= stack
[4*1 : 4*1+4]
290 p8
:= stack
[4*2 : 4*2+4]
291 p16
:= stack
[4*3 : 4*3+4]
292 p32
:= stack
[4*4 : 4*4+4]
295 p256Mul(p2
, out
, in
) // 3*p
299 p256Mul(p4
, out
, p2
) // f*p
305 p256Mul(p8
, out
, p4
) // ff*p
309 for i
:= 0; i
< 7; i
++ {
312 p256Mul(p16
, out
, p8
) // ffff*p
315 for i
:= 0; i
< 15; i
++ {
318 p256Mul(p32
, out
, p16
) // ffffffff*p
322 for i
:= 0; i
< 31; i
++ {
325 p256Mul(out
, out
, in
)
327 for i
:= 0; i
< 32*4; i
++ {
330 p256Mul(out
, out
, p32
)
332 for i
:= 0; i
< 32; i
++ {
335 p256Mul(out
, out
, p32
)
337 for i
:= 0; i
< 16; i
++ {
340 p256Mul(out
, out
, p16
)
342 for i
:= 0; i
< 8; i
++ {
345 p256Mul(out
, out
, p8
)
351 p256Mul(out
, out
, p4
)
355 p256Mul(out
, out
, p2
)
359 p256Mul(out
, out
, in
)
362 func (p
*p256Point
) p256StorePoint(r
*[16 * 4 * 3]uint64, index
int) {
363 copy(r
[index
*12:], p
.xyz
[:])
366 func boothW5(in
uint) (int, int) {
367 var s
uint = ^((in
>> 5) - 1)
368 var d
uint = (1 << 6) - in
- 1
369 d
= (d
& s
) |
(in
& (^s
))
370 d
= (d
>> 1) + (d
& 1)
371 return int(d
), int(s
& 1)
374 func boothW7(in
uint) (int, int) {
375 var s
uint = ^((in
>> 7) - 1)
376 var d
uint = (1 << 8) - in
- 1
377 d
= (d
& s
) |
(in
& (^s
))
378 d
= (d
>> 1) + (d
& 1)
379 return int(d
), int(s
& 1)
383 p256Precomputed
= new([37][64 * 8]uint64)
385 basePoint
:= []uint64{
386 0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6,
387 0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85,
388 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe,
390 t1
:= make([]uint64, 12)
391 t2
:= make([]uint64, 12)
394 zInv
:= make([]uint64, 4)
395 zInvSq
:= make([]uint64, 4)
396 for j
:= 0; j
< 64; j
++ {
398 for i
:= 0; i
< 37; i
++ {
399 // The window size is 7 so we need to double 7 times.
401 for k
:= 0; k
< 7; k
++ {
402 p256PointDoubleAsm(t1
, t1
)
405 // Convert the point to affine form. (Its values are
406 // still in Montgomery form however.)
407 p256Inverse(zInv
, t1
[8:12])
408 p256Sqr(zInvSq
, zInv
)
409 p256Mul(zInv
, zInv
, zInvSq
)
411 p256Mul(t1
[:4], t1
[:4], zInvSq
)
412 p256Mul(t1
[4:8], t1
[4:8], zInv
)
414 copy(t1
[8:12], basePoint
[8:12])
415 // Update the table entry
416 copy(p256Precomputed
[i
][j
*8:], t1
[:8])
419 p256PointDoubleAsm(t2
, basePoint
)
421 p256PointAddAsm(t2
, t2
, basePoint
)
426 func (p
*p256Point
) p256BaseMult(scalar
[]uint64) {
427 precomputeOnce
.Do(initTable
)
429 wvalue
:= (scalar
[0] << 1) & 0xff
430 sel
, sign
:= boothW7(uint(wvalue
))
431 p256SelectBase(p
.xyz
[0:8], p256Precomputed
[0][0:], sel
)
432 p256NegCond(p
.xyz
[4:8], sign
)
434 // (This is one, in the Montgomery domain.)
435 p
.xyz
[8] = 0x0000000000000001
436 p
.xyz
[9] = 0xffffffff00000000
437 p
.xyz
[10] = 0xffffffffffffffff
438 p
.xyz
[11] = 0x00000000fffffffe
441 // (This is one, in the Montgomery domain.)
442 t0
.xyz
[8] = 0x0000000000000001
443 t0
.xyz
[9] = 0xffffffff00000000
444 t0
.xyz
[10] = 0xffffffffffffffff
445 t0
.xyz
[11] = 0x00000000fffffffe
450 for i
:= 1; i
< 37; i
++ {
452 wvalue
= ((scalar
[index
/64] >> (index
% 64)) + (scalar
[index
/64+1] << (64 - (index
% 64)))) & 0xff
454 wvalue
= (scalar
[index
/64] >> (index
% 64)) & 0xff
457 sel
, sign
= boothW7(uint(wvalue
))
458 p256SelectBase(t0
.xyz
[0:8], p256Precomputed
[i
][0:], sel
)
459 p256PointAddAffineAsm(p
.xyz
[0:12], p
.xyz
[0:12], t0
.xyz
[0:8], sign
, sel
, zero
)
464 func (p
*p256Point
) p256ScalarMult(scalar
[]uint64) {
465 // precomp is a table of precomputed points that stores powers of p
467 var precomp
[16 * 4 * 3]uint64
468 var t0
, t1
, t2
, t3 p256Point
471 p
.p256StorePoint(&precomp
, 0) // 1
473 p256PointDoubleAsm(t0
.xyz
[:], p
.xyz
[:])
474 p256PointDoubleAsm(t1
.xyz
[:], t0
.xyz
[:])
475 p256PointDoubleAsm(t2
.xyz
[:], t1
.xyz
[:])
476 p256PointDoubleAsm(t3
.xyz
[:], t2
.xyz
[:])
477 t0
.p256StorePoint(&precomp
, 1) // 2
478 t1
.p256StorePoint(&precomp
, 3) // 4
479 t2
.p256StorePoint(&precomp
, 7) // 8
480 t3
.p256StorePoint(&precomp
, 15) // 16
482 p256PointAddAsm(t0
.xyz
[:], t0
.xyz
[:], p
.xyz
[:])
483 p256PointAddAsm(t1
.xyz
[:], t1
.xyz
[:], p
.xyz
[:])
484 p256PointAddAsm(t2
.xyz
[:], t2
.xyz
[:], p
.xyz
[:])
485 t0
.p256StorePoint(&precomp
, 2) // 3
486 t1
.p256StorePoint(&precomp
, 4) // 5
487 t2
.p256StorePoint(&precomp
, 8) // 9
489 p256PointDoubleAsm(t0
.xyz
[:], t0
.xyz
[:])
490 p256PointDoubleAsm(t1
.xyz
[:], t1
.xyz
[:])
491 t0
.p256StorePoint(&precomp
, 5) // 6
492 t1
.p256StorePoint(&precomp
, 9) // 10
494 p256PointAddAsm(t2
.xyz
[:], t0
.xyz
[:], p
.xyz
[:])
495 p256PointAddAsm(t1
.xyz
[:], t1
.xyz
[:], p
.xyz
[:])
496 t2
.p256StorePoint(&precomp
, 6) // 7
497 t1
.p256StorePoint(&precomp
, 10) // 11
499 p256PointDoubleAsm(t0
.xyz
[:], t0
.xyz
[:])
500 p256PointDoubleAsm(t2
.xyz
[:], t2
.xyz
[:])
501 t0
.p256StorePoint(&precomp
, 11) // 12
502 t2
.p256StorePoint(&precomp
, 13) // 14
504 p256PointAddAsm(t0
.xyz
[:], t0
.xyz
[:], p
.xyz
[:])
505 p256PointAddAsm(t2
.xyz
[:], t2
.xyz
[:], p
.xyz
[:])
506 t0
.p256StorePoint(&precomp
, 12) // 13
507 t2
.p256StorePoint(&precomp
, 14) // 15
509 // Start scanning the window from top bit
513 wvalue
:= (scalar
[index
/64] >> (index
% 64)) & 0x3f
514 sel
, _
= boothW5(uint(wvalue
))
516 p256Select(p
.xyz
[0:12], precomp
[0:], sel
)
521 p256PointDoubleAsm(p
.xyz
[:], p
.xyz
[:])
522 p256PointDoubleAsm(p
.xyz
[:], p
.xyz
[:])
523 p256PointDoubleAsm(p
.xyz
[:], p
.xyz
[:])
524 p256PointDoubleAsm(p
.xyz
[:], p
.xyz
[:])
525 p256PointDoubleAsm(p
.xyz
[:], p
.xyz
[:])
528 wvalue
= ((scalar
[index
/64] >> (index
% 64)) + (scalar
[index
/64+1] << (64 - (index
% 64)))) & 0x3f
530 wvalue
= (scalar
[index
/64] >> (index
% 64)) & 0x3f
533 sel
, sign
= boothW5(uint(wvalue
))
535 p256Select(t0
.xyz
[0:], precomp
[0:], sel
)
536 p256NegCond(t0
.xyz
[4:8], sign
)
537 p256PointAddAsm(t1
.xyz
[:], p
.xyz
[:], t0
.xyz
[:])
538 p256MovCond(t1
.xyz
[0:12], t1
.xyz
[0:12], p
.xyz
[0:12], sel
)
539 p256MovCond(p
.xyz
[0:12], t1
.xyz
[0:12], t0
.xyz
[0:12], zero
)
543 p256PointDoubleAsm(p
.xyz
[:], p
.xyz
[:])
544 p256PointDoubleAsm(p
.xyz
[:], p
.xyz
[:])
545 p256PointDoubleAsm(p
.xyz
[:], p
.xyz
[:])
546 p256PointDoubleAsm(p
.xyz
[:], p
.xyz
[:])
547 p256PointDoubleAsm(p
.xyz
[:], p
.xyz
[:])
549 wvalue
= (scalar
[0] << 1) & 0x3f
550 sel
, sign
= boothW5(uint(wvalue
))
552 p256Select(t0
.xyz
[0:], precomp
[0:], sel
)
553 p256NegCond(t0
.xyz
[4:8], sign
)
554 p256PointAddAsm(t1
.xyz
[:], p
.xyz
[:], t0
.xyz
[:])
555 p256MovCond(t1
.xyz
[0:12], t1
.xyz
[0:12], p
.xyz
[0:12], sel
)
556 p256MovCond(p
.xyz
[0:12], t1
.xyz
[0:12], t0
.xyz
[0:12], zero
)