libgo: update to Go 1.11
[official-gcc.git] / libgo / go / crypto / elliptic / p256_asm.go
blobaee065b38331336c89026867ba0e012d95c0e214
1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // This file contains the Go wrapper for the constant-time, 64-bit assembly
6 // implementation of P256. The optimizations performed here are described in
7 // detail in:
8 // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
9 // 256-bit primes"
10 // https://link.springer.com/article/10.1007%2Fs13389-014-0090-x
11 // https://eprint.iacr.org/2013/816.pdf
13 // +build ignore_for_gccgo
14 // +build amd64 arm64
16 package elliptic
18 import (
19 "math/big"
20 "sync"
23 type (
24 p256Curve struct {
25 *CurveParams
28 p256Point struct {
29 xyz [12]uint64
33 var (
34 p256 p256Curve
35 p256Precomputed *[43][32 * 8]uint64
36 precomputeOnce sync.Once
39 func initP256() {
40 // See FIPS 186-3, section D.2.3
41 p256.CurveParams = &CurveParams{Name: "P-256"}
42 p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
43 p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
44 p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
45 p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
46 p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
47 p256.BitSize = 256
50 func (curve p256Curve) Params() *CurveParams {
51 return curve.CurveParams
54 // Functions implemented in p256_asm_*64.s
55 // Montgomery multiplication modulo P256
56 //go:noescape
57 func p256Mul(res, in1, in2 []uint64)
59 // Montgomery square modulo P256, repeated n times (n >= 1)
60 //go:noescape
61 func p256Sqr(res, in []uint64, n int)
63 // Montgomery multiplication by 1
64 //go:noescape
65 func p256FromMont(res, in []uint64)
67 // iff cond == 1 val <- -val
68 //go:noescape
69 func p256NegCond(val []uint64, cond int)
71 // if cond == 0 res <- b; else res <- a
72 //go:noescape
73 func p256MovCond(res, a, b []uint64, cond int)
75 // Endianness swap
76 //go:noescape
77 func p256BigToLittle(res []uint64, in []byte)
79 //go:noescape
80 func p256LittleToBig(res []byte, in []uint64)
82 // Constant time table access
83 //go:noescape
84 func p256Select(point, table []uint64, idx int)
86 //go:noescape
87 func p256SelectBase(point, table []uint64, idx int)
89 // Montgomery multiplication modulo Ord(G)
90 //go:noescape
91 func p256OrdMul(res, in1, in2 []uint64)
93 // Montgomery square modulo Ord(G), repeated n times
94 //go:noescape
95 func p256OrdSqr(res, in []uint64, n int)
97 // Point add with in2 being affine point
98 // If sign == 1 -> in2 = -in2
99 // If sel == 0 -> res = in1
100 // if zero == 0 -> res = in2
101 //go:noescape
102 func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)
104 // Point add. Returns one if the two input points were equal and zero
105 // otherwise. (Note that, due to the way that the equations work out, some
106 // representations of ∞ are considered equal to everything by this function.)
107 //go:noescape
108 func p256PointAddAsm(res, in1, in2 []uint64) int
110 // Point double
111 //go:noescape
112 func p256PointDoubleAsm(res, in []uint64)
114 func (curve p256Curve) Inverse(k *big.Int) *big.Int {
115 if k.Sign() < 0 {
116 // This should never happen.
117 k = new(big.Int).Neg(k)
120 if k.Cmp(p256.N) >= 0 {
121 // This should never happen.
122 k = new(big.Int).Mod(k, p256.N)
125 // table will store precomputed powers of x.
126 var table [4 * 9]uint64
127 var (
128 _1 = table[4*0 : 4*1]
129 _11 = table[4*1 : 4*2]
130 _101 = table[4*2 : 4*3]
131 _111 = table[4*3 : 4*4]
132 _1111 = table[4*4 : 4*5]
133 _10101 = table[4*5 : 4*6]
134 _101111 = table[4*6 : 4*7]
135 x = table[4*7 : 4*8]
136 t = table[4*8 : 4*9]
139 fromBig(x[:], k)
140 // This code operates in the Montgomery domain where R = 2^256 mod n
141 // and n is the order of the scalar field. (See initP256 for the
142 // value.) Elements in the Montgomery domain take the form a×R and
143 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
144 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
145 // i.e. converts x into the Montgomery domain.
146 // Window values borrowed from https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion
147 RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}
148 p256OrdMul(_1, x, RR) // _1
149 p256OrdSqr(x, _1, 1) // _10
150 p256OrdMul(_11, x, _1) // _11
151 p256OrdMul(_101, x, _11) // _101
152 p256OrdMul(_111, x, _101) // _111
153 p256OrdSqr(x, _101, 1) // _1010
154 p256OrdMul(_1111, _101, x) // _1111
156 p256OrdSqr(t, x, 1) // _10100
157 p256OrdMul(_10101, t, _1) // _10101
158 p256OrdSqr(x, _10101, 1) // _101010
159 p256OrdMul(_101111, _101, x) // _101111
160 p256OrdMul(x, _10101, x) // _111111 = x6
161 p256OrdSqr(t, x, 2) // _11111100
162 p256OrdMul(t, t, _11) // _11111111 = x8
163 p256OrdSqr(x, t, 8) // _ff00
164 p256OrdMul(x, x, t) // _ffff = x16
165 p256OrdSqr(t, x, 16) // _ffff0000
166 p256OrdMul(t, t, x) // _ffffffff = x32
168 p256OrdSqr(x, t, 64)
169 p256OrdMul(x, x, t)
170 p256OrdSqr(x, x, 32)
171 p256OrdMul(x, x, t)
173 sqrs := []uint8{
174 6, 5, 4, 5, 5,
175 4, 3, 3, 5, 9,
176 6, 2, 5, 6, 5,
177 4, 5, 5, 3, 10,
178 2, 5, 5, 3, 7, 6}
179 muls := [][]uint64{
180 _101111, _111, _11, _1111, _10101,
181 _101, _101, _101, _111, _101111,
182 _1111, _1, _1, _1111, _111,
183 _111, _111, _101, _11, _101111,
184 _11, _11, _11, _1, _10101, _1111}
186 for i, s := range sqrs {
187 p256OrdSqr(x, x, int(s))
188 p256OrdMul(x, x, muls[i])
191 // Multiplying by one in the Montgomery domain converts a Montgomery
192 // value out of the domain.
193 one := []uint64{1, 0, 0, 0}
194 p256OrdMul(x, x, one)
196 xOut := make([]byte, 32)
197 p256LittleToBig(xOut, x)
198 return new(big.Int).SetBytes(xOut)
201 // fromBig converts a *big.Int into a format used by this code.
202 func fromBig(out []uint64, big *big.Int) {
203 for i := range out {
204 out[i] = 0
207 for i, v := range big.Bits() {
208 out[i] = uint64(v)
212 // p256GetScalar endian-swaps the big-endian scalar value from in and writes it
213 // to out. If the scalar is equal or greater than the order of the group, it's
214 // reduced modulo that order.
215 func p256GetScalar(out []uint64, in []byte) {
216 n := new(big.Int).SetBytes(in)
218 if n.Cmp(p256.N) >= 0 {
219 n.Mod(n, p256.N)
221 fromBig(out, n)
224 // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the
225 // underlying field of the curve. (See initP256 for the value.) Thus rr here is
226 // R×R mod p. See comment in Inverse about how this is used.
227 var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd}
229 func maybeReduceModP(in *big.Int) *big.Int {
230 if in.Cmp(p256.P) < 0 {
231 return in
233 return new(big.Int).Mod(in, p256.P)
236 func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
237 scalarReversed := make([]uint64, 4)
238 var r1, r2 p256Point
239 p256GetScalar(scalarReversed, baseScalar)
240 r1IsInfinity := scalarIsZero(scalarReversed)
241 r1.p256BaseMult(scalarReversed)
243 p256GetScalar(scalarReversed, scalar)
244 r2IsInfinity := scalarIsZero(scalarReversed)
245 fromBig(r2.xyz[0:4], maybeReduceModP(bigX))
246 fromBig(r2.xyz[4:8], maybeReduceModP(bigY))
247 p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:])
248 p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:])
250 // This sets r2's Z value to 1, in the Montgomery domain.
251 r2.xyz[8] = 0x0000000000000001
252 r2.xyz[9] = 0xffffffff00000000
253 r2.xyz[10] = 0xffffffffffffffff
254 r2.xyz[11] = 0x00000000fffffffe
256 r2.p256ScalarMult(scalarReversed)
258 var sum, double p256Point
259 pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:])
260 p256PointDoubleAsm(double.xyz[:], r1.xyz[:])
261 sum.CopyConditional(&double, pointsEqual)
262 sum.CopyConditional(&r1, r2IsInfinity)
263 sum.CopyConditional(&r2, r1IsInfinity)
265 return sum.p256PointToAffine()
268 func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
269 scalarReversed := make([]uint64, 4)
270 p256GetScalar(scalarReversed, scalar)
272 var r p256Point
273 r.p256BaseMult(scalarReversed)
274 return r.p256PointToAffine()
277 func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
278 scalarReversed := make([]uint64, 4)
279 p256GetScalar(scalarReversed, scalar)
281 var r p256Point
282 fromBig(r.xyz[0:4], maybeReduceModP(bigX))
283 fromBig(r.xyz[4:8], maybeReduceModP(bigY))
284 p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:])
285 p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:])
286 // This sets r2's Z value to 1, in the Montgomery domain.
287 r.xyz[8] = 0x0000000000000001
288 r.xyz[9] = 0xffffffff00000000
289 r.xyz[10] = 0xffffffffffffffff
290 r.xyz[11] = 0x00000000fffffffe
292 r.p256ScalarMult(scalarReversed)
293 return r.p256PointToAffine()
296 // uint64IsZero returns 1 if x is zero and zero otherwise.
297 func uint64IsZero(x uint64) int {
298 x = ^x
299 x &= x >> 32
300 x &= x >> 16
301 x &= x >> 8
302 x &= x >> 4
303 x &= x >> 2
304 x &= x >> 1
305 return int(x & 1)
308 // scalarIsZero returns 1 if scalar represents the zero value, and zero
309 // otherwise.
310 func scalarIsZero(scalar []uint64) int {
311 return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3])
314 func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
315 zInv := make([]uint64, 4)
316 zInvSq := make([]uint64, 4)
317 p256Inverse(zInv, p.xyz[8:12])
318 p256Sqr(zInvSq, zInv, 1)
319 p256Mul(zInv, zInv, zInvSq)
321 p256Mul(zInvSq, p.xyz[0:4], zInvSq)
322 p256Mul(zInv, p.xyz[4:8], zInv)
324 p256FromMont(zInvSq, zInvSq)
325 p256FromMont(zInv, zInv)
327 xOut := make([]byte, 32)
328 yOut := make([]byte, 32)
329 p256LittleToBig(xOut, zInvSq)
330 p256LittleToBig(yOut, zInv)
332 return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut)
335 // CopyConditional copies overwrites p with src if v == 1, and leaves p
336 // unchanged if v == 0.
337 func (p *p256Point) CopyConditional(src *p256Point, v int) {
338 pMask := uint64(v) - 1
339 srcMask := ^pMask
341 for i, n := range p.xyz {
342 p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask)
346 // p256Inverse sets out to in^-1 mod p.
347 func p256Inverse(out, in []uint64) {
348 var stack [6 * 4]uint64
349 p2 := stack[4*0 : 4*0+4]
350 p4 := stack[4*1 : 4*1+4]
351 p8 := stack[4*2 : 4*2+4]
352 p16 := stack[4*3 : 4*3+4]
353 p32 := stack[4*4 : 4*4+4]
355 p256Sqr(out, in, 1)
356 p256Mul(p2, out, in) // 3*p
358 p256Sqr(out, p2, 2)
359 p256Mul(p4, out, p2) // f*p
361 p256Sqr(out, p4, 4)
362 p256Mul(p8, out, p4) // ff*p
364 p256Sqr(out, p8, 8)
365 p256Mul(p16, out, p8) // ffff*p
367 p256Sqr(out, p16, 16)
368 p256Mul(p32, out, p16) // ffffffff*p
370 p256Sqr(out, p32, 32)
371 p256Mul(out, out, in)
373 p256Sqr(out, out, 128)
374 p256Mul(out, out, p32)
376 p256Sqr(out, out, 32)
377 p256Mul(out, out, p32)
379 p256Sqr(out, out, 16)
380 p256Mul(out, out, p16)
382 p256Sqr(out, out, 8)
383 p256Mul(out, out, p8)
385 p256Sqr(out, out, 4)
386 p256Mul(out, out, p4)
388 p256Sqr(out, out, 2)
389 p256Mul(out, out, p2)
391 p256Sqr(out, out, 2)
392 p256Mul(out, out, in)
395 func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) {
396 copy(r[index*12:], p.xyz[:])
399 func boothW5(in uint) (int, int) {
400 var s uint = ^((in >> 5) - 1)
401 var d uint = (1 << 6) - in - 1
402 d = (d & s) | (in & (^s))
403 d = (d >> 1) + (d & 1)
404 return int(d), int(s & 1)
407 func boothW6(in uint) (int, int) {
408 var s uint = ^((in >> 6) - 1)
409 var d uint = (1 << 7) - in - 1
410 d = (d & s) | (in & (^s))
411 d = (d >> 1) + (d & 1)
412 return int(d), int(s & 1)
415 func initTable() {
416 p256Precomputed = new([43][32 * 8]uint64)
418 basePoint := []uint64{
419 0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6,
420 0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85,
421 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe,
423 t1 := make([]uint64, 12)
424 t2 := make([]uint64, 12)
425 copy(t2, basePoint)
427 zInv := make([]uint64, 4)
428 zInvSq := make([]uint64, 4)
429 for j := 0; j < 32; j++ {
430 copy(t1, t2)
431 for i := 0; i < 43; i++ {
432 // The window size is 6 so we need to double 6 times.
433 if i != 0 {
434 for k := 0; k < 6; k++ {
435 p256PointDoubleAsm(t1, t1)
438 // Convert the point to affine form. (Its values are
439 // still in Montgomery form however.)
440 p256Inverse(zInv, t1[8:12])
441 p256Sqr(zInvSq, zInv, 1)
442 p256Mul(zInv, zInv, zInvSq)
444 p256Mul(t1[:4], t1[:4], zInvSq)
445 p256Mul(t1[4:8], t1[4:8], zInv)
447 copy(t1[8:12], basePoint[8:12])
448 // Update the table entry
449 copy(p256Precomputed[i][j*8:], t1[:8])
451 if j == 0 {
452 p256PointDoubleAsm(t2, basePoint)
453 } else {
454 p256PointAddAsm(t2, t2, basePoint)
459 func (p *p256Point) p256BaseMult(scalar []uint64) {
460 precomputeOnce.Do(initTable)
462 wvalue := (scalar[0] << 1) & 0x7f
463 sel, sign := boothW6(uint(wvalue))
464 p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
465 p256NegCond(p.xyz[4:8], sign)
467 // (This is one, in the Montgomery domain.)
468 p.xyz[8] = 0x0000000000000001
469 p.xyz[9] = 0xffffffff00000000
470 p.xyz[10] = 0xffffffffffffffff
471 p.xyz[11] = 0x00000000fffffffe
473 var t0 p256Point
474 // (This is one, in the Montgomery domain.)
475 t0.xyz[8] = 0x0000000000000001
476 t0.xyz[9] = 0xffffffff00000000
477 t0.xyz[10] = 0xffffffffffffffff
478 t0.xyz[11] = 0x00000000fffffffe
480 index := uint(5)
481 zero := sel
483 for i := 1; i < 43; i++ {
484 if index < 192 {
485 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f
486 } else {
487 wvalue = (scalar[index/64] >> (index % 64)) & 0x7f
489 index += 6
490 sel, sign = boothW6(uint(wvalue))
491 p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
492 p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
493 zero |= sel
497 func (p *p256Point) p256ScalarMult(scalar []uint64) {
498 // precomp is a table of precomputed points that stores powers of p
499 // from p^1 to p^16.
500 var precomp [16 * 4 * 3]uint64
501 var t0, t1, t2, t3 p256Point
503 // Prepare the table
504 p.p256StorePoint(&precomp, 0) // 1
506 p256PointDoubleAsm(t0.xyz[:], p.xyz[:])
507 p256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
508 p256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
509 p256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
510 t0.p256StorePoint(&precomp, 1) // 2
511 t1.p256StorePoint(&precomp, 3) // 4
512 t2.p256StorePoint(&precomp, 7) // 8
513 t3.p256StorePoint(&precomp, 15) // 16
515 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
516 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
517 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
518 t0.p256StorePoint(&precomp, 2) // 3
519 t1.p256StorePoint(&precomp, 4) // 5
520 t2.p256StorePoint(&precomp, 8) // 9
522 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
523 p256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
524 t0.p256StorePoint(&precomp, 5) // 6
525 t1.p256StorePoint(&precomp, 9) // 10
527 p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
528 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
529 t2.p256StorePoint(&precomp, 6) // 7
530 t1.p256StorePoint(&precomp, 10) // 11
532 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
533 p256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
534 t0.p256StorePoint(&precomp, 11) // 12
535 t2.p256StorePoint(&precomp, 13) // 14
537 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
538 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
539 t0.p256StorePoint(&precomp, 12) // 13
540 t2.p256StorePoint(&precomp, 14) // 15
542 // Start scanning the window from top bit
543 index := uint(254)
544 var sel, sign int
546 wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
547 sel, _ = boothW5(uint(wvalue))
549 p256Select(p.xyz[0:12], precomp[0:], sel)
550 zero := sel
552 for index > 4 {
553 index -= 5
554 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
555 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
556 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
557 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
558 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
560 if index < 192 {
561 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
562 } else {
563 wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
566 sel, sign = boothW5(uint(wvalue))
568 p256Select(t0.xyz[0:], precomp[0:], sel)
569 p256NegCond(t0.xyz[4:8], sign)
570 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
571 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
572 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
573 zero |= sel
576 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
577 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
578 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
579 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
580 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
582 wvalue = (scalar[0] << 1) & 0x3f
583 sel, sign = boothW5(uint(wvalue))
585 p256Select(t0.xyz[0:], precomp[0:], sel)
586 p256NegCond(t0.xyz[4:8], sign)
587 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
588 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
589 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)