* tree-vect-loop-manip.c (vect_do_peeling): Do not use
[official-gcc.git] / libgo / go / crypto / elliptic / p256_amd64.go
blob8f3db0718b35356065526ceb5b076ec8abdf4059
1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // This file contains the Go wrapper for the constant-time, 64-bit assembly
6 // implementation of P256. The optimizations performed here are described in
7 // detail in:
8 // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
9 // 256-bit primes"
10 // http://link.springer.com/article/10.1007%2Fs13389-014-0090-x
11 // https://eprint.iacr.org/2013/816.pdf
13 // +build ignore
14 // +build amd64
16 package elliptic
18 import (
19 "math/big"
20 "sync"
23 type (
24 p256Curve struct {
25 *CurveParams
28 p256Point struct {
29 xyz [12]uint64
33 var (
34 p256 p256Curve
35 p256Precomputed *[37][64 * 8]uint64
36 precomputeOnce sync.Once
39 func initP256() {
40 // See FIPS 186-3, section D.2.3
41 p256.CurveParams = &CurveParams{Name: "P-256"}
42 p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
43 p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
44 p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
45 p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
46 p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
47 p256.BitSize = 256
50 func (curve p256Curve) Params() *CurveParams {
51 return curve.CurveParams
54 // Functions implemented in p256_asm_amd64.s
55 // Montgomery multiplication modulo P256
56 func p256Mul(res, in1, in2 []uint64)
58 // Montgomery square modulo P256
59 func p256Sqr(res, in []uint64)
61 // Montgomery multiplication by 1
62 func p256FromMont(res, in []uint64)
64 // iff cond == 1 val <- -val
65 func p256NegCond(val []uint64, cond int)
67 // if cond == 0 res <- b; else res <- a
68 func p256MovCond(res, a, b []uint64, cond int)
70 // Endianness swap
71 func p256BigToLittle(res []uint64, in []byte)
72 func p256LittleToBig(res []byte, in []uint64)
74 // Constant time table access
75 func p256Select(point, table []uint64, idx int)
76 func p256SelectBase(point, table []uint64, idx int)
78 // Montgomery multiplication modulo Ord(G)
79 func p256OrdMul(res, in1, in2 []uint64)
81 // Montgomery square modulo Ord(G), repeated n times
82 func p256OrdSqr(res, in []uint64, n int)
84 // Point add with in2 being affine point
85 // If sign == 1 -> in2 = -in2
86 // If sel == 0 -> res = in1
87 // if zero == 0 -> res = in2
88 func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)
90 // Point add
91 func p256PointAddAsm(res, in1, in2 []uint64)
93 // Point double
94 func p256PointDoubleAsm(res, in []uint64)
96 func (curve p256Curve) Inverse(k *big.Int) *big.Int {
97 if k.Sign() < 0 {
98 // This should never happen.
99 k = new(big.Int).Neg(k)
102 if k.Cmp(p256.N) >= 0 {
103 // This should never happen.
104 k = new(big.Int).Mod(k, p256.N)
107 // table will store precomputed powers of x. The four words at index
108 // 4×i store x^(i+1).
109 var table [4 * 15]uint64
111 x := make([]uint64, 4)
112 fromBig(x[:], k)
113 // This code operates in the Montgomery domain where R = 2^256 mod n
114 // and n is the order of the scalar field. (See initP256 for the
115 // value.) Elements in the Montgomery domain take the form a×R and
116 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
117 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
118 // i.e. converts x into the Montgomery domain.
119 RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}
120 p256OrdMul(table[:4], x, RR)
122 // Prepare the table, no need in constant time access, because the
123 // power is not a secret. (Entry 0 is never used.)
124 for i := 2; i < 16; i += 2 {
125 p256OrdSqr(table[4*(i-1):], table[4*((i/2)-1):], 1)
126 p256OrdMul(table[4*i:], table[4*(i-1):], table[:4])
129 x[0] = table[4*14+0] // f
130 x[1] = table[4*14+1]
131 x[2] = table[4*14+2]
132 x[3] = table[4*14+3]
134 p256OrdSqr(x, x, 4)
135 p256OrdMul(x, x, table[4*14:4*14+4]) // ff
136 t := make([]uint64, 4, 4)
137 t[0] = x[0]
138 t[1] = x[1]
139 t[2] = x[2]
140 t[3] = x[3]
142 p256OrdSqr(x, x, 8)
143 p256OrdMul(x, x, t) // ffff
144 t[0] = x[0]
145 t[1] = x[1]
146 t[2] = x[2]
147 t[3] = x[3]
149 p256OrdSqr(x, x, 16)
150 p256OrdMul(x, x, t) // ffffffff
151 t[0] = x[0]
152 t[1] = x[1]
153 t[2] = x[2]
154 t[3] = x[3]
156 p256OrdSqr(x, x, 64) // ffffffff0000000000000000
157 p256OrdMul(x, x, t) // ffffffff00000000ffffffff
158 p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000
159 p256OrdMul(x, x, t) // ffffffff00000000ffffffffffffffff
161 // Remaining 32 windows
162 expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4, 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
163 for i := 0; i < 32; i++ {
164 p256OrdSqr(x, x, 4)
165 p256OrdMul(x, x, table[4*(expLo[i]-1):])
168 // Multiplying by one in the Montgomery domain converts a Montgomery
169 // value out of the domain.
170 one := []uint64{1, 0, 0, 0}
171 p256OrdMul(x, x, one)
173 xOut := make([]byte, 32)
174 p256LittleToBig(xOut, x)
175 return new(big.Int).SetBytes(xOut)
178 // fromBig converts a *big.Int into a format used by this code.
179 func fromBig(out []uint64, big *big.Int) {
180 for i := range out {
181 out[i] = 0
184 for i, v := range big.Bits() {
185 out[i] = uint64(v)
189 // p256GetScalar endian-swaps the big-endian scalar value from in and writes it
190 // to out. If the scalar is equal or greater than the order of the group, it's
191 // reduced modulo that order.
192 func p256GetScalar(out []uint64, in []byte) {
193 n := new(big.Int).SetBytes(in)
195 if n.Cmp(p256.N) >= 0 {
196 n.Mod(n, p256.N)
198 fromBig(out, n)
201 // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the
202 // underlying field of the curve. (See initP256 for the value.) Thus rr here is
203 // R×R mod p. See comment in Inverse about how this is used.
204 var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd}
206 func maybeReduceModP(in *big.Int) *big.Int {
207 if in.Cmp(p256.P) < 0 {
208 return in
210 return new(big.Int).Mod(in, p256.P)
213 func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
214 scalarReversed := make([]uint64, 4)
215 var r1, r2 p256Point
216 p256GetScalar(scalarReversed, baseScalar)
217 r1.p256BaseMult(scalarReversed)
219 p256GetScalar(scalarReversed, scalar)
220 fromBig(r2.xyz[0:4], maybeReduceModP(bigX))
221 fromBig(r2.xyz[4:8], maybeReduceModP(bigY))
222 p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:])
223 p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:])
225 // This sets r2's Z value to 1, in the Montgomery domain.
226 r2.xyz[8] = 0x0000000000000001
227 r2.xyz[9] = 0xffffffff00000000
228 r2.xyz[10] = 0xffffffffffffffff
229 r2.xyz[11] = 0x00000000fffffffe
231 r2.p256ScalarMult(scalarReversed)
232 p256PointAddAsm(r1.xyz[:], r1.xyz[:], r2.xyz[:])
233 return r1.p256PointToAffine()
236 func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
237 scalarReversed := make([]uint64, 4)
238 p256GetScalar(scalarReversed, scalar)
240 var r p256Point
241 r.p256BaseMult(scalarReversed)
242 return r.p256PointToAffine()
245 func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
246 scalarReversed := make([]uint64, 4)
247 p256GetScalar(scalarReversed, scalar)
249 var r p256Point
250 fromBig(r.xyz[0:4], maybeReduceModP(bigX))
251 fromBig(r.xyz[4:8], maybeReduceModP(bigY))
252 p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:])
253 p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:])
254 // This sets r2's Z value to 1, in the Montgomery domain.
255 r.xyz[8] = 0x0000000000000001
256 r.xyz[9] = 0xffffffff00000000
257 r.xyz[10] = 0xffffffffffffffff
258 r.xyz[11] = 0x00000000fffffffe
260 r.p256ScalarMult(scalarReversed)
261 return r.p256PointToAffine()
264 func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
265 zInv := make([]uint64, 4)
266 zInvSq := make([]uint64, 4)
267 p256Inverse(zInv, p.xyz[8:12])
268 p256Sqr(zInvSq, zInv)
269 p256Mul(zInv, zInv, zInvSq)
271 p256Mul(zInvSq, p.xyz[0:4], zInvSq)
272 p256Mul(zInv, p.xyz[4:8], zInv)
274 p256FromMont(zInvSq, zInvSq)
275 p256FromMont(zInv, zInv)
277 xOut := make([]byte, 32)
278 yOut := make([]byte, 32)
279 p256LittleToBig(xOut, zInvSq)
280 p256LittleToBig(yOut, zInv)
282 return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut)
285 // p256Inverse sets out to in^-1 mod p.
286 func p256Inverse(out, in []uint64) {
287 var stack [6 * 4]uint64
288 p2 := stack[4*0 : 4*0+4]
289 p4 := stack[4*1 : 4*1+4]
290 p8 := stack[4*2 : 4*2+4]
291 p16 := stack[4*3 : 4*3+4]
292 p32 := stack[4*4 : 4*4+4]
294 p256Sqr(out, in)
295 p256Mul(p2, out, in) // 3*p
297 p256Sqr(out, p2)
298 p256Sqr(out, out)
299 p256Mul(p4, out, p2) // f*p
301 p256Sqr(out, p4)
302 p256Sqr(out, out)
303 p256Sqr(out, out)
304 p256Sqr(out, out)
305 p256Mul(p8, out, p4) // ff*p
307 p256Sqr(out, p8)
309 for i := 0; i < 7; i++ {
310 p256Sqr(out, out)
312 p256Mul(p16, out, p8) // ffff*p
314 p256Sqr(out, p16)
315 for i := 0; i < 15; i++ {
316 p256Sqr(out, out)
318 p256Mul(p32, out, p16) // ffffffff*p
320 p256Sqr(out, p32)
322 for i := 0; i < 31; i++ {
323 p256Sqr(out, out)
325 p256Mul(out, out, in)
327 for i := 0; i < 32*4; i++ {
328 p256Sqr(out, out)
330 p256Mul(out, out, p32)
332 for i := 0; i < 32; i++ {
333 p256Sqr(out, out)
335 p256Mul(out, out, p32)
337 for i := 0; i < 16; i++ {
338 p256Sqr(out, out)
340 p256Mul(out, out, p16)
342 for i := 0; i < 8; i++ {
343 p256Sqr(out, out)
345 p256Mul(out, out, p8)
347 p256Sqr(out, out)
348 p256Sqr(out, out)
349 p256Sqr(out, out)
350 p256Sqr(out, out)
351 p256Mul(out, out, p4)
353 p256Sqr(out, out)
354 p256Sqr(out, out)
355 p256Mul(out, out, p2)
357 p256Sqr(out, out)
358 p256Sqr(out, out)
359 p256Mul(out, out, in)
362 func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) {
363 copy(r[index*12:], p.xyz[:])
366 func boothW5(in uint) (int, int) {
367 var s uint = ^((in >> 5) - 1)
368 var d uint = (1 << 6) - in - 1
369 d = (d & s) | (in & (^s))
370 d = (d >> 1) + (d & 1)
371 return int(d), int(s & 1)
374 func boothW7(in uint) (int, int) {
375 var s uint = ^((in >> 7) - 1)
376 var d uint = (1 << 8) - in - 1
377 d = (d & s) | (in & (^s))
378 d = (d >> 1) + (d & 1)
379 return int(d), int(s & 1)
382 func initTable() {
383 p256Precomputed = new([37][64 * 8]uint64)
385 basePoint := []uint64{
386 0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6,
387 0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85,
388 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe,
390 t1 := make([]uint64, 12)
391 t2 := make([]uint64, 12)
392 copy(t2, basePoint)
394 zInv := make([]uint64, 4)
395 zInvSq := make([]uint64, 4)
396 for j := 0; j < 64; j++ {
397 copy(t1, t2)
398 for i := 0; i < 37; i++ {
399 // The window size is 7 so we need to double 7 times.
400 if i != 0 {
401 for k := 0; k < 7; k++ {
402 p256PointDoubleAsm(t1, t1)
405 // Convert the point to affine form. (Its values are
406 // still in Montgomery form however.)
407 p256Inverse(zInv, t1[8:12])
408 p256Sqr(zInvSq, zInv)
409 p256Mul(zInv, zInv, zInvSq)
411 p256Mul(t1[:4], t1[:4], zInvSq)
412 p256Mul(t1[4:8], t1[4:8], zInv)
414 copy(t1[8:12], basePoint[8:12])
415 // Update the table entry
416 copy(p256Precomputed[i][j*8:], t1[:8])
418 if j == 0 {
419 p256PointDoubleAsm(t2, basePoint)
420 } else {
421 p256PointAddAsm(t2, t2, basePoint)
426 func (p *p256Point) p256BaseMult(scalar []uint64) {
427 precomputeOnce.Do(initTable)
429 wvalue := (scalar[0] << 1) & 0xff
430 sel, sign := boothW7(uint(wvalue))
431 p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
432 p256NegCond(p.xyz[4:8], sign)
434 // (This is one, in the Montgomery domain.)
435 p.xyz[8] = 0x0000000000000001
436 p.xyz[9] = 0xffffffff00000000
437 p.xyz[10] = 0xffffffffffffffff
438 p.xyz[11] = 0x00000000fffffffe
440 var t0 p256Point
441 // (This is one, in the Montgomery domain.)
442 t0.xyz[8] = 0x0000000000000001
443 t0.xyz[9] = 0xffffffff00000000
444 t0.xyz[10] = 0xffffffffffffffff
445 t0.xyz[11] = 0x00000000fffffffe
447 index := uint(6)
448 zero := sel
450 for i := 1; i < 37; i++ {
451 if index < 192 {
452 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0xff
453 } else {
454 wvalue = (scalar[index/64] >> (index % 64)) & 0xff
456 index += 7
457 sel, sign = boothW7(uint(wvalue))
458 p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
459 p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
460 zero |= sel
464 func (p *p256Point) p256ScalarMult(scalar []uint64) {
465 // precomp is a table of precomputed points that stores powers of p
466 // from p^1 to p^16.
467 var precomp [16 * 4 * 3]uint64
468 var t0, t1, t2, t3 p256Point
470 // Prepare the table
471 p.p256StorePoint(&precomp, 0) // 1
473 p256PointDoubleAsm(t0.xyz[:], p.xyz[:])
474 p256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
475 p256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
476 p256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
477 t0.p256StorePoint(&precomp, 1) // 2
478 t1.p256StorePoint(&precomp, 3) // 4
479 t2.p256StorePoint(&precomp, 7) // 8
480 t3.p256StorePoint(&precomp, 15) // 16
482 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
483 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
484 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
485 t0.p256StorePoint(&precomp, 2) // 3
486 t1.p256StorePoint(&precomp, 4) // 5
487 t2.p256StorePoint(&precomp, 8) // 9
489 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
490 p256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
491 t0.p256StorePoint(&precomp, 5) // 6
492 t1.p256StorePoint(&precomp, 9) // 10
494 p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
495 p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
496 t2.p256StorePoint(&precomp, 6) // 7
497 t1.p256StorePoint(&precomp, 10) // 11
499 p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
500 p256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
501 t0.p256StorePoint(&precomp, 11) // 12
502 t2.p256StorePoint(&precomp, 13) // 14
504 p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
505 p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
506 t0.p256StorePoint(&precomp, 12) // 13
507 t2.p256StorePoint(&precomp, 14) // 15
509 // Start scanning the window from top bit
510 index := uint(254)
511 var sel, sign int
513 wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
514 sel, _ = boothW5(uint(wvalue))
516 p256Select(p.xyz[0:12], precomp[0:], sel)
517 zero := sel
519 for index > 4 {
520 index -= 5
521 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
522 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
523 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
524 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
525 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
527 if index < 192 {
528 wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
529 } else {
530 wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
533 sel, sign = boothW5(uint(wvalue))
535 p256Select(t0.xyz[0:], precomp[0:], sel)
536 p256NegCond(t0.xyz[4:8], sign)
537 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
538 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
539 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
540 zero |= sel
543 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
544 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
545 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
546 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
547 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
549 wvalue = (scalar[0] << 1) & 0x3f
550 sel, sign = boothW5(uint(wvalue))
552 p256Select(t0.xyz[0:], precomp[0:], sel)
553 p256NegCond(t0.xyz[4:8], sign)
554 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
555 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
556 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)