* tree-vect-loop-manip.c (vect_do_peeling): Do not use
[official-gcc.git] / libgo / go / crypto / elliptic / p256_s390x.go
blob5f99e71e5d60576fa9aa0b7fa5b502ef0075bcd2
1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // +build ignore
6 // -build s390x
8 package elliptic
10 import (
11 "math/big"
14 type p256CurveFast struct {
15 *CurveParams
18 type p256Point struct {
19 x [32]byte
20 y [32]byte
21 z [32]byte
24 var (
25 p256 Curve
26 p256PreFast *[37][64]p256Point
29 // hasVectorFacility reports whether the machine has the z/Architecture
30 // vector facility installed and enabled.
31 func hasVectorFacility() bool
33 var hasVX = hasVectorFacility()
35 func initP256Arch() {
36 if hasVX {
37 p256 = p256CurveFast{p256Params}
38 initTable()
39 return
42 // No vector support, use pure Go implementation.
43 p256 = p256Curve{p256Params}
44 return
47 func (curve p256CurveFast) Params() *CurveParams {
48 return curve.CurveParams
51 // Functions implemented in p256_asm_s390x.s
52 // Montgomery multiplication modulo P256
53 func p256MulAsm(res, in1, in2 []byte)
55 // Montgomery square modulo P256
56 func p256Sqr(res, in []byte) {
57 p256MulAsm(res, in, in)
60 // Montgomery multiplication by 1
61 func p256FromMont(res, in []byte)
63 // iff cond == 1 val <- -val
64 func p256NegCond(val *p256Point, cond int)
66 // if cond == 0 res <- b; else res <- a
67 func p256MovCond(res, a, b *p256Point, cond int)
69 // Constant time table access
70 func p256Select(point *p256Point, table []p256Point, idx int)
71 func p256SelectBase(point *p256Point, table []p256Point, idx int)
73 // Montgomery multiplication modulo Ord(G)
74 func p256OrdMul(res, in1, in2 []byte)
76 // Montgomery square modulo Ord(G), repeated n times
77 func p256OrdSqr(res, in []byte, n int) {
78 copy(res, in)
79 for i := 0; i < n; i += 1 {
80 p256OrdMul(res, res, res)
84 // Point add with P2 being affine point
85 // If sign == 1 -> P2 = -P2
86 // If sel == 0 -> P3 = P1
87 // if zero == 0 -> P3 = P2
88 func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int)
90 // Point add
91 func p256PointAddAsm(P3, P1, P2 *p256Point)
92 func p256PointDoubleAsm(P3, P1 *p256Point)
94 func (curve p256CurveFast) Inverse(k *big.Int) *big.Int {
95 if k.Cmp(p256Params.N) >= 0 {
96 // This should never happen.
97 reducedK := new(big.Int).Mod(k, p256Params.N)
98 k = reducedK
101 // table will store precomputed powers of x. The 32 bytes at index
102 // i store x^(i+1).
103 var table [15][32]byte
105 x := fromBig(k)
106 // This code operates in the Montgomery domain where R = 2^256 mod n
107 // and n is the order of the scalar field. (See initP256 for the
108 // value.) Elements in the Montgomery domain take the form a×R and
109 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
110 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
111 // i.e. converts x into the Montgomery domain. Stored in BigEndian form
112 RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59,
113 0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2}
115 p256OrdMul(table[0][:], x, RR)
117 // Prepare the table, no need in constant time access, because the
118 // power is not a secret. (Entry 0 is never used.)
119 for i := 2; i < 16; i += 2 {
120 p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1)
121 p256OrdMul(table[i][:], table[i-1][:], table[0][:])
124 copy(x, table[14][:]) // f
126 p256OrdSqr(x[0:32], x[0:32], 4)
127 p256OrdMul(x[0:32], x[0:32], table[14][:]) // ff
128 t := make([]byte, 32)
129 copy(t, x)
131 p256OrdSqr(x, x, 8)
132 p256OrdMul(x, x, t) // ffff
133 copy(t, x)
135 p256OrdSqr(x, x, 16)
136 p256OrdMul(x, x, t) // ffffffff
137 copy(t, x)
139 p256OrdSqr(x, x, 64) // ffffffff0000000000000000
140 p256OrdMul(x, x, t) // ffffffff00000000ffffffff
141 p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000
142 p256OrdMul(x, x, t) // ffffffff00000000ffffffffffffffff
144 // Remaining 32 windows
145 expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4,
146 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
147 for i := 0; i < 32; i++ {
148 p256OrdSqr(x, x, 4)
149 p256OrdMul(x, x, table[expLo[i]-1][:])
152 // Multiplying by one in the Montgomery domain converts a Montgomery
153 // value out of the domain.
154 one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
155 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}
156 p256OrdMul(x, x, one)
158 return new(big.Int).SetBytes(x)
161 // fromBig converts a *big.Int into a format used by this code.
162 func fromBig(big *big.Int) []byte {
163 // This could be done a lot more efficiently...
164 res := big.Bytes()
165 if 32 == len(res) {
166 return res
168 t := make([]byte, 32)
169 offset := 32 - len(res)
170 for i := len(res) - 1; i >= 0; i-- {
171 t[i+offset] = res[i]
173 return t
176 // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar
177 // is equal or greater than the order of the group, it's reduced modulo that order.
178 func p256GetMultiplier(in []byte) []byte {
179 n := new(big.Int).SetBytes(in)
181 if n.Cmp(p256Params.N) >= 0 {
182 n.Mod(n, p256Params.N)
184 return fromBig(n)
187 // p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the
188 // underlying field of the curve. (See initP256 for the value.) Thus rr here is
189 // R×R mod p. See comment in Inverse about how this is used.
190 var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe,
191 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03}
193 // (This is one, in the Montgomery domain.)
194 var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
195 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}
197 func maybeReduceModP(in *big.Int) *big.Int {
198 if in.Cmp(p256Params.P) < 0 {
199 return in
201 return new(big.Int).Mod(in, p256Params.P)
204 func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
205 var r1, r2 p256Point
206 r1.p256BaseMult(p256GetMultiplier(baseScalar))
208 copy(r2.x[:], fromBig(maybeReduceModP(bigX)))
209 copy(r2.y[:], fromBig(maybeReduceModP(bigY)))
210 copy(r2.z[:], one)
211 p256MulAsm(r2.x[:], r2.x[:], rr[:])
212 p256MulAsm(r2.y[:], r2.y[:], rr[:])
214 r2.p256ScalarMult(p256GetMultiplier(scalar))
215 p256PointAddAsm(&r1, &r1, &r2)
216 return r1.p256PointToAffine()
219 func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
220 var r p256Point
221 r.p256BaseMult(p256GetMultiplier(scalar))
222 return r.p256PointToAffine()
225 func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
226 var r p256Point
227 copy(r.x[:], fromBig(maybeReduceModP(bigX)))
228 copy(r.y[:], fromBig(maybeReduceModP(bigY)))
229 copy(r.z[:], one)
230 p256MulAsm(r.x[:], r.x[:], rr[:])
231 p256MulAsm(r.y[:], r.y[:], rr[:])
232 r.p256ScalarMult(p256GetMultiplier(scalar))
233 return r.p256PointToAffine()
236 func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
237 zInv := make([]byte, 32)
238 zInvSq := make([]byte, 32)
240 p256Inverse(zInv, p.z[:])
241 p256Sqr(zInvSq, zInv)
242 p256MulAsm(zInv, zInv, zInvSq)
244 p256MulAsm(zInvSq, p.x[:], zInvSq)
245 p256MulAsm(zInv, p.y[:], zInv)
247 p256FromMont(zInvSq, zInvSq)
248 p256FromMont(zInv, zInv)
250 return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv)
253 // p256Inverse sets out to in^-1 mod p.
254 func p256Inverse(out, in []byte) {
255 var stack [6 * 32]byte
256 p2 := stack[32*0 : 32*0+32]
257 p4 := stack[32*1 : 32*1+32]
258 p8 := stack[32*2 : 32*2+32]
259 p16 := stack[32*3 : 32*3+32]
260 p32 := stack[32*4 : 32*4+32]
262 p256Sqr(out, in)
263 p256MulAsm(p2, out, in) // 3*p
265 p256Sqr(out, p2)
266 p256Sqr(out, out)
267 p256MulAsm(p4, out, p2) // f*p
269 p256Sqr(out, p4)
270 p256Sqr(out, out)
271 p256Sqr(out, out)
272 p256Sqr(out, out)
273 p256MulAsm(p8, out, p4) // ff*p
275 p256Sqr(out, p8)
277 for i := 0; i < 7; i++ {
278 p256Sqr(out, out)
280 p256MulAsm(p16, out, p8) // ffff*p
282 p256Sqr(out, p16)
283 for i := 0; i < 15; i++ {
284 p256Sqr(out, out)
286 p256MulAsm(p32, out, p16) // ffffffff*p
288 p256Sqr(out, p32)
290 for i := 0; i < 31; i++ {
291 p256Sqr(out, out)
293 p256MulAsm(out, out, in)
295 for i := 0; i < 32*4; i++ {
296 p256Sqr(out, out)
298 p256MulAsm(out, out, p32)
300 for i := 0; i < 32; i++ {
301 p256Sqr(out, out)
303 p256MulAsm(out, out, p32)
305 for i := 0; i < 16; i++ {
306 p256Sqr(out, out)
308 p256MulAsm(out, out, p16)
310 for i := 0; i < 8; i++ {
311 p256Sqr(out, out)
313 p256MulAsm(out, out, p8)
315 p256Sqr(out, out)
316 p256Sqr(out, out)
317 p256Sqr(out, out)
318 p256Sqr(out, out)
319 p256MulAsm(out, out, p4)
321 p256Sqr(out, out)
322 p256Sqr(out, out)
323 p256MulAsm(out, out, p2)
325 p256Sqr(out, out)
326 p256Sqr(out, out)
327 p256MulAsm(out, out, in)
330 func boothW5(in uint) (int, int) {
331 var s uint = ^((in >> 5) - 1)
332 var d uint = (1 << 6) - in - 1
333 d = (d & s) | (in & (^s))
334 d = (d >> 1) + (d & 1)
335 return int(d), int(s & 1)
338 func boothW7(in uint) (int, int) {
339 var s uint = ^((in >> 7) - 1)
340 var d uint = (1 << 8) - in - 1
341 d = (d & s) | (in & (^s))
342 d = (d >> 1) + (d & 1)
343 return int(d), int(s & 1)
346 func initTable() {
347 p256PreFast = new([37][64]p256Point) //z coordinate not used
348 basePoint := p256Point{
349 x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10,
350 0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c}, //(p256.x*2^256)%p
351 y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25,
352 0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a}, //(p256.y*2^256)%p
353 z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
354 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, //(p256.z*2^256)%p
357 t1 := new(p256Point)
358 t2 := new(p256Point)
359 *t2 = basePoint
361 zInv := make([]byte, 32)
362 zInvSq := make([]byte, 32)
363 for j := 0; j < 64; j++ {
364 *t1 = *t2
365 for i := 0; i < 37; i++ {
366 // The window size is 7 so we need to double 7 times.
367 if i != 0 {
368 for k := 0; k < 7; k++ {
369 p256PointDoubleAsm(t1, t1)
372 // Convert the point to affine form. (Its values are
373 // still in Montgomery form however.)
374 p256Inverse(zInv, t1.z[:])
375 p256Sqr(zInvSq, zInv)
376 p256MulAsm(zInv, zInv, zInvSq)
378 p256MulAsm(t1.x[:], t1.x[:], zInvSq)
379 p256MulAsm(t1.y[:], t1.y[:], zInv)
381 copy(t1.z[:], basePoint.z[:])
382 // Update the table entry
383 copy(p256PreFast[i][j].x[:], t1.x[:])
384 copy(p256PreFast[i][j].y[:], t1.y[:])
386 if j == 0 {
387 p256PointDoubleAsm(t2, &basePoint)
388 } else {
389 p256PointAddAsm(t2, t2, &basePoint)
394 func (p *p256Point) p256BaseMult(scalar []byte) {
395 wvalue := (uint(scalar[31]) << 1) & 0xff
396 sel, sign := boothW7(uint(wvalue))
397 p256SelectBase(p, p256PreFast[0][:], sel)
398 p256NegCond(p, sign)
400 copy(p.z[:], one[:])
401 var t0 p256Point
403 copy(t0.z[:], one[:])
405 index := uint(6)
406 zero := sel
408 for i := 1; i < 37; i++ {
409 if index < 247 {
410 wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff
411 } else {
412 wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff
414 index += 7
415 sel, sign = boothW7(uint(wvalue))
416 p256SelectBase(&t0, p256PreFast[i][:], sel)
417 p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
418 zero |= sel
422 func (p *p256Point) p256ScalarMult(scalar []byte) {
423 // precomp is a table of precomputed points that stores powers of p
424 // from p^1 to p^16.
425 var precomp [16]p256Point
426 var t0, t1, t2, t3 p256Point
428 // Prepare the table
429 *&precomp[0] = *p
431 p256PointDoubleAsm(&t0, p)
432 p256PointDoubleAsm(&t1, &t0)
433 p256PointDoubleAsm(&t2, &t1)
434 p256PointDoubleAsm(&t3, &t2)
435 *&precomp[1] = t0 // 2
436 *&precomp[3] = t1 // 4
437 *&precomp[7] = t2 // 8
438 *&precomp[15] = t3 // 16
440 p256PointAddAsm(&t0, &t0, p)
441 p256PointAddAsm(&t1, &t1, p)
442 p256PointAddAsm(&t2, &t2, p)
443 *&precomp[2] = t0 // 3
444 *&precomp[4] = t1 // 5
445 *&precomp[8] = t2 // 9
447 p256PointDoubleAsm(&t0, &t0)
448 p256PointDoubleAsm(&t1, &t1)
449 *&precomp[5] = t0 // 6
450 *&precomp[9] = t1 // 10
452 p256PointAddAsm(&t2, &t0, p)
453 p256PointAddAsm(&t1, &t1, p)
454 *&precomp[6] = t2 // 7
455 *&precomp[10] = t1 // 11
457 p256PointDoubleAsm(&t0, &t0)
458 p256PointDoubleAsm(&t2, &t2)
459 *&precomp[11] = t0 // 12
460 *&precomp[13] = t2 // 14
462 p256PointAddAsm(&t0, &t0, p)
463 p256PointAddAsm(&t2, &t2, p)
464 *&precomp[12] = t0 // 13
465 *&precomp[14] = t2 // 15
467 // Start scanning the window from top bit
468 index := uint(254)
469 var sel, sign int
471 wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
472 sel, _ = boothW5(uint(wvalue))
473 p256Select(p, precomp[:], sel)
474 zero := sel
476 for index > 4 {
477 index -= 5
478 p256PointDoubleAsm(p, p)
479 p256PointDoubleAsm(p, p)
480 p256PointDoubleAsm(p, p)
481 p256PointDoubleAsm(p, p)
482 p256PointDoubleAsm(p, p)
484 if index < 247 {
485 wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f
486 } else {
487 wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
490 sel, sign = boothW5(uint(wvalue))
492 p256Select(&t0, precomp[:], sel)
493 p256NegCond(&t0, sign)
494 p256PointAddAsm(&t1, p, &t0)
495 p256MovCond(&t1, &t1, p, sel)
496 p256MovCond(p, &t1, &t0, zero)
497 zero |= sel
500 p256PointDoubleAsm(p, p)
501 p256PointDoubleAsm(p, p)
502 p256PointDoubleAsm(p, p)
503 p256PointDoubleAsm(p, p)
504 p256PointDoubleAsm(p, p)
506 wvalue = (uint(scalar[31]) << 1) & 0x3f
507 sel, sign = boothW5(uint(wvalue))
509 p256Select(&t0, precomp[:], sel)
510 p256NegCond(&t0, sign)
511 p256PointAddAsm(&t1, p, &t0)
512 p256MovCond(&t1, &t1, p, sel)
513 p256MovCond(p, &t1, &t0, zero)