1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
14 type p256CurveFast
struct {
18 type p256Point
struct {
26 p256PreFast
*[37][64]p256Point
29 // hasVectorFacility reports whether the machine has the z/Architecture
30 // vector facility installed and enabled.
31 func hasVectorFacility() bool
33 var hasVX
= hasVectorFacility()
37 p256
= p256CurveFast
{p256Params
}
42 // No vector support, use pure Go implementation.
43 p256
= p256Curve
{p256Params
}
47 func (curve p256CurveFast
) Params() *CurveParams
{
48 return curve
.CurveParams
51 // Functions implemented in p256_asm_s390x.s
52 // Montgomery multiplication modulo P256
53 func p256MulAsm(res
, in1
, in2
[]byte)
55 // Montgomery square modulo P256
56 func p256Sqr(res
, in
[]byte) {
57 p256MulAsm(res
, in
, in
)
60 // Montgomery multiplication by 1
61 func p256FromMont(res
, in
[]byte)
63 // iff cond == 1 val <- -val
64 func p256NegCond(val
*p256Point
, cond
int)
66 // if cond == 0 res <- b; else res <- a
67 func p256MovCond(res
, a
, b
*p256Point
, cond
int)
69 // Constant time table access
70 func p256Select(point
*p256Point
, table
[]p256Point
, idx
int)
71 func p256SelectBase(point
*p256Point
, table
[]p256Point
, idx
int)
73 // Montgomery multiplication modulo Ord(G)
74 func p256OrdMul(res
, in1
, in2
[]byte)
76 // Montgomery square modulo Ord(G), repeated n times
77 func p256OrdSqr(res
, in
[]byte, n
int) {
79 for i
:= 0; i
< n
; i
+= 1 {
80 p256OrdMul(res
, res
, res
)
84 // Point add with P2 being affine point
85 // If sign == 1 -> P2 = -P2
86 // If sel == 0 -> P3 = P1
87 // if zero == 0 -> P3 = P2
88 func p256PointAddAffineAsm(P3
, P1
, P2
*p256Point
, sign
, sel
, zero
int)
91 func p256PointAddAsm(P3
, P1
, P2
*p256Point
)
92 func p256PointDoubleAsm(P3
, P1
*p256Point
)
94 func (curve p256CurveFast
) Inverse(k
*big
.Int
) *big
.Int
{
95 if k
.Cmp(p256Params
.N
) >= 0 {
96 // This should never happen.
97 reducedK
:= new(big
.Int
).Mod(k
, p256Params
.N
)
101 // table will store precomputed powers of x. The 32 bytes at index
103 var table
[15][32]byte
106 // This code operates in the Montgomery domain where R = 2^256 mod n
107 // and n is the order of the scalar field. (See initP256 for the
108 // value.) Elements in the Montgomery domain take the form a×R and
109 // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
110 // is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
111 // i.e. converts x into the Montgomery domain. Stored in BigEndian form
112 RR
:= []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59,
113 0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2}
115 p256OrdMul(table
[0][:], x
, RR
)
117 // Prepare the table, no need in constant time access, because the
118 // power is not a secret. (Entry 0 is never used.)
119 for i
:= 2; i
< 16; i
+= 2 {
120 p256OrdSqr(table
[i
-1][:], table
[(i
/2)-1][:], 1)
121 p256OrdMul(table
[i
][:], table
[i
-1][:], table
[0][:])
124 copy(x
, table
[14][:]) // f
126 p256OrdSqr(x
[0:32], x
[0:32], 4)
127 p256OrdMul(x
[0:32], x
[0:32], table
[14][:]) // ff
128 t
:= make([]byte, 32)
132 p256OrdMul(x
, x
, t
) // ffff
136 p256OrdMul(x
, x
, t
) // ffffffff
139 p256OrdSqr(x
, x
, 64) // ffffffff0000000000000000
140 p256OrdMul(x
, x
, t
) // ffffffff00000000ffffffff
141 p256OrdSqr(x
, x
, 32) // ffffffff00000000ffffffff00000000
142 p256OrdMul(x
, x
, t
) // ffffffff00000000ffffffffffffffff
144 // Remaining 32 windows
145 expLo
:= [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4,
146 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
147 for i
:= 0; i
< 32; i
++ {
149 p256OrdMul(x
, x
, table
[expLo
[i
]-1][:])
152 // Multiplying by one in the Montgomery domain converts a Montgomery
153 // value out of the domain.
154 one
:= []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
155 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}
156 p256OrdMul(x
, x
, one
)
158 return new(big
.Int
).SetBytes(x
)
161 // fromBig converts a *big.Int into a format used by this code.
162 func fromBig(big
*big
.Int
) []byte {
163 // This could be done a lot more efficiently...
168 t
:= make([]byte, 32)
169 offset
:= 32 - len(res
)
170 for i
:= len(res
) - 1; i
>= 0; i
-- {
176 // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar
177 // is equal or greater than the order of the group, it's reduced modulo that order.
178 func p256GetMultiplier(in
[]byte) []byte {
179 n
:= new(big
.Int
).SetBytes(in
)
181 if n
.Cmp(p256Params
.N
) >= 0 {
182 n
.Mod(n
, p256Params
.N
)
187 // p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the
188 // underlying field of the curve. (See initP256 for the value.) Thus rr here is
189 // R×R mod p. See comment in Inverse about how this is used.
190 var rr
= []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe,
191 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03}
193 // (This is one, in the Montgomery domain.)
194 var one
= []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
195 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}
197 func maybeReduceModP(in
*big
.Int
) *big
.Int
{
198 if in
.Cmp(p256Params
.P
) < 0 {
201 return new(big
.Int
).Mod(in
, p256Params
.P
)
204 func (curve p256CurveFast
) CombinedMult(bigX
, bigY
*big
.Int
, baseScalar
, scalar
[]byte) (x
, y
*big
.Int
) {
206 r1
.p256BaseMult(p256GetMultiplier(baseScalar
))
208 copy(r2
.x
[:], fromBig(maybeReduceModP(bigX
)))
209 copy(r2
.y
[:], fromBig(maybeReduceModP(bigY
)))
211 p256MulAsm(r2
.x
[:], r2
.x
[:], rr
[:])
212 p256MulAsm(r2
.y
[:], r2
.y
[:], rr
[:])
214 r2
.p256ScalarMult(p256GetMultiplier(scalar
))
215 p256PointAddAsm(&r1
, &r1
, &r2
)
216 return r1
.p256PointToAffine()
219 func (curve p256CurveFast
) ScalarBaseMult(scalar
[]byte) (x
, y
*big
.Int
) {
221 r
.p256BaseMult(p256GetMultiplier(scalar
))
222 return r
.p256PointToAffine()
225 func (curve p256CurveFast
) ScalarMult(bigX
, bigY
*big
.Int
, scalar
[]byte) (x
, y
*big
.Int
) {
227 copy(r
.x
[:], fromBig(maybeReduceModP(bigX
)))
228 copy(r
.y
[:], fromBig(maybeReduceModP(bigY
)))
230 p256MulAsm(r
.x
[:], r
.x
[:], rr
[:])
231 p256MulAsm(r
.y
[:], r
.y
[:], rr
[:])
232 r
.p256ScalarMult(p256GetMultiplier(scalar
))
233 return r
.p256PointToAffine()
236 func (p
*p256Point
) p256PointToAffine() (x
, y
*big
.Int
) {
237 zInv
:= make([]byte, 32)
238 zInvSq
:= make([]byte, 32)
240 p256Inverse(zInv
, p
.z
[:])
241 p256Sqr(zInvSq
, zInv
)
242 p256MulAsm(zInv
, zInv
, zInvSq
)
244 p256MulAsm(zInvSq
, p
.x
[:], zInvSq
)
245 p256MulAsm(zInv
, p
.y
[:], zInv
)
247 p256FromMont(zInvSq
, zInvSq
)
248 p256FromMont(zInv
, zInv
)
250 return new(big
.Int
).SetBytes(zInvSq
), new(big
.Int
).SetBytes(zInv
)
253 // p256Inverse sets out to in^-1 mod p.
254 func p256Inverse(out
, in
[]byte) {
255 var stack
[6 * 32]byte
256 p2
:= stack
[32*0 : 32*0+32]
257 p4
:= stack
[32*1 : 32*1+32]
258 p8
:= stack
[32*2 : 32*2+32]
259 p16
:= stack
[32*3 : 32*3+32]
260 p32
:= stack
[32*4 : 32*4+32]
263 p256MulAsm(p2
, out
, in
) // 3*p
267 p256MulAsm(p4
, out
, p2
) // f*p
273 p256MulAsm(p8
, out
, p4
) // ff*p
277 for i
:= 0; i
< 7; i
++ {
280 p256MulAsm(p16
, out
, p8
) // ffff*p
283 for i
:= 0; i
< 15; i
++ {
286 p256MulAsm(p32
, out
, p16
) // ffffffff*p
290 for i
:= 0; i
< 31; i
++ {
293 p256MulAsm(out
, out
, in
)
295 for i
:= 0; i
< 32*4; i
++ {
298 p256MulAsm(out
, out
, p32
)
300 for i
:= 0; i
< 32; i
++ {
303 p256MulAsm(out
, out
, p32
)
305 for i
:= 0; i
< 16; i
++ {
308 p256MulAsm(out
, out
, p16
)
310 for i
:= 0; i
< 8; i
++ {
313 p256MulAsm(out
, out
, p8
)
319 p256MulAsm(out
, out
, p4
)
323 p256MulAsm(out
, out
, p2
)
327 p256MulAsm(out
, out
, in
)
330 func boothW5(in
uint) (int, int) {
331 var s
uint = ^((in
>> 5) - 1)
332 var d
uint = (1 << 6) - in
- 1
333 d
= (d
& s
) |
(in
& (^s
))
334 d
= (d
>> 1) + (d
& 1)
335 return int(d
), int(s
& 1)
338 func boothW7(in
uint) (int, int) {
339 var s
uint = ^((in
>> 7) - 1)
340 var d
uint = (1 << 8) - in
- 1
341 d
= (d
& s
) |
(in
& (^s
))
342 d
= (d
>> 1) + (d
& 1)
343 return int(d
), int(s
& 1)
347 p256PreFast
= new([37][64]p256Point
) //z coordinate not used
348 basePoint
:= p256Point
{
349 x
: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10,
350 0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c}, //(p256.x*2^256)%p
351 y
: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25,
352 0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a}, //(p256.y*2^256)%p
353 z
: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
354 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, //(p256.z*2^256)%p
361 zInv
:= make([]byte, 32)
362 zInvSq
:= make([]byte, 32)
363 for j
:= 0; j
< 64; j
++ {
365 for i
:= 0; i
< 37; i
++ {
366 // The window size is 7 so we need to double 7 times.
368 for k
:= 0; k
< 7; k
++ {
369 p256PointDoubleAsm(t1
, t1
)
372 // Convert the point to affine form. (Its values are
373 // still in Montgomery form however.)
374 p256Inverse(zInv
, t1
.z
[:])
375 p256Sqr(zInvSq
, zInv
)
376 p256MulAsm(zInv
, zInv
, zInvSq
)
378 p256MulAsm(t1
.x
[:], t1
.x
[:], zInvSq
)
379 p256MulAsm(t1
.y
[:], t1
.y
[:], zInv
)
381 copy(t1
.z
[:], basePoint
.z
[:])
382 // Update the table entry
383 copy(p256PreFast
[i
][j
].x
[:], t1
.x
[:])
384 copy(p256PreFast
[i
][j
].y
[:], t1
.y
[:])
387 p256PointDoubleAsm(t2
, &basePoint
)
389 p256PointAddAsm(t2
, t2
, &basePoint
)
394 func (p
*p256Point
) p256BaseMult(scalar
[]byte) {
395 wvalue
:= (uint(scalar
[31]) << 1) & 0xff
396 sel
, sign
:= boothW7(uint(wvalue
))
397 p256SelectBase(p
, p256PreFast
[0][:], sel
)
403 copy(t0
.z
[:], one
[:])
408 for i
:= 1; i
< 37; i
++ {
410 wvalue
= ((uint(scalar
[31-index
/8]) >> (index
% 8)) + (uint(scalar
[31-index
/8-1]) << (8 - (index
% 8)))) & 0xff
412 wvalue
= (uint(scalar
[31-index
/8]) >> (index
% 8)) & 0xff
415 sel
, sign
= boothW7(uint(wvalue
))
416 p256SelectBase(&t0
, p256PreFast
[i
][:], sel
)
417 p256PointAddAffineAsm(p
, p
, &t0
, sign
, sel
, zero
)
422 func (p
*p256Point
) p256ScalarMult(scalar
[]byte) {
423 // precomp is a table of precomputed points that stores powers of p
425 var precomp
[16]p256Point
426 var t0
, t1
, t2
, t3 p256Point
431 p256PointDoubleAsm(&t0
, p
)
432 p256PointDoubleAsm(&t1
, &t0
)
433 p256PointDoubleAsm(&t2
, &t1
)
434 p256PointDoubleAsm(&t3
, &t2
)
435 *&precomp
[1] = t0
// 2
436 *&precomp
[3] = t1
// 4
437 *&precomp
[7] = t2
// 8
438 *&precomp
[15] = t3
// 16
440 p256PointAddAsm(&t0
, &t0
, p
)
441 p256PointAddAsm(&t1
, &t1
, p
)
442 p256PointAddAsm(&t2
, &t2
, p
)
443 *&precomp
[2] = t0
// 3
444 *&precomp
[4] = t1
// 5
445 *&precomp
[8] = t2
// 9
447 p256PointDoubleAsm(&t0
, &t0
)
448 p256PointDoubleAsm(&t1
, &t1
)
449 *&precomp
[5] = t0
// 6
450 *&precomp
[9] = t1
// 10
452 p256PointAddAsm(&t2
, &t0
, p
)
453 p256PointAddAsm(&t1
, &t1
, p
)
454 *&precomp
[6] = t2
// 7
455 *&precomp
[10] = t1
// 11
457 p256PointDoubleAsm(&t0
, &t0
)
458 p256PointDoubleAsm(&t2
, &t2
)
459 *&precomp
[11] = t0
// 12
460 *&precomp
[13] = t2
// 14
462 p256PointAddAsm(&t0
, &t0
, p
)
463 p256PointAddAsm(&t2
, &t2
, p
)
464 *&precomp
[12] = t0
// 13
465 *&precomp
[14] = t2
// 15
467 // Start scanning the window from top bit
471 wvalue
:= (uint(scalar
[31-index
/8]) >> (index
% 8)) & 0x3f
472 sel
, _
= boothW5(uint(wvalue
))
473 p256Select(p
, precomp
[:], sel
)
478 p256PointDoubleAsm(p
, p
)
479 p256PointDoubleAsm(p
, p
)
480 p256PointDoubleAsm(p
, p
)
481 p256PointDoubleAsm(p
, p
)
482 p256PointDoubleAsm(p
, p
)
485 wvalue
= ((uint(scalar
[31-index
/8]) >> (index
% 8)) + (uint(scalar
[31-index
/8-1]) << (8 - (index
% 8)))) & 0x3f
487 wvalue
= (uint(scalar
[31-index
/8]) >> (index
% 8)) & 0x3f
490 sel
, sign
= boothW5(uint(wvalue
))
492 p256Select(&t0
, precomp
[:], sel
)
493 p256NegCond(&t0
, sign
)
494 p256PointAddAsm(&t1
, p
, &t0
)
495 p256MovCond(&t1
, &t1
, p
, sel
)
496 p256MovCond(p
, &t1
, &t0
, zero
)
500 p256PointDoubleAsm(p
, p
)
501 p256PointDoubleAsm(p
, p
)
502 p256PointDoubleAsm(p
, p
)
503 p256PointDoubleAsm(p
, p
)
504 p256PointDoubleAsm(p
, p
)
506 wvalue
= (uint(scalar
[31]) << 1) & 0x3f
507 sel
, sign
= boothW5(uint(wvalue
))
509 p256Select(&t0
, precomp
[:], sel
)
510 p256NegCond(&t0
, sign
)
511 p256PointAddAsm(&t1
, p
, &t0
)
512 p256MovCond(&t1
, &t1
, p
, sel
)
513 p256MovCond(p
, &t1
, &t0
, zero
)