libgo/go/crypto/elliptic/p256_s390x.go

   1 // Copyright 2016 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // +build ignore
   6 // -build s390x
   7
   8 package elliptic
   9
  10 import (
  11         "math/big"
  12 )
  13
// p256CurveFast is the P-256 Curve implementation that initP256Arch installs
// when hasVX is true. It embeds *CurveParams, and its methods are built on the
// multiplication and point routines declared further down in this file.
type p256CurveFast struct {
	*CurveParams
}
  17
// p256Point holds the three coordinates of a curve point as 32-byte
// big-endian values. ScalarMult and CombinedMult load x and y in Montgomery
// form (multiplied by rr) and set z to the Montgomery one; p256PointToAffine
// recovers the affine coordinates as x/z^2 and y/z^3.
type p256Point struct {
	x [32]byte
	y [32]byte
	z [32]byte
}
  23
// p256 is the Curve implementation selected by initP256Arch.
//
// p256PreFast is the precomputed base-point table allocated and filled by
// initTable and read by p256BaseMult: 37 windows of 64 entries each. Only the
// x and y fields of the entries are written.
var (
	p256        Curve
	p256PreFast *[37][64]p256Point
)
  28
// hasVectorFacility reports whether the machine has the z/Architecture
// vector facility installed and enabled.
//
// The declaration has no Go body, so the implementation is supplied outside
// this file (presumably in assembly next to the other routines; confirm
// against the package's .s files).
func hasVectorFacility() bool

// hasVX caches the result of hasVectorFacility at package initialization.
// initP256Arch consults it to choose the P-256 implementation.
var hasVX = hasVectorFacility()
  34
  35 func initP256Arch() {
  36         if hasVX {
  37                 p256 = p256CurveFast{p256Params}
  38                 initTable()
  39                 return
  40         }
  41
  42         // No vector support, use pure Go implementation.
  43         p256 = p256Curve{p256Params}
  44         return
  45 }
  46
// Params returns the curve parameters embedded in curve.
func (curve p256CurveFast) Params() *CurveParams {
	return curve.CurveParams
}
  50
// Functions implemented in p256_asm_s390x.s

// p256MulAsm sets res to the Montgomery product of in1 and in2 modulo the
// P-256 field prime. Callers in this file always pass 32-byte slices and
// frequently pass the same slice as both res and an input.
func p256MulAsm(res, in1, in2 []byte)
  54
// p256Sqr sets res to the Montgomery square of in modulo the P-256 field
// prime. It is p256MulAsm with both operands equal to in.
func p256Sqr(res, in []byte) {
	p256MulAsm(res, in, in)
}
  59
// p256FromMont sets res to the Montgomery product of in and 1, which takes a
// value out of the Montgomery domain. p256PointToAffine applies it to the
// final x and y before they are turned into big.Ints.
func p256FromMont(res, in []byte)
  62
// p256NegCond negates the point val in place if and only if cond == 1:
//   iff cond == 1  val <- -val
// Callers pass the sign returned by boothW5/boothW7 as cond.
func p256NegCond(val *p256Point, cond int)
  65
// p256MovCond conditionally copies one of two points into res:
//   if cond == 0 res <- b; else res <- a
// Callers in this file pass res aliasing a (and, in one call, b's source p).
func p256MovCond(res, a, b *p256Point, cond int)
  68
// Constant time table access.
//
// p256Select and p256SelectBase copy the table entry chosen by idx into
// point. Callers pass the digit returned by boothW5/boothW7, and the tables
// store the multiple k at position k-1, so idx appears to be 1-based with
// idx == 0 selecting no entry (NOTE(review): confirm the idx == 0 result
// against p256_asm_s390x.s). p256Select is used with the 16-entry table built
// in p256ScalarMult, p256SelectBase with one 64-entry row of p256PreFast.
func p256Select(point *p256Point, table []p256Point, idx int)
func p256SelectBase(point *p256Point, table []p256Point, idx int)
  72
// p256OrdMul sets res to the Montgomery product of in1 and in2 modulo Ord(G),
// the order of the base point. Inverse is its only direct user besides
// p256OrdSqr; both pass 32-byte slices and may alias res with the inputs.
func p256OrdMul(res, in1, in2 []byte)
  75
  76 // Montgomery square modulo Ord(G), repeated n times
  77 func p256OrdSqr(res, in []byte, n int) {
  78         copy(res, in)
  79         for i := 0; i < n; i += 1 {
  80                 p256OrdMul(res, res, res)
  81         }
  82 }
  83
// p256PointAddAffineAsm computes P3 = P1 + P2 where P2 is an affine point
// (p256BaseMult passes table entries whose z it has set to the Montgomery
// one). The three flags adjust the result:
//   If sign == 1 -> P2 = -P2
//   If sel == 0 -> P3 = P1
//   if zero == 0 -> P3 = P2
// p256BaseMult passes the Booth sign and digit as sign and sel, and the OR of
// all earlier digits as zero.
func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int)
  89
// p256PointAddAsm sets P3 = P1 + P2. Callers in this file pass P3 aliasing P1.
// NOTE(review): nothing in this file special-cases equal inputs or the point
// at infinity around this call; confirm how the assembly treats them.
func p256PointAddAsm(P3, P1, P2 *p256Point)

// p256PointDoubleAsm sets P3 = 2*P1. Callers in this file pass P3 aliasing P1.
func p256PointDoubleAsm(P3, P1 *p256Point)
  93
  94 func (curve p256CurveFast) Inverse(k *big.Int) *big.Int {
  95         if k.Cmp(p256Params.N) >= 0 {
  96                 // This should never happen.
  97                 reducedK := new(big.Int).Mod(k, p256Params.N)
  98                 k = reducedK
  99         }
 100
 101         // table will store precomputed powers of x. The 32 bytes at index
 102         // i store x^(i+1).
 103         var table [15][32]byte
 104
 105         x := fromBig(k)
 106         // This code operates in the Montgomery domain where R = 2^256 mod n
 107         // and n is the order of the scalar field. (See initP256 for the
 108         // value.) Elements in the Montgomery domain take the form a×R and
 109         // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
 110         // is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
 111         // i.e. converts x into the Montgomery domain. Stored in BigEndian form
 112         RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59,
 113                 0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2}
 114
 115         p256OrdMul(table[0][:], x, RR)
 116
 117         // Prepare the table, no need in constant time access, because the
 118         // power is not a secret. (Entry 0 is never used.)
 119         for i := 2; i < 16; i += 2 {
 120                 p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1)
 121                 p256OrdMul(table[i][:], table[i-1][:], table[0][:])
 122         }
 123
 124         copy(x, table[14][:]) // f
 125
 126         p256OrdSqr(x[0:32], x[0:32], 4)
 127         p256OrdMul(x[0:32], x[0:32], table[14][:]) // ff
 128         t := make([]byte, 32)
 129         copy(t, x)
 130
 131         p256OrdSqr(x, x, 8)
 132         p256OrdMul(x, x, t) // ffff
 133         copy(t, x)
 134
 135         p256OrdSqr(x, x, 16)
 136         p256OrdMul(x, x, t) // ffffffff
 137         copy(t, x)
 138
 139         p256OrdSqr(x, x, 64) // ffffffff0000000000000000
 140         p256OrdMul(x, x, t)  // ffffffff00000000ffffffff
 141         p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000
 142         p256OrdMul(x, x, t)  // ffffffff00000000ffffffffffffffff
 143
 144         // Remaining 32 windows
 145         expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4,
 146                 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
 147         for i := 0; i < 32; i++ {
 148                 p256OrdSqr(x, x, 4)
 149                 p256OrdMul(x, x, table[expLo[i]-1][:])
 150         }
 151
 152         // Multiplying by one in the Montgomery domain converts a Montgomery
 153         // value out of the domain.
 154         one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 155                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}
 156         p256OrdMul(x, x, one)
 157
 158         return new(big.Int).SetBytes(x)
 159 }
 160
 161 // fromBig converts a *big.Int into a format used by this code.
 162 func fromBig(big *big.Int) []byte {
 163         // This could be done a lot more efficiently...
 164         res := big.Bytes()
 165         if 32 == len(res) {
 166                 return res
 167         }
 168         t := make([]byte, 32)
 169         offset := 32 - len(res)
 170         for i := len(res) - 1; i >= 0; i-- {
 171                 t[i+offset] = res[i]
 172         }
 173         return t
 174 }
 175
 176 // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar
 177 // is equal or greater than the order of the group, it's reduced modulo that order.
 178 func p256GetMultiplier(in []byte) []byte {
 179         n := new(big.Int).SetBytes(in)
 180
 181         if n.Cmp(p256Params.N) >= 0 {
 182                 n.Mod(n, p256Params.N)
 183         }
 184         return fromBig(n)
 185 }
 186
// p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the
// underlying field of the curve. (See initP256 for the value.) Thus rr here is
// R×R mod p, stored big-endian: a Montgomery multiplication of a value by rr
// converts that value into the Montgomery domain (ScalarMult and CombinedMult
// do this to the input coordinates). See the comment in Inverse for the same
// technique modulo the group order.
var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe,
	0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03}

// (This is one, in the Montgomery domain.) It is copied into the z coordinate
// of points whose x and y are affine.
var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}
 196
 197 func maybeReduceModP(in *big.Int) *big.Int {
 198         if in.Cmp(p256Params.P) < 0 {
 199                 return in
 200         }
 201         return new(big.Int).Mod(in, p256Params.P)
 202 }
 203
 204 func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
 205         var r1, r2 p256Point
 206         r1.p256BaseMult(p256GetMultiplier(baseScalar))
 207
 208         copy(r2.x[:], fromBig(maybeReduceModP(bigX)))
 209         copy(r2.y[:], fromBig(maybeReduceModP(bigY)))
 210         copy(r2.z[:], one)
 211         p256MulAsm(r2.x[:], r2.x[:], rr[:])
 212         p256MulAsm(r2.y[:], r2.y[:], rr[:])
 213
 214         r2.p256ScalarMult(p256GetMultiplier(scalar))
 215         p256PointAddAsm(&r1, &r1, &r2)
 216         return r1.p256PointToAffine()
 217 }
 218
 219 func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
 220         var r p256Point
 221         r.p256BaseMult(p256GetMultiplier(scalar))
 222         return r.p256PointToAffine()
 223 }
 224
 225 func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
 226         var r p256Point
 227         copy(r.x[:], fromBig(maybeReduceModP(bigX)))
 228         copy(r.y[:], fromBig(maybeReduceModP(bigY)))
 229         copy(r.z[:], one)
 230         p256MulAsm(r.x[:], r.x[:], rr[:])
 231         p256MulAsm(r.y[:], r.y[:], rr[:])
 232         r.p256ScalarMult(p256GetMultiplier(scalar))
 233         return r.p256PointToAffine()
 234 }
 235
 236 func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
 237         zInv := make([]byte, 32)
 238         zInvSq := make([]byte, 32)
 239
 240         p256Inverse(zInv, p.z[:])
 241         p256Sqr(zInvSq, zInv)
 242         p256MulAsm(zInv, zInv, zInvSq)
 243
 244         p256MulAsm(zInvSq, p.x[:], zInvSq)
 245         p256MulAsm(zInv, p.y[:], zInv)
 246
 247         p256FromMont(zInvSq, zInvSq)
 248         p256FromMont(zInv, zInv)
 249
 250         return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv)
 251 }
 252
 253 // p256Inverse sets out to in^-1 mod p.
 254 func p256Inverse(out, in []byte) {
 255         var stack [6 * 32]byte
 256         p2 := stack[32*0 : 32*0+32]
 257         p4 := stack[32*1 : 32*1+32]
 258         p8 := stack[32*2 : 32*2+32]
 259         p16 := stack[32*3 : 32*3+32]
 260         p32 := stack[32*4 : 32*4+32]
 261
 262         p256Sqr(out, in)
 263         p256MulAsm(p2, out, in) // 3*p
 264
 265         p256Sqr(out, p2)
 266         p256Sqr(out, out)
 267         p256MulAsm(p4, out, p2) // f*p
 268
 269         p256Sqr(out, p4)
 270         p256Sqr(out, out)
 271         p256Sqr(out, out)
 272         p256Sqr(out, out)
 273         p256MulAsm(p8, out, p4) // ff*p
 274
 275         p256Sqr(out, p8)
 276
 277         for i := 0; i < 7; i++ {
 278                 p256Sqr(out, out)
 279         }
 280         p256MulAsm(p16, out, p8) // ffff*p
 281
 282         p256Sqr(out, p16)
 283         for i := 0; i < 15; i++ {
 284                 p256Sqr(out, out)
 285         }
 286         p256MulAsm(p32, out, p16) // ffffffff*p
 287
 288         p256Sqr(out, p32)
 289
 290         for i := 0; i < 31; i++ {
 291                 p256Sqr(out, out)
 292         }
 293         p256MulAsm(out, out, in)
 294
 295         for i := 0; i < 32*4; i++ {
 296                 p256Sqr(out, out)
 297         }
 298         p256MulAsm(out, out, p32)
 299
 300         for i := 0; i < 32; i++ {
 301                 p256Sqr(out, out)
 302         }
 303         p256MulAsm(out, out, p32)
 304
 305         for i := 0; i < 16; i++ {
 306                 p256Sqr(out, out)
 307         }
 308         p256MulAsm(out, out, p16)
 309
 310         for i := 0; i < 8; i++ {
 311                 p256Sqr(out, out)
 312         }
 313         p256MulAsm(out, out, p8)
 314
 315         p256Sqr(out, out)
 316         p256Sqr(out, out)
 317         p256Sqr(out, out)
 318         p256Sqr(out, out)
 319         p256MulAsm(out, out, p4)
 320
 321         p256Sqr(out, out)
 322         p256Sqr(out, out)
 323         p256MulAsm(out, out, p2)
 324
 325         p256Sqr(out, out)
 326         p256Sqr(out, out)
 327         p256MulAsm(out, out, in)
 328 }
 329
 330 func boothW5(in uint) (int, int) {
 331         var s uint = ^((in >> 5) - 1)
 332         var d uint = (1 << 6) - in - 1
 333         d = (d & s) | (in & (^s))
 334         d = (d >> 1) + (d & 1)
 335         return int(d), int(s & 1)
 336 }
 337
 338 func boothW7(in uint) (int, int) {
 339         var s uint = ^((in >> 7) - 1)
 340         var d uint = (1 << 8) - in - 1
 341         d = (d & s) | (in & (^s))
 342         d = (d >> 1) + (d & 1)
 343         return int(d), int(s & 1)
 344 }
 345
 346 func initTable() {
 347         p256PreFast = new([37][64]p256Point) //z coordinate not used
 348         basePoint := p256Point{
 349                 x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10,
 350                         0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c}, //(p256.x*2^256)%p
 351                 y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25,
 352                         0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a}, //(p256.y*2^256)%p
 353                 z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
 354                         0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, //(p256.z*2^256)%p
 355         }
 356
 357         t1 := new(p256Point)
 358         t2 := new(p256Point)
 359         *t2 = basePoint
 360
 361         zInv := make([]byte, 32)
 362         zInvSq := make([]byte, 32)
 363         for j := 0; j < 64; j++ {
 364                 *t1 = *t2
 365                 for i := 0; i < 37; i++ {
 366                         // The window size is 7 so we need to double 7 times.
 367                         if i != 0 {
 368                                 for k := 0; k < 7; k++ {
 369                                         p256PointDoubleAsm(t1, t1)
 370                                 }
 371                         }
 372                         // Convert the point to affine form. (Its values are
 373                         // still in Montgomery form however.)
 374                         p256Inverse(zInv, t1.z[:])
 375                         p256Sqr(zInvSq, zInv)
 376                         p256MulAsm(zInv, zInv, zInvSq)
 377
 378                         p256MulAsm(t1.x[:], t1.x[:], zInvSq)
 379                         p256MulAsm(t1.y[:], t1.y[:], zInv)
 380
 381                         copy(t1.z[:], basePoint.z[:])
 382                         // Update the table entry
 383                         copy(p256PreFast[i][j].x[:], t1.x[:])
 384                         copy(p256PreFast[i][j].y[:], t1.y[:])
 385                 }
 386                 if j == 0 {
 387                         p256PointDoubleAsm(t2, &basePoint)
 388                 } else {
 389                         p256PointAddAsm(t2, t2, &basePoint)
 390                 }
 391         }
 392 }
 393
 394 func (p *p256Point) p256BaseMult(scalar []byte) {
 395         wvalue := (uint(scalar[31]) << 1) & 0xff
 396         sel, sign := boothW7(uint(wvalue))
 397         p256SelectBase(p, p256PreFast[0][:], sel)
 398         p256NegCond(p, sign)
 399
 400         copy(p.z[:], one[:])
 401         var t0 p256Point
 402
 403         copy(t0.z[:], one[:])
 404
 405         index := uint(6)
 406         zero := sel
 407
 408         for i := 1; i < 37; i++ {
 409                 if index < 247 {
 410                         wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff
 411                 } else {
 412                         wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff
 413                 }
 414                 index += 7
 415                 sel, sign = boothW7(uint(wvalue))
 416                 p256SelectBase(&t0, p256PreFast[i][:], sel)
 417                 p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
 418                 zero |= sel
 419         }
 420 }
 421
 422 func (p *p256Point) p256ScalarMult(scalar []byte) {
 423         // precomp is a table of precomputed points that stores powers of p
 424         // from p^1 to p^16.
 425         var precomp [16]p256Point
 426         var t0, t1, t2, t3 p256Point
 427
 428         // Prepare the table
 429         *&precomp[0] = *p
 430
 431         p256PointDoubleAsm(&t0, p)
 432         p256PointDoubleAsm(&t1, &t0)
 433         p256PointDoubleAsm(&t2, &t1)
 434         p256PointDoubleAsm(&t3, &t2)
 435         *&precomp[1] = t0  // 2
 436         *&precomp[3] = t1  // 4
 437         *&precomp[7] = t2  // 8
 438         *&precomp[15] = t3 // 16
 439
 440         p256PointAddAsm(&t0, &t0, p)
 441         p256PointAddAsm(&t1, &t1, p)
 442         p256PointAddAsm(&t2, &t2, p)
 443         *&precomp[2] = t0 // 3
 444         *&precomp[4] = t1 // 5
 445         *&precomp[8] = t2 // 9
 446
 447         p256PointDoubleAsm(&t0, &t0)
 448         p256PointDoubleAsm(&t1, &t1)
 449         *&precomp[5] = t0 // 6
 450         *&precomp[9] = t1 // 10
 451
 452         p256PointAddAsm(&t2, &t0, p)
 453         p256PointAddAsm(&t1, &t1, p)
 454         *&precomp[6] = t2  // 7
 455         *&precomp[10] = t1 // 11
 456
 457         p256PointDoubleAsm(&t0, &t0)
 458         p256PointDoubleAsm(&t2, &t2)
 459         *&precomp[11] = t0 // 12
 460         *&precomp[13] = t2 // 14
 461
 462         p256PointAddAsm(&t0, &t0, p)
 463         p256PointAddAsm(&t2, &t2, p)
 464         *&precomp[12] = t0 // 13
 465         *&precomp[14] = t2 // 15
 466
 467         // Start scanning the window from top bit
 468         index := uint(254)
 469         var sel, sign int
 470
 471         wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
 472         sel, _ = boothW5(uint(wvalue))
 473         p256Select(p, precomp[:], sel)
 474         zero := sel
 475
 476         for index > 4 {
 477                 index -= 5
 478                 p256PointDoubleAsm(p, p)
 479                 p256PointDoubleAsm(p, p)
 480                 p256PointDoubleAsm(p, p)
 481                 p256PointDoubleAsm(p, p)
 482                 p256PointDoubleAsm(p, p)
 483
 484                 if index < 247 {
 485                         wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f
 486                 } else {
 487                         wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
 488                 }
 489
 490                 sel, sign = boothW5(uint(wvalue))
 491
 492                 p256Select(&t0, precomp[:], sel)
 493                 p256NegCond(&t0, sign)
 494                 p256PointAddAsm(&t1, p, &t0)
 495                 p256MovCond(&t1, &t1, p, sel)
 496                 p256MovCond(p, &t1, &t0, zero)
 497                 zero |= sel
 498         }
 499
 500         p256PointDoubleAsm(p, p)
 501         p256PointDoubleAsm(p, p)
 502         p256PointDoubleAsm(p, p)
 503         p256PointDoubleAsm(p, p)
 504         p256PointDoubleAsm(p, p)
 505
 506         wvalue = (uint(scalar[31]) << 1) & 0x3f
 507         sel, sign = boothW5(uint(wvalue))
 508
 509         p256Select(&t0, precomp[:], sel)
 510         p256NegCond(&t0, sign)
 511         p256PointAddAsm(&t1, p, &t0)
 512         p256MovCond(&t1, &t1, p, sel)
 513         p256MovCond(p, &t1, &t0, zero)
 514 }