libgo/go/crypto/elliptic/p256_asm.go

   1 // Copyright 2015 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // This file contains the Go wrapper for the constant-time, 64-bit assembly
   6 // implementation of P256. The optimizations performed here are described in
   7 // detail in:
   8 // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
   9 //                          256-bit primes"
  10 // https://link.springer.com/article/10.1007%2Fs13389-014-0090-x
  11 // https://eprint.iacr.org/2013/816.pdf
  12
  13 // +build ignore_for_gccgo
  14 // +build amd64 arm64
  15
  16 package elliptic
  17
  18 import (
  19         "math/big"
  20         "sync"
  21 )
  22
  23 type (
  24         p256Curve struct {
  25                 *CurveParams
  26         }
  27
  28         p256Point struct {
  29                 xyz [12]uint64
  30         }
  31 )
  32
  33 var (
  34         p256            p256Curve
  35         p256Precomputed *[43][32 * 8]uint64
  36         precomputeOnce  sync.Once
  37 )
  38
  39 func initP256() {
  40         // See FIPS 186-3, section D.2.3
  41         p256.CurveParams = &CurveParams{Name: "P-256"}
  42         p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
  43         p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
  44         p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
  45         p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
  46         p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
  47         p256.BitSize = 256
  48 }
  49
  50 func (curve p256Curve) Params() *CurveParams {
  51         return curve.CurveParams
  52 }
  53
  54 // Functions implemented in p256_asm_*64.s
  55 // Montgomery multiplication modulo P256
  56 //go:noescape
  57 func p256Mul(res, in1, in2 []uint64)
  58
  59 // Montgomery square modulo P256, repeated n times (n >= 1)
  60 //go:noescape
  61 func p256Sqr(res, in []uint64, n int)
  62
  63 // Montgomery multiplication by 1
  64 //go:noescape
  65 func p256FromMont(res, in []uint64)
  66
  67 // iff cond == 1  val <- -val
  68 //go:noescape
  69 func p256NegCond(val []uint64, cond int)
  70
  71 // if cond == 0 res <- b; else res <- a
  72 //go:noescape
  73 func p256MovCond(res, a, b []uint64, cond int)
  74
  75 // Endianness swap
  76 //go:noescape
  77 func p256BigToLittle(res []uint64, in []byte)
  78
  79 //go:noescape
  80 func p256LittleToBig(res []byte, in []uint64)
  81
  82 // Constant time table access
  83 //go:noescape
  84 func p256Select(point, table []uint64, idx int)
  85
  86 //go:noescape
  87 func p256SelectBase(point, table []uint64, idx int)
  88
  89 // Montgomery multiplication modulo Ord(G)
  90 //go:noescape
  91 func p256OrdMul(res, in1, in2 []uint64)
  92
  93 // Montgomery square modulo Ord(G), repeated n times
  94 //go:noescape
  95 func p256OrdSqr(res, in []uint64, n int)
  96
  97 // Point add with in2 being affine point
  98 // If sign == 1 -> in2 = -in2
  99 // If sel == 0 -> res = in1
 100 // if zero == 0 -> res = in2
 101 //go:noescape
 102 func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)
 103
 104 // Point add. Returns one if the two input points were equal and zero
 105 // otherwise. (Note that, due to the way that the equations work out, some
 106 // representations of ∞ are considered equal to everything by this function.)
 107 //go:noescape
 108 func p256PointAddAsm(res, in1, in2 []uint64) int
 109
 110 // Point double
 111 //go:noescape
 112 func p256PointDoubleAsm(res, in []uint64)
 113
 114 func (curve p256Curve) Inverse(k *big.Int) *big.Int {
 115         if k.Sign() < 0 {
 116                 // This should never happen.
 117                 k = new(big.Int).Neg(k)
 118         }
 119
 120         if k.Cmp(p256.N) >= 0 {
 121                 // This should never happen.
 122                 k = new(big.Int).Mod(k, p256.N)
 123         }
 124
 125         // table will store precomputed powers of x.
 126         var table [4 * 9]uint64
 127         var (
 128                 _1      = table[4*0 : 4*1]
 129                 _11     = table[4*1 : 4*2]
 130                 _101    = table[4*2 : 4*3]
 131                 _111    = table[4*3 : 4*4]
 132                 _1111   = table[4*4 : 4*5]
 133                 _10101  = table[4*5 : 4*6]
 134                 _101111 = table[4*6 : 4*7]
 135                 x       = table[4*7 : 4*8]
 136                 t       = table[4*8 : 4*9]
 137         )
 138
 139         fromBig(x[:], k)
 140         // This code operates in the Montgomery domain where R = 2^256 mod n
 141         // and n is the order of the scalar field. (See initP256 for the
 142         // value.) Elements in the Montgomery domain take the form a×R and
 143         // multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
 144         // is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
 145         // i.e. converts x into the Montgomery domain.
 146         // Window values borrowed from https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion
 147         RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}
 148         p256OrdMul(_1, x, RR)      // _1
 149         p256OrdSqr(x, _1, 1)       // _10
 150         p256OrdMul(_11, x, _1)     // _11
 151         p256OrdMul(_101, x, _11)   // _101
 152         p256OrdMul(_111, x, _101)  // _111
 153         p256OrdSqr(x, _101, 1)     // _1010
 154         p256OrdMul(_1111, _101, x) // _1111
 155
 156         p256OrdSqr(t, x, 1)          // _10100
 157         p256OrdMul(_10101, t, _1)    // _10101
 158         p256OrdSqr(x, _10101, 1)     // _101010
 159         p256OrdMul(_101111, _101, x) // _101111
 160         p256OrdMul(x, _10101, x)     // _111111 = x6
 161         p256OrdSqr(t, x, 2)          // _11111100
 162         p256OrdMul(t, t, _11)        // _11111111 = x8
 163         p256OrdSqr(x, t, 8)          // _ff00
 164         p256OrdMul(x, x, t)          // _ffff = x16
 165         p256OrdSqr(t, x, 16)         // _ffff0000
 166         p256OrdMul(t, t, x)          // _ffffffff = x32
 167
 168         p256OrdSqr(x, t, 64)
 169         p256OrdMul(x, x, t)
 170         p256OrdSqr(x, x, 32)
 171         p256OrdMul(x, x, t)
 172
 173         sqrs := []uint8{
 174                 6, 5, 4, 5, 5,
 175                 4, 3, 3, 5, 9,
 176                 6, 2, 5, 6, 5,
 177                 4, 5, 5, 3, 10,
 178                 2, 5, 5, 3, 7, 6}
 179         muls := [][]uint64{
 180                 _101111, _111, _11, _1111, _10101,
 181                 _101, _101, _101, _111, _101111,
 182                 _1111, _1, _1, _1111, _111,
 183                 _111, _111, _101, _11, _101111,
 184                 _11, _11, _11, _1, _10101, _1111}
 185
 186         for i, s := range sqrs {
 187                 p256OrdSqr(x, x, int(s))
 188                 p256OrdMul(x, x, muls[i])
 189         }
 190
 191         // Multiplying by one in the Montgomery domain converts a Montgomery
 192         // value out of the domain.
 193         one := []uint64{1, 0, 0, 0}
 194         p256OrdMul(x, x, one)
 195
 196         xOut := make([]byte, 32)
 197         p256LittleToBig(xOut, x)
 198         return new(big.Int).SetBytes(xOut)
 199 }
 200
 201 // fromBig converts a *big.Int into a format used by this code.
 202 func fromBig(out []uint64, big *big.Int) {
 203         for i := range out {
 204                 out[i] = 0
 205         }
 206
 207         for i, v := range big.Bits() {
 208                 out[i] = uint64(v)
 209         }
 210 }
 211
 212 // p256GetScalar endian-swaps the big-endian scalar value from in and writes it
 213 // to out. If the scalar is equal or greater than the order of the group, it's
 214 // reduced modulo that order.
 215 func p256GetScalar(out []uint64, in []byte) {
 216         n := new(big.Int).SetBytes(in)
 217
 218         if n.Cmp(p256.N) >= 0 {
 219                 n.Mod(n, p256.N)
 220         }
 221         fromBig(out, n)
 222 }
 223
 224 // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the
 225 // underlying field of the curve. (See initP256 for the value.) Thus rr here is
 226 // R×R mod p. See comment in Inverse about how this is used.
 227 var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd}
 228
 229 func maybeReduceModP(in *big.Int) *big.Int {
 230         if in.Cmp(p256.P) < 0 {
 231                 return in
 232         }
 233         return new(big.Int).Mod(in, p256.P)
 234 }
 235
 236 func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
 237         scalarReversed := make([]uint64, 4)
 238         var r1, r2 p256Point
 239         p256GetScalar(scalarReversed, baseScalar)
 240         r1IsInfinity := scalarIsZero(scalarReversed)
 241         r1.p256BaseMult(scalarReversed)
 242
 243         p256GetScalar(scalarReversed, scalar)
 244         r2IsInfinity := scalarIsZero(scalarReversed)
 245         fromBig(r2.xyz[0:4], maybeReduceModP(bigX))
 246         fromBig(r2.xyz[4:8], maybeReduceModP(bigY))
 247         p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:])
 248         p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:])
 249
 250         // This sets r2's Z value to 1, in the Montgomery domain.
 251         r2.xyz[8] = 0x0000000000000001
 252         r2.xyz[9] = 0xffffffff00000000
 253         r2.xyz[10] = 0xffffffffffffffff
 254         r2.xyz[11] = 0x00000000fffffffe
 255
 256         r2.p256ScalarMult(scalarReversed)
 257
 258         var sum, double p256Point
 259         pointsEqual := p256PointAddAsm(sum.xyz[:], r1.xyz[:], r2.xyz[:])
 260         p256PointDoubleAsm(double.xyz[:], r1.xyz[:])
 261         sum.CopyConditional(&double, pointsEqual)
 262         sum.CopyConditional(&r1, r2IsInfinity)
 263         sum.CopyConditional(&r2, r1IsInfinity)
 264
 265         return sum.p256PointToAffine()
 266 }
 267
 268 func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
 269         scalarReversed := make([]uint64, 4)
 270         p256GetScalar(scalarReversed, scalar)
 271
 272         var r p256Point
 273         r.p256BaseMult(scalarReversed)
 274         return r.p256PointToAffine()
 275 }
 276
 277 func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
 278         scalarReversed := make([]uint64, 4)
 279         p256GetScalar(scalarReversed, scalar)
 280
 281         var r p256Point
 282         fromBig(r.xyz[0:4], maybeReduceModP(bigX))
 283         fromBig(r.xyz[4:8], maybeReduceModP(bigY))
 284         p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:])
 285         p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:])
 286         // This sets r2's Z value to 1, in the Montgomery domain.
 287         r.xyz[8] = 0x0000000000000001
 288         r.xyz[9] = 0xffffffff00000000
 289         r.xyz[10] = 0xffffffffffffffff
 290         r.xyz[11] = 0x00000000fffffffe
 291
 292         r.p256ScalarMult(scalarReversed)
 293         return r.p256PointToAffine()
 294 }
 295
 296 // uint64IsZero returns 1 if x is zero and zero otherwise.
 297 func uint64IsZero(x uint64) int {
 298         x = ^x
 299         x &= x >> 32
 300         x &= x >> 16
 301         x &= x >> 8
 302         x &= x >> 4
 303         x &= x >> 2
 304         x &= x >> 1
 305         return int(x & 1)
 306 }
 307
 308 // scalarIsZero returns 1 if scalar represents the zero value, and zero
 309 // otherwise.
 310 func scalarIsZero(scalar []uint64) int {
 311         return uint64IsZero(scalar[0] | scalar[1] | scalar[2] | scalar[3])
 312 }
 313
 314 func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
 315         zInv := make([]uint64, 4)
 316         zInvSq := make([]uint64, 4)
 317         p256Inverse(zInv, p.xyz[8:12])
 318         p256Sqr(zInvSq, zInv, 1)
 319         p256Mul(zInv, zInv, zInvSq)
 320
 321         p256Mul(zInvSq, p.xyz[0:4], zInvSq)
 322         p256Mul(zInv, p.xyz[4:8], zInv)
 323
 324         p256FromMont(zInvSq, zInvSq)
 325         p256FromMont(zInv, zInv)
 326
 327         xOut := make([]byte, 32)
 328         yOut := make([]byte, 32)
 329         p256LittleToBig(xOut, zInvSq)
 330         p256LittleToBig(yOut, zInv)
 331
 332         return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut)
 333 }
 334
 335 // CopyConditional copies overwrites p with src if v == 1, and leaves p
 336 // unchanged if v == 0.
 337 func (p *p256Point) CopyConditional(src *p256Point, v int) {
 338         pMask := uint64(v) - 1
 339         srcMask := ^pMask
 340
 341         for i, n := range p.xyz {
 342                 p.xyz[i] = (n & pMask) | (src.xyz[i] & srcMask)
 343         }
 344 }
 345
 346 // p256Inverse sets out to in^-1 mod p.
 347 func p256Inverse(out, in []uint64) {
 348         var stack [6 * 4]uint64
 349         p2 := stack[4*0 : 4*0+4]
 350         p4 := stack[4*1 : 4*1+4]
 351         p8 := stack[4*2 : 4*2+4]
 352         p16 := stack[4*3 : 4*3+4]
 353         p32 := stack[4*4 : 4*4+4]
 354
 355         p256Sqr(out, in, 1)
 356         p256Mul(p2, out, in) // 3*p
 357
 358         p256Sqr(out, p2, 2)
 359         p256Mul(p4, out, p2) // f*p
 360
 361         p256Sqr(out, p4, 4)
 362         p256Mul(p8, out, p4) // ff*p
 363
 364         p256Sqr(out, p8, 8)
 365         p256Mul(p16, out, p8) // ffff*p
 366
 367         p256Sqr(out, p16, 16)
 368         p256Mul(p32, out, p16) // ffffffff*p
 369
 370         p256Sqr(out, p32, 32)
 371         p256Mul(out, out, in)
 372
 373         p256Sqr(out, out, 128)
 374         p256Mul(out, out, p32)
 375
 376         p256Sqr(out, out, 32)
 377         p256Mul(out, out, p32)
 378
 379         p256Sqr(out, out, 16)
 380         p256Mul(out, out, p16)
 381
 382         p256Sqr(out, out, 8)
 383         p256Mul(out, out, p8)
 384
 385         p256Sqr(out, out, 4)
 386         p256Mul(out, out, p4)
 387
 388         p256Sqr(out, out, 2)
 389         p256Mul(out, out, p2)
 390
 391         p256Sqr(out, out, 2)
 392         p256Mul(out, out, in)
 393 }
 394
 395 func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) {
 396         copy(r[index*12:], p.xyz[:])
 397 }
 398
 399 func boothW5(in uint) (int, int) {
 400         var s uint = ^((in >> 5) - 1)
 401         var d uint = (1 << 6) - in - 1
 402         d = (d & s) | (in & (^s))
 403         d = (d >> 1) + (d & 1)
 404         return int(d), int(s & 1)
 405 }
 406
 407 func boothW6(in uint) (int, int) {
 408         var s uint = ^((in >> 6) - 1)
 409         var d uint = (1 << 7) - in - 1
 410         d = (d & s) | (in & (^s))
 411         d = (d >> 1) + (d & 1)
 412         return int(d), int(s & 1)
 413 }
 414
 415 func initTable() {
 416         p256Precomputed = new([43][32 * 8]uint64)
 417
 418         basePoint := []uint64{
 419                 0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6,
 420                 0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85,
 421                 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe,
 422         }
 423         t1 := make([]uint64, 12)
 424         t2 := make([]uint64, 12)
 425         copy(t2, basePoint)
 426
 427         zInv := make([]uint64, 4)
 428         zInvSq := make([]uint64, 4)
 429         for j := 0; j < 32; j++ {
 430                 copy(t1, t2)
 431                 for i := 0; i < 43; i++ {
 432                         // The window size is 6 so we need to double 6 times.
 433                         if i != 0 {
 434                                 for k := 0; k < 6; k++ {
 435                                         p256PointDoubleAsm(t1, t1)
 436                                 }
 437                         }
 438                         // Convert the point to affine form. (Its values are
 439                         // still in Montgomery form however.)
 440                         p256Inverse(zInv, t1[8:12])
 441                         p256Sqr(zInvSq, zInv, 1)
 442                         p256Mul(zInv, zInv, zInvSq)
 443
 444                         p256Mul(t1[:4], t1[:4], zInvSq)
 445                         p256Mul(t1[4:8], t1[4:8], zInv)
 446
 447                         copy(t1[8:12], basePoint[8:12])
 448                         // Update the table entry
 449                         copy(p256Precomputed[i][j*8:], t1[:8])
 450                 }
 451                 if j == 0 {
 452                         p256PointDoubleAsm(t2, basePoint)
 453                 } else {
 454                         p256PointAddAsm(t2, t2, basePoint)
 455                 }
 456         }
 457 }
 458
 459 func (p *p256Point) p256BaseMult(scalar []uint64) {
 460         precomputeOnce.Do(initTable)
 461
 462         wvalue := (scalar[0] << 1) & 0x7f
 463         sel, sign := boothW6(uint(wvalue))
 464         p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
 465         p256NegCond(p.xyz[4:8], sign)
 466
 467         // (This is one, in the Montgomery domain.)
 468         p.xyz[8] = 0x0000000000000001
 469         p.xyz[9] = 0xffffffff00000000
 470         p.xyz[10] = 0xffffffffffffffff
 471         p.xyz[11] = 0x00000000fffffffe
 472
 473         var t0 p256Point
 474         // (This is one, in the Montgomery domain.)
 475         t0.xyz[8] = 0x0000000000000001
 476         t0.xyz[9] = 0xffffffff00000000
 477         t0.xyz[10] = 0xffffffffffffffff
 478         t0.xyz[11] = 0x00000000fffffffe
 479
 480         index := uint(5)
 481         zero := sel
 482
 483         for i := 1; i < 43; i++ {
 484                 if index < 192 {
 485                         wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f
 486                 } else {
 487                         wvalue = (scalar[index/64] >> (index % 64)) & 0x7f
 488                 }
 489                 index += 6
 490                 sel, sign = boothW6(uint(wvalue))
 491                 p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
 492                 p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
 493                 zero |= sel
 494         }
 495 }
 496
 497 func (p *p256Point) p256ScalarMult(scalar []uint64) {
 498         // precomp is a table of precomputed points that stores powers of p
 499         // from p^1 to p^16.
 500         var precomp [16 * 4 * 3]uint64
 501         var t0, t1, t2, t3 p256Point
 502
 503         // Prepare the table
 504         p.p256StorePoint(&precomp, 0) // 1
 505
 506         p256PointDoubleAsm(t0.xyz[:], p.xyz[:])
 507         p256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
 508         p256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
 509         p256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
 510         t0.p256StorePoint(&precomp, 1)  // 2
 511         t1.p256StorePoint(&precomp, 3)  // 4
 512         t2.p256StorePoint(&precomp, 7)  // 8
 513         t3.p256StorePoint(&precomp, 15) // 16
 514
 515         p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
 516         p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
 517         p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
 518         t0.p256StorePoint(&precomp, 2) // 3
 519         t1.p256StorePoint(&precomp, 4) // 5
 520         t2.p256StorePoint(&precomp, 8) // 9
 521
 522         p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
 523         p256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
 524         t0.p256StorePoint(&precomp, 5) // 6
 525         t1.p256StorePoint(&precomp, 9) // 10
 526
 527         p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
 528         p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
 529         t2.p256StorePoint(&precomp, 6)  // 7
 530         t1.p256StorePoint(&precomp, 10) // 11
 531
 532         p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
 533         p256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
 534         t0.p256StorePoint(&precomp, 11) // 12
 535         t2.p256StorePoint(&precomp, 13) // 14
 536
 537         p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
 538         p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
 539         t0.p256StorePoint(&precomp, 12) // 13
 540         t2.p256StorePoint(&precomp, 14) // 15
 541
 542         // Start scanning the window from top bit
 543         index := uint(254)
 544         var sel, sign int
 545
 546         wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
 547         sel, _ = boothW5(uint(wvalue))
 548
 549         p256Select(p.xyz[0:12], precomp[0:], sel)
 550         zero := sel
 551
 552         for index > 4 {
 553                 index -= 5
 554                 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
 555                 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
 556                 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
 557                 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
 558                 p256PointDoubleAsm(p.xyz[:], p.xyz[:])
 559
 560                 if index < 192 {
 561                         wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
 562                 } else {
 563                         wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
 564                 }
 565
 566                 sel, sign = boothW5(uint(wvalue))
 567
 568                 p256Select(t0.xyz[0:], precomp[0:], sel)
 569                 p256NegCond(t0.xyz[4:8], sign)
 570                 p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
 571                 p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
 572                 p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
 573                 zero |= sel
 574         }
 575
 576         p256PointDoubleAsm(p.xyz[:], p.xyz[:])
 577         p256PointDoubleAsm(p.xyz[:], p.xyz[:])
 578         p256PointDoubleAsm(p.xyz[:], p.xyz[:])
 579         p256PointDoubleAsm(p.xyz[:], p.xyz[:])
 580         p256PointDoubleAsm(p.xyz[:], p.xyz[:])
 581
 582         wvalue = (scalar[0] << 1) & 0x3f
 583         sel, sign = boothW5(uint(wvalue))
 584
 585         p256Select(t0.xyz[0:], precomp[0:], sel)
 586         p256NegCond(t0.xyz[4:8], sign)
 587         p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
 588         p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
 589         p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
 590 }