1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // We have an implementation in amd64 assembly so this code is only run on
6 // non-amd64 platforms. The amd64 assembly does not support gccgo.
7 // +build !amd64 gccgo appengine
15 // This code is a port of the public domain, "ref10" implementation of
16 // curve25519 from SUPERCOP 20130419 by D. J. Bernstein.
18 // fieldElement represents an element of the field GF(2^255 - 19). An element
19 // t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
20 // t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
22 type fieldElement
[10]int32
24 func feZero(fe
*fieldElement
) {
30 func feOne(fe
*fieldElement
) {
35 func feAdd(dst
, a
, b
*fieldElement
) {
41 func feSub(dst
, a
, b
*fieldElement
) {
47 func feCopy(dst
, src
*fieldElement
) {
53 // feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0.
55 // Preconditions: b in {0,1}.
56 func feCSwap(f
, g
*fieldElement
, b
int32) {
59 t
:= b
& (f
[i
] ^ g
[i
])
65 // load3 reads a 24-bit, little-endian value from in.
66 func load3(in
[]byte) int64 {
69 r |
= int64(in
[1]) << 8
70 r |
= int64(in
[2]) << 16
74 // load4 reads a 32-bit, little-endian value from in.
75 func load4(in
[]byte) int64 {
76 return int64(binary
.LittleEndian
.Uint32(in
))
79 func feFromBytes(dst
*fieldElement
, src
*[32]byte) {
81 h1
:= load3(src
[4:]) << 6
82 h2
:= load3(src
[7:]) << 5
83 h3
:= load3(src
[10:]) << 3
84 h4
:= load3(src
[13:]) << 2
86 h6
:= load3(src
[20:]) << 7
87 h7
:= load3(src
[23:]) << 5
88 h8
:= load3(src
[26:]) << 4
89 h9
:= load3(src
[29:]) << 2
92 carry
[9] = (h9
+ 1<<24) >> 25
95 carry
[1] = (h1
+ 1<<24) >> 25
98 carry
[3] = (h3
+ 1<<24) >> 25
101 carry
[5] = (h5
+ 1<<24) >> 25
104 carry
[7] = (h7
+ 1<<24) >> 25
108 carry
[0] = (h0
+ 1<<25) >> 26
111 carry
[2] = (h2
+ 1<<25) >> 26
114 carry
[4] = (h4
+ 1<<25) >> 26
117 carry
[6] = (h6
+ 1<<25) >> 26
120 carry
[8] = (h8
+ 1<<25) >> 26
136 // feToBytes marshals h to s.
138 // |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
140 // Write p=2^255-19; q=floor(h/p).
141 // Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
144 // Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
145 // Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
147 // Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
151 // Have 0<=r<=p-1=2^255-20.
152 // Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
154 // Write x=r+19(2^-255)r+y.
155 // Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
157 // Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
158 // so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
159 func feToBytes(s
*[32]byte, h
*fieldElement
) {
162 q
:= (19*h
[9] + (1 << 24)) >> 25
174 // Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
176 // Goal: Output h-2^255 q, which is between 0 and 2^255-20.
178 carry
[0] = h
[0] >> 26
180 h
[0] -= carry
[0] << 26
181 carry
[1] = h
[1] >> 25
183 h
[1] -= carry
[1] << 25
184 carry
[2] = h
[2] >> 26
186 h
[2] -= carry
[2] << 26
187 carry
[3] = h
[3] >> 25
189 h
[3] -= carry
[3] << 25
190 carry
[4] = h
[4] >> 26
192 h
[4] -= carry
[4] << 26
193 carry
[5] = h
[5] >> 25
195 h
[5] -= carry
[5] << 25
196 carry
[6] = h
[6] >> 26
198 h
[6] -= carry
[6] << 26
199 carry
[7] = h
[7] >> 25
201 h
[7] -= carry
[7] << 25
202 carry
[8] = h
[8] >> 26
204 h
[8] -= carry
[8] << 26
205 carry
[9] = h
[9] >> 25
206 h
[9] -= carry
[9] << 25
209 // Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
210 // Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
211 // evidently 2^255 h10-2^255 q = 0.
212 // Goal: Output h[0]+...+2^230 h[9].
214 s
[0] = byte(h
[0] >> 0)
215 s
[1] = byte(h
[0] >> 8)
216 s
[2] = byte(h
[0] >> 16)
217 s
[3] = byte((h
[0] >> 24) |
(h
[1] << 2))
218 s
[4] = byte(h
[1] >> 6)
219 s
[5] = byte(h
[1] >> 14)
220 s
[6] = byte((h
[1] >> 22) |
(h
[2] << 3))
221 s
[7] = byte(h
[2] >> 5)
222 s
[8] = byte(h
[2] >> 13)
223 s
[9] = byte((h
[2] >> 21) |
(h
[3] << 5))
224 s
[10] = byte(h
[3] >> 3)
225 s
[11] = byte(h
[3] >> 11)
226 s
[12] = byte((h
[3] >> 19) |
(h
[4] << 6))
227 s
[13] = byte(h
[4] >> 2)
228 s
[14] = byte(h
[4] >> 10)
229 s
[15] = byte(h
[4] >> 18)
230 s
[16] = byte(h
[5] >> 0)
231 s
[17] = byte(h
[5] >> 8)
232 s
[18] = byte(h
[5] >> 16)
233 s
[19] = byte((h
[5] >> 24) |
(h
[6] << 1))
234 s
[20] = byte(h
[6] >> 7)
235 s
[21] = byte(h
[6] >> 15)
236 s
[22] = byte((h
[6] >> 23) |
(h
[7] << 3))
237 s
[23] = byte(h
[7] >> 5)
238 s
[24] = byte(h
[7] >> 13)
239 s
[25] = byte((h
[7] >> 21) |
(h
[8] << 4))
240 s
[26] = byte(h
[8] >> 4)
241 s
[27] = byte(h
[8] >> 12)
242 s
[28] = byte((h
[8] >> 20) |
(h
[9] << 6))
243 s
[29] = byte(h
[9] >> 2)
244 s
[30] = byte(h
[9] >> 10)
245 s
[31] = byte(h
[9] >> 18)
248 // feMul calculates h = f * g
249 // Can overlap h with f or g.
252 // |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
253 // |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
256 // |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
258 // Notes on implementation strategy:
260 // Using schoolbook multiplication.
261 // Karatsuba would save a little in some cost models.
263 // Most multiplications by 2 and 19 are 32-bit precomputations;
264 // cheaper than 64-bit postcomputations.
266 // There is one remaining multiplication by 19 in the carry chain;
267 // one *19 precomputation can be merged into this,
268 // but the resulting data flow is considerably less clean.
270 // There are 12 carries below.
271 // 10 of them are 2-way parallelizable and vectorizable.
272 // Can get away with 11 carries, but then data flow is much deeper.
274 // With tighter constraints on inputs can squeeze carries into int32.
275 func feMul(h
, f
, g
*fieldElement
) {
296 g1_19
:= 19 * g1
// 1.4*2^29
297 g2_19
:= 19 * g2
// 1.4*2^30; still ok
310 f0g0
:= int64(f0
) * int64(g0
)
311 f0g1
:= int64(f0
) * int64(g1
)
312 f0g2
:= int64(f0
) * int64(g2
)
313 f0g3
:= int64(f0
) * int64(g3
)
314 f0g4
:= int64(f0
) * int64(g4
)
315 f0g5
:= int64(f0
) * int64(g5
)
316 f0g6
:= int64(f0
) * int64(g6
)
317 f0g7
:= int64(f0
) * int64(g7
)
318 f0g8
:= int64(f0
) * int64(g8
)
319 f0g9
:= int64(f0
) * int64(g9
)
320 f1g0
:= int64(f1
) * int64(g0
)
321 f1g1_2
:= int64(f1_2
) * int64(g1
)
322 f1g2
:= int64(f1
) * int64(g2
)
323 f1g3_2
:= int64(f1_2
) * int64(g3
)
324 f1g4
:= int64(f1
) * int64(g4
)
325 f1g5_2
:= int64(f1_2
) * int64(g5
)
326 f1g6
:= int64(f1
) * int64(g6
)
327 f1g7_2
:= int64(f1_2
) * int64(g7
)
328 f1g8
:= int64(f1
) * int64(g8
)
329 f1g9_38
:= int64(f1_2
) * int64(g9_19
)
330 f2g0
:= int64(f2
) * int64(g0
)
331 f2g1
:= int64(f2
) * int64(g1
)
332 f2g2
:= int64(f2
) * int64(g2
)
333 f2g3
:= int64(f2
) * int64(g3
)
334 f2g4
:= int64(f2
) * int64(g4
)
335 f2g5
:= int64(f2
) * int64(g5
)
336 f2g6
:= int64(f2
) * int64(g6
)
337 f2g7
:= int64(f2
) * int64(g7
)
338 f2g8_19
:= int64(f2
) * int64(g8_19
)
339 f2g9_19
:= int64(f2
) * int64(g9_19
)
340 f3g0
:= int64(f3
) * int64(g0
)
341 f3g1_2
:= int64(f3_2
) * int64(g1
)
342 f3g2
:= int64(f3
) * int64(g2
)
343 f3g3_2
:= int64(f3_2
) * int64(g3
)
344 f3g4
:= int64(f3
) * int64(g4
)
345 f3g5_2
:= int64(f3_2
) * int64(g5
)
346 f3g6
:= int64(f3
) * int64(g6
)
347 f3g7_38
:= int64(f3_2
) * int64(g7_19
)
348 f3g8_19
:= int64(f3
) * int64(g8_19
)
349 f3g9_38
:= int64(f3_2
) * int64(g9_19
)
350 f4g0
:= int64(f4
) * int64(g0
)
351 f4g1
:= int64(f4
) * int64(g1
)
352 f4g2
:= int64(f4
) * int64(g2
)
353 f4g3
:= int64(f4
) * int64(g3
)
354 f4g4
:= int64(f4
) * int64(g4
)
355 f4g5
:= int64(f4
) * int64(g5
)
356 f4g6_19
:= int64(f4
) * int64(g6_19
)
357 f4g7_19
:= int64(f4
) * int64(g7_19
)
358 f4g8_19
:= int64(f4
) * int64(g8_19
)
359 f4g9_19
:= int64(f4
) * int64(g9_19
)
360 f5g0
:= int64(f5
) * int64(g0
)
361 f5g1_2
:= int64(f5_2
) * int64(g1
)
362 f5g2
:= int64(f5
) * int64(g2
)
363 f5g3_2
:= int64(f5_2
) * int64(g3
)
364 f5g4
:= int64(f5
) * int64(g4
)
365 f5g5_38
:= int64(f5_2
) * int64(g5_19
)
366 f5g6_19
:= int64(f5
) * int64(g6_19
)
367 f5g7_38
:= int64(f5_2
) * int64(g7_19
)
368 f5g8_19
:= int64(f5
) * int64(g8_19
)
369 f5g9_38
:= int64(f5_2
) * int64(g9_19
)
370 f6g0
:= int64(f6
) * int64(g0
)
371 f6g1
:= int64(f6
) * int64(g1
)
372 f6g2
:= int64(f6
) * int64(g2
)
373 f6g3
:= int64(f6
) * int64(g3
)
374 f6g4_19
:= int64(f6
) * int64(g4_19
)
375 f6g5_19
:= int64(f6
) * int64(g5_19
)
376 f6g6_19
:= int64(f6
) * int64(g6_19
)
377 f6g7_19
:= int64(f6
) * int64(g7_19
)
378 f6g8_19
:= int64(f6
) * int64(g8_19
)
379 f6g9_19
:= int64(f6
) * int64(g9_19
)
380 f7g0
:= int64(f7
) * int64(g0
)
381 f7g1_2
:= int64(f7_2
) * int64(g1
)
382 f7g2
:= int64(f7
) * int64(g2
)
383 f7g3_38
:= int64(f7_2
) * int64(g3_19
)
384 f7g4_19
:= int64(f7
) * int64(g4_19
)
385 f7g5_38
:= int64(f7_2
) * int64(g5_19
)
386 f7g6_19
:= int64(f7
) * int64(g6_19
)
387 f7g7_38
:= int64(f7_2
) * int64(g7_19
)
388 f7g8_19
:= int64(f7
) * int64(g8_19
)
389 f7g9_38
:= int64(f7_2
) * int64(g9_19
)
390 f8g0
:= int64(f8
) * int64(g0
)
391 f8g1
:= int64(f8
) * int64(g1
)
392 f8g2_19
:= int64(f8
) * int64(g2_19
)
393 f8g3_19
:= int64(f8
) * int64(g3_19
)
394 f8g4_19
:= int64(f8
) * int64(g4_19
)
395 f8g5_19
:= int64(f8
) * int64(g5_19
)
396 f8g6_19
:= int64(f8
) * int64(g6_19
)
397 f8g7_19
:= int64(f8
) * int64(g7_19
)
398 f8g8_19
:= int64(f8
) * int64(g8_19
)
399 f8g9_19
:= int64(f8
) * int64(g9_19
)
400 f9g0
:= int64(f9
) * int64(g0
)
401 f9g1_38
:= int64(f9_2
) * int64(g1_19
)
402 f9g2_19
:= int64(f9
) * int64(g2_19
)
403 f9g3_38
:= int64(f9_2
) * int64(g3_19
)
404 f9g4_19
:= int64(f9
) * int64(g4_19
)
405 f9g5_38
:= int64(f9_2
) * int64(g5_19
)
406 f9g6_19
:= int64(f9
) * int64(g6_19
)
407 f9g7_38
:= int64(f9_2
) * int64(g7_19
)
408 f9g8_19
:= int64(f9
) * int64(g8_19
)
409 f9g9_38
:= int64(f9_2
) * int64(g9_19
)
410 h0
:= f0g0
+ f1g9_38
+ f2g8_19
+ f3g7_38
+ f4g6_19
+ f5g5_38
+ f6g4_19
+ f7g3_38
+ f8g2_19
+ f9g1_38
411 h1
:= f0g1
+ f1g0
+ f2g9_19
+ f3g8_19
+ f4g7_19
+ f5g6_19
+ f6g5_19
+ f7g4_19
+ f8g3_19
+ f9g2_19
412 h2
:= f0g2
+ f1g1_2
+ f2g0
+ f3g9_38
+ f4g8_19
+ f5g7_38
+ f6g6_19
+ f7g5_38
+ f8g4_19
+ f9g3_38
413 h3
:= f0g3
+ f1g2
+ f2g1
+ f3g0
+ f4g9_19
+ f5g8_19
+ f6g7_19
+ f7g6_19
+ f8g5_19
+ f9g4_19
414 h4
:= f0g4
+ f1g3_2
+ f2g2
+ f3g1_2
+ f4g0
+ f5g9_38
+ f6g8_19
+ f7g7_38
+ f8g6_19
+ f9g5_38
415 h5
:= f0g5
+ f1g4
+ f2g3
+ f3g2
+ f4g1
+ f5g0
+ f6g9_19
+ f7g8_19
+ f8g7_19
+ f9g6_19
416 h6
:= f0g6
+ f1g5_2
+ f2g4
+ f3g3_2
+ f4g2
+ f5g1_2
+ f6g0
+ f7g9_38
+ f8g8_19
+ f9g7_38
417 h7
:= f0g7
+ f1g6
+ f2g5
+ f3g4
+ f4g3
+ f5g2
+ f6g1
+ f7g0
+ f8g9_19
+ f9g8_19
418 h8
:= f0g8
+ f1g7_2
+ f2g6
+ f3g5_2
+ f4g4
+ f5g3_2
+ f6g2
+ f7g1_2
+ f8g0
+ f9g9_38
419 h9
:= f0g9
+ f1g8
+ f2g7
+ f3g6
+ f4g5
+ f5g4
+ f6g3
+ f7g2
+ f8g1
+ f9g0
422 // |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
423 // i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
424 // |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
425 // i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
427 carry
[0] = (h0
+ (1 << 25)) >> 26
430 carry
[4] = (h4
+ (1 << 25)) >> 26
438 carry
[1] = (h1
+ (1 << 24)) >> 25
441 carry
[5] = (h5
+ (1 << 24)) >> 25
444 // |h1| <= 2^24; from now on fits into int32
445 // |h5| <= 2^24; from now on fits into int32
449 carry
[2] = (h2
+ (1 << 25)) >> 26
452 carry
[6] = (h6
+ (1 << 25)) >> 26
455 // |h2| <= 2^25; from now on fits into int32 unchanged
456 // |h6| <= 2^25; from now on fits into int32 unchanged
460 carry
[3] = (h3
+ (1 << 24)) >> 25
463 carry
[7] = (h7
+ (1 << 24)) >> 25
466 // |h3| <= 2^24; from now on fits into int32 unchanged
467 // |h7| <= 2^24; from now on fits into int32 unchanged
471 carry
[4] = (h4
+ (1 << 25)) >> 26
474 carry
[8] = (h8
+ (1 << 25)) >> 26
477 // |h4| <= 2^25; from now on fits into int32 unchanged
478 // |h8| <= 2^25; from now on fits into int32 unchanged
482 carry
[9] = (h9
+ (1 << 24)) >> 25
485 // |h9| <= 2^24; from now on fits into int32 unchanged
488 carry
[0] = (h0
+ (1 << 25)) >> 26
491 // |h0| <= 2^25; from now on fits into int32 unchanged
506 // feSquare calculates h = f*f. Can overlap h with f.
509 // |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
512 // |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
513 func feSquare(h
, f
*fieldElement
) {
532 f5_38
:= 38 * f5
// 1.31*2^30
533 f6_19
:= 19 * f6
// 1.31*2^30
534 f7_38
:= 38 * f7
// 1.31*2^30
535 f8_19
:= 19 * f8
// 1.31*2^30
536 f9_38
:= 38 * f9
// 1.31*2^30
537 f0f0
:= int64(f0
) * int64(f0
)
538 f0f1_2
:= int64(f0_2
) * int64(f1
)
539 f0f2_2
:= int64(f0_2
) * int64(f2
)
540 f0f3_2
:= int64(f0_2
) * int64(f3
)
541 f0f4_2
:= int64(f0_2
) * int64(f4
)
542 f0f5_2
:= int64(f0_2
) * int64(f5
)
543 f0f6_2
:= int64(f0_2
) * int64(f6
)
544 f0f7_2
:= int64(f0_2
) * int64(f7
)
545 f0f8_2
:= int64(f0_2
) * int64(f8
)
546 f0f9_2
:= int64(f0_2
) * int64(f9
)
547 f1f1_2
:= int64(f1_2
) * int64(f1
)
548 f1f2_2
:= int64(f1_2
) * int64(f2
)
549 f1f3_4
:= int64(f1_2
) * int64(f3_2
)
550 f1f4_2
:= int64(f1_2
) * int64(f4
)
551 f1f5_4
:= int64(f1_2
) * int64(f5_2
)
552 f1f6_2
:= int64(f1_2
) * int64(f6
)
553 f1f7_4
:= int64(f1_2
) * int64(f7_2
)
554 f1f8_2
:= int64(f1_2
) * int64(f8
)
555 f1f9_76
:= int64(f1_2
) * int64(f9_38
)
556 f2f2
:= int64(f2
) * int64(f2
)
557 f2f3_2
:= int64(f2_2
) * int64(f3
)
558 f2f4_2
:= int64(f2_2
) * int64(f4
)
559 f2f5_2
:= int64(f2_2
) * int64(f5
)
560 f2f6_2
:= int64(f2_2
) * int64(f6
)
561 f2f7_2
:= int64(f2_2
) * int64(f7
)
562 f2f8_38
:= int64(f2_2
) * int64(f8_19
)
563 f2f9_38
:= int64(f2
) * int64(f9_38
)
564 f3f3_2
:= int64(f3_2
) * int64(f3
)
565 f3f4_2
:= int64(f3_2
) * int64(f4
)
566 f3f5_4
:= int64(f3_2
) * int64(f5_2
)
567 f3f6_2
:= int64(f3_2
) * int64(f6
)
568 f3f7_76
:= int64(f3_2
) * int64(f7_38
)
569 f3f8_38
:= int64(f3_2
) * int64(f8_19
)
570 f3f9_76
:= int64(f3_2
) * int64(f9_38
)
571 f4f4
:= int64(f4
) * int64(f4
)
572 f4f5_2
:= int64(f4_2
) * int64(f5
)
573 f4f6_38
:= int64(f4_2
) * int64(f6_19
)
574 f4f7_38
:= int64(f4
) * int64(f7_38
)
575 f4f8_38
:= int64(f4_2
) * int64(f8_19
)
576 f4f9_38
:= int64(f4
) * int64(f9_38
)
577 f5f5_38
:= int64(f5
) * int64(f5_38
)
578 f5f6_38
:= int64(f5_2
) * int64(f6_19
)
579 f5f7_76
:= int64(f5_2
) * int64(f7_38
)
580 f5f8_38
:= int64(f5_2
) * int64(f8_19
)
581 f5f9_76
:= int64(f5_2
) * int64(f9_38
)
582 f6f6_19
:= int64(f6
) * int64(f6_19
)
583 f6f7_38
:= int64(f6
) * int64(f7_38
)
584 f6f8_38
:= int64(f6_2
) * int64(f8_19
)
585 f6f9_38
:= int64(f6
) * int64(f9_38
)
586 f7f7_38
:= int64(f7
) * int64(f7_38
)
587 f7f8_38
:= int64(f7_2
) * int64(f8_19
)
588 f7f9_76
:= int64(f7_2
) * int64(f9_38
)
589 f8f8_19
:= int64(f8
) * int64(f8_19
)
590 f8f9_38
:= int64(f8
) * int64(f9_38
)
591 f9f9_38
:= int64(f9
) * int64(f9_38
)
592 h0
:= f0f0
+ f1f9_76
+ f2f8_38
+ f3f7_76
+ f4f6_38
+ f5f5_38
593 h1
:= f0f1_2
+ f2f9_38
+ f3f8_38
+ f4f7_38
+ f5f6_38
594 h2
:= f0f2_2
+ f1f1_2
+ f3f9_76
+ f4f8_38
+ f5f7_76
+ f6f6_19
595 h3
:= f0f3_2
+ f1f2_2
+ f4f9_38
+ f5f8_38
+ f6f7_38
596 h4
:= f0f4_2
+ f1f3_4
+ f2f2
+ f5f9_76
+ f6f8_38
+ f7f7_38
597 h5
:= f0f5_2
+ f1f4_2
+ f2f3_2
+ f6f9_38
+ f7f8_38
598 h6
:= f0f6_2
+ f1f5_4
+ f2f4_2
+ f3f3_2
+ f7f9_76
+ f8f8_19
599 h7
:= f0f7_2
+ f1f6_2
+ f2f5_2
+ f3f4_2
+ f8f9_38
600 h8
:= f0f8_2
+ f1f7_4
+ f2f6_2
+ f3f5_4
+ f4f4
+ f9f9_38
601 h9
:= f0f9_2
+ f1f8_2
+ f2f7_2
+ f3f6_2
+ f4f5_2
604 carry
[0] = (h0
+ (1 << 25)) >> 26
607 carry
[4] = (h4
+ (1 << 25)) >> 26
611 carry
[1] = (h1
+ (1 << 24)) >> 25
614 carry
[5] = (h5
+ (1 << 24)) >> 25
618 carry
[2] = (h2
+ (1 << 25)) >> 26
621 carry
[6] = (h6
+ (1 << 25)) >> 26
625 carry
[3] = (h3
+ (1 << 24)) >> 25
628 carry
[7] = (h7
+ (1 << 24)) >> 25
632 carry
[4] = (h4
+ (1 << 25)) >> 26
635 carry
[8] = (h8
+ (1 << 25)) >> 26
639 carry
[9] = (h9
+ (1 << 24)) >> 25
643 carry
[0] = (h0
+ (1 << 25)) >> 26
659 // feMul121666 calculates h = f * 121666. Can overlap h with f.
662 // |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
665 // |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
666 func feMul121666(h
, f
*fieldElement
) {
667 h0
:= int64(f
[0]) * 121666
668 h1
:= int64(f
[1]) * 121666
669 h2
:= int64(f
[2]) * 121666
670 h3
:= int64(f
[3]) * 121666
671 h4
:= int64(f
[4]) * 121666
672 h5
:= int64(f
[5]) * 121666
673 h6
:= int64(f
[6]) * 121666
674 h7
:= int64(f
[7]) * 121666
675 h8
:= int64(f
[8]) * 121666
676 h9
:= int64(f
[9]) * 121666
679 carry
[9] = (h9
+ (1 << 24)) >> 25
682 carry
[1] = (h1
+ (1 << 24)) >> 25
685 carry
[3] = (h3
+ (1 << 24)) >> 25
688 carry
[5] = (h5
+ (1 << 24)) >> 25
691 carry
[7] = (h7
+ (1 << 24)) >> 25
695 carry
[0] = (h0
+ (1 << 25)) >> 26
698 carry
[2] = (h2
+ (1 << 25)) >> 26
701 carry
[4] = (h4
+ (1 << 25)) >> 26
704 carry
[6] = (h6
+ (1 << 25)) >> 26
707 carry
[8] = (h8
+ (1 << 25)) >> 26
723 // feInvert sets out = z^-1.
724 func feInvert(out
, z
*fieldElement
) {
725 var t0
, t1
, t2
, t3 fieldElement
729 for i
= 1; i
< 1; i
++ {
733 for i
= 1; i
< 2; i
++ {
739 for i
= 1; i
< 1; i
++ {
744 for i
= 1; i
< 5; i
++ {
749 for i
= 1; i
< 10; i
++ {
754 for i
= 1; i
< 20; i
++ {
759 for i
= 1; i
< 10; i
++ {
764 for i
= 1; i
< 50; i
++ {
769 for i
= 1; i
< 100; i
++ {
774 for i
= 1; i
< 50; i
++ {
779 for i
= 1; i
< 5; i
++ {
785 func scalarMult(out
, in
, base
*[32]byte) {
793 var x1
, x2
, z2
, x3
, z3
, tmp0
, tmp1 fieldElement
794 feFromBytes(&x1
, base
)
800 for pos
:= 254; pos
>= 0; pos
-- {
801 b
:= e
[pos
/8] >> uint(pos
&7)
804 feCSwap(&x2
, &x3
, swap
)
805 feCSwap(&z2
, &z3
, swap
)
808 feSub(&tmp0
, &x3
, &z3
)
809 feSub(&tmp1
, &x2
, &z2
)
812 feMul(&z3
, &tmp0
, &x2
)
813 feMul(&z2
, &z2
, &tmp1
)
814 feSquare(&tmp0
, &tmp1
)
818 feMul(&x2
, &tmp1
, &tmp0
)
819 feSub(&tmp1
, &tmp1
, &tmp0
)
821 feMul121666(&z3
, &tmp1
)
823 feAdd(&tmp0
, &tmp0
, &z3
)
825 feMul(&z2
, &tmp1
, &tmp0
)
828 feCSwap(&x2
, &x3
, swap
)
829 feCSwap(&z2
, &z3
, swap
)