Include ed25519 and sha512 C language implementation.
[brdnet.git] / ed25519 / fe.c
blob33ffa23539f1b0617beffe9224dbe79a01c75bf8
1 #include "fixedint.h"
2 #include "fe.h"
5 /*
6 helper functions
7 */
8 static uint64_t load_3(const unsigned char *in) {
9 uint64_t result;
11 result = (uint64_t) in[0];
12 result |= ((uint64_t) in[1]) << 8;
13 result |= ((uint64_t) in[2]) << 16;
15 return result;
18 static uint64_t load_4(const unsigned char *in) {
19 uint64_t result;
21 result = (uint64_t) in[0];
22 result |= ((uint64_t) in[1]) << 8;
23 result |= ((uint64_t) in[2]) << 16;
24 result |= ((uint64_t) in[3]) << 24;
26 return result;
32 h = 0
35 void fe_0(fe h) {
36 h[0] = 0;
37 h[1] = 0;
38 h[2] = 0;
39 h[3] = 0;
40 h[4] = 0;
41 h[5] = 0;
42 h[6] = 0;
43 h[7] = 0;
44 h[8] = 0;
45 h[9] = 0;
51 h = 1
54 void fe_1(fe h) {
55 h[0] = 1;
56 h[1] = 0;
57 h[2] = 0;
58 h[3] = 0;
59 h[4] = 0;
60 h[5] = 0;
61 h[6] = 0;
62 h[7] = 0;
63 h[8] = 0;
64 h[9] = 0;
70 h = f + g
71 Can overlap h with f or g.
73 Preconditions:
74 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
75 |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
77 Postconditions:
78 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
81 void fe_add(fe h, const fe f, const fe g) {
82 int32_t f0 = f[0];
83 int32_t f1 = f[1];
84 int32_t f2 = f[2];
85 int32_t f3 = f[3];
86 int32_t f4 = f[4];
87 int32_t f5 = f[5];
88 int32_t f6 = f[6];
89 int32_t f7 = f[7];
90 int32_t f8 = f[8];
91 int32_t f9 = f[9];
92 int32_t g0 = g[0];
93 int32_t g1 = g[1];
94 int32_t g2 = g[2];
95 int32_t g3 = g[3];
96 int32_t g4 = g[4];
97 int32_t g5 = g[5];
98 int32_t g6 = g[6];
99 int32_t g7 = g[7];
100 int32_t g8 = g[8];
101 int32_t g9 = g[9];
102 int32_t h0 = f0 + g0;
103 int32_t h1 = f1 + g1;
104 int32_t h2 = f2 + g2;
105 int32_t h3 = f3 + g3;
106 int32_t h4 = f4 + g4;
107 int32_t h5 = f5 + g5;
108 int32_t h6 = f6 + g6;
109 int32_t h7 = f7 + g7;
110 int32_t h8 = f8 + g8;
111 int32_t h9 = f9 + g9;
113 h[0] = h0;
114 h[1] = h1;
115 h[2] = h2;
116 h[3] = h3;
117 h[4] = h4;
118 h[5] = h5;
119 h[6] = h6;
120 h[7] = h7;
121 h[8] = h8;
122 h[9] = h9;
128 Replace (f,g) with (g,g) if b == 1;
129 replace (f,g) with (f,g) if b == 0.
131 Preconditions: b in {0,1}.
134 void fe_cmov(fe f, const fe g, unsigned int b) {
135 int32_t f0 = f[0];
136 int32_t f1 = f[1];
137 int32_t f2 = f[2];
138 int32_t f3 = f[3];
139 int32_t f4 = f[4];
140 int32_t f5 = f[5];
141 int32_t f6 = f[6];
142 int32_t f7 = f[7];
143 int32_t f8 = f[8];
144 int32_t f9 = f[9];
145 int32_t g0 = g[0];
146 int32_t g1 = g[1];
147 int32_t g2 = g[2];
148 int32_t g3 = g[3];
149 int32_t g4 = g[4];
150 int32_t g5 = g[5];
151 int32_t g6 = g[6];
152 int32_t g7 = g[7];
153 int32_t g8 = g[8];
154 int32_t g9 = g[9];
155 int32_t x0 = f0 ^ g0;
156 int32_t x1 = f1 ^ g1;
157 int32_t x2 = f2 ^ g2;
158 int32_t x3 = f3 ^ g3;
159 int32_t x4 = f4 ^ g4;
160 int32_t x5 = f5 ^ g5;
161 int32_t x6 = f6 ^ g6;
162 int32_t x7 = f7 ^ g7;
163 int32_t x8 = f8 ^ g8;
164 int32_t x9 = f9 ^ g9;
166 b = (unsigned int) (- (int) b); /* silence warning */
167 x0 &= b;
168 x1 &= b;
169 x2 &= b;
170 x3 &= b;
171 x4 &= b;
172 x5 &= b;
173 x6 &= b;
174 x7 &= b;
175 x8 &= b;
176 x9 &= b;
178 f[0] = f0 ^ x0;
179 f[1] = f1 ^ x1;
180 f[2] = f2 ^ x2;
181 f[3] = f3 ^ x3;
182 f[4] = f4 ^ x4;
183 f[5] = f5 ^ x5;
184 f[6] = f6 ^ x6;
185 f[7] = f7 ^ x7;
186 f[8] = f8 ^ x8;
187 f[9] = f9 ^ x9;
191 Replace (f,g) with (g,f) if b == 1;
192 replace (f,g) with (f,g) if b == 0.
194 Preconditions: b in {0,1}.
197 void fe_cswap(fe f,fe g,unsigned int b) {
198 int32_t f0 = f[0];
199 int32_t f1 = f[1];
200 int32_t f2 = f[2];
201 int32_t f3 = f[3];
202 int32_t f4 = f[4];
203 int32_t f5 = f[5];
204 int32_t f6 = f[6];
205 int32_t f7 = f[7];
206 int32_t f8 = f[8];
207 int32_t f9 = f[9];
208 int32_t g0 = g[0];
209 int32_t g1 = g[1];
210 int32_t g2 = g[2];
211 int32_t g3 = g[3];
212 int32_t g4 = g[4];
213 int32_t g5 = g[5];
214 int32_t g6 = g[6];
215 int32_t g7 = g[7];
216 int32_t g8 = g[8];
217 int32_t g9 = g[9];
218 int32_t x0 = f0 ^ g0;
219 int32_t x1 = f1 ^ g1;
220 int32_t x2 = f2 ^ g2;
221 int32_t x3 = f3 ^ g3;
222 int32_t x4 = f4 ^ g4;
223 int32_t x5 = f5 ^ g5;
224 int32_t x6 = f6 ^ g6;
225 int32_t x7 = f7 ^ g7;
226 int32_t x8 = f8 ^ g8;
227 int32_t x9 = f9 ^ g9;
228 b = (unsigned int) (- (int) b); /* silence warning */
229 x0 &= b;
230 x1 &= b;
231 x2 &= b;
232 x3 &= b;
233 x4 &= b;
234 x5 &= b;
235 x6 &= b;
236 x7 &= b;
237 x8 &= b;
238 x9 &= b;
239 f[0] = f0 ^ x0;
240 f[1] = f1 ^ x1;
241 f[2] = f2 ^ x2;
242 f[3] = f3 ^ x3;
243 f[4] = f4 ^ x4;
244 f[5] = f5 ^ x5;
245 f[6] = f6 ^ x6;
246 f[7] = f7 ^ x7;
247 f[8] = f8 ^ x8;
248 f[9] = f9 ^ x9;
249 g[0] = g0 ^ x0;
250 g[1] = g1 ^ x1;
251 g[2] = g2 ^ x2;
252 g[3] = g3 ^ x3;
253 g[4] = g4 ^ x4;
254 g[5] = g5 ^ x5;
255 g[6] = g6 ^ x6;
256 g[7] = g7 ^ x7;
257 g[8] = g8 ^ x8;
258 g[9] = g9 ^ x9;
264 h = f
267 void fe_copy(fe h, const fe f) {
268 int32_t f0 = f[0];
269 int32_t f1 = f[1];
270 int32_t f2 = f[2];
271 int32_t f3 = f[3];
272 int32_t f4 = f[4];
273 int32_t f5 = f[5];
274 int32_t f6 = f[6];
275 int32_t f7 = f[7];
276 int32_t f8 = f[8];
277 int32_t f9 = f[9];
279 h[0] = f0;
280 h[1] = f1;
281 h[2] = f2;
282 h[3] = f3;
283 h[4] = f4;
284 h[5] = f5;
285 h[6] = f6;
286 h[7] = f7;
287 h[8] = f8;
288 h[9] = f9;
294 Ignores top bit of h.
297 void fe_frombytes(fe h, const unsigned char *s) {
298 int64_t h0 = load_4(s);
299 int64_t h1 = load_3(s + 4) << 6;
300 int64_t h2 = load_3(s + 7) << 5;
301 int64_t h3 = load_3(s + 10) << 3;
302 int64_t h4 = load_3(s + 13) << 2;
303 int64_t h5 = load_4(s + 16);
304 int64_t h6 = load_3(s + 20) << 7;
305 int64_t h7 = load_3(s + 23) << 5;
306 int64_t h8 = load_3(s + 26) << 4;
307 int64_t h9 = (load_3(s + 29) & 8388607) << 2;
308 int64_t carry0;
309 int64_t carry1;
310 int64_t carry2;
311 int64_t carry3;
312 int64_t carry4;
313 int64_t carry5;
314 int64_t carry6;
315 int64_t carry7;
316 int64_t carry8;
317 int64_t carry9;
319 carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
320 h0 += carry9 * 19;
321 h9 -= carry9 << 25;
322 carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
323 h2 += carry1;
324 h1 -= carry1 << 25;
325 carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
326 h4 += carry3;
327 h3 -= carry3 << 25;
328 carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
329 h6 += carry5;
330 h5 -= carry5 << 25;
331 carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
332 h8 += carry7;
333 h7 -= carry7 << 25;
334 carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
335 h1 += carry0;
336 h0 -= carry0 << 26;
337 carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
338 h3 += carry2;
339 h2 -= carry2 << 26;
340 carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
341 h5 += carry4;
342 h4 -= carry4 << 26;
343 carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
344 h7 += carry6;
345 h6 -= carry6 << 26;
346 carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
347 h9 += carry8;
348 h8 -= carry8 << 26;
350 h[0] = (int32_t) h0;
351 h[1] = (int32_t) h1;
352 h[2] = (int32_t) h2;
353 h[3] = (int32_t) h3;
354 h[4] = (int32_t) h4;
355 h[5] = (int32_t) h5;
356 h[6] = (int32_t) h6;
357 h[7] = (int32_t) h7;
358 h[8] = (int32_t) h8;
359 h[9] = (int32_t) h9;
364 void fe_invert(fe out, const fe z) {
365 fe t0;
366 fe t1;
367 fe t2;
368 fe t3;
369 int i;
371 fe_sq(t0, z);
373 for (i = 1; i < 1; ++i) {
374 fe_sq(t0, t0);
377 fe_sq(t1, t0);
379 for (i = 1; i < 2; ++i) {
380 fe_sq(t1, t1);
383 fe_mul(t1, z, t1);
384 fe_mul(t0, t0, t1);
385 fe_sq(t2, t0);
387 for (i = 1; i < 1; ++i) {
388 fe_sq(t2, t2);
391 fe_mul(t1, t1, t2);
392 fe_sq(t2, t1);
394 for (i = 1; i < 5; ++i) {
395 fe_sq(t2, t2);
398 fe_mul(t1, t2, t1);
399 fe_sq(t2, t1);
401 for (i = 1; i < 10; ++i) {
402 fe_sq(t2, t2);
405 fe_mul(t2, t2, t1);
406 fe_sq(t3, t2);
408 for (i = 1; i < 20; ++i) {
409 fe_sq(t3, t3);
412 fe_mul(t2, t3, t2);
413 fe_sq(t2, t2);
415 for (i = 1; i < 10; ++i) {
416 fe_sq(t2, t2);
419 fe_mul(t1, t2, t1);
420 fe_sq(t2, t1);
422 for (i = 1; i < 50; ++i) {
423 fe_sq(t2, t2);
426 fe_mul(t2, t2, t1);
427 fe_sq(t3, t2);
429 for (i = 1; i < 100; ++i) {
430 fe_sq(t3, t3);
433 fe_mul(t2, t3, t2);
434 fe_sq(t2, t2);
436 for (i = 1; i < 50; ++i) {
437 fe_sq(t2, t2);
440 fe_mul(t1, t2, t1);
441 fe_sq(t1, t1);
443 for (i = 1; i < 5; ++i) {
444 fe_sq(t1, t1);
447 fe_mul(out, t1, t0);
453 return 1 if f is in {1,3,5,...,q-2}
454 return 0 if f is in {0,2,4,...,q-1}
456 Preconditions:
457 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
460 int fe_isnegative(const fe f) {
461 unsigned char s[32];
463 fe_tobytes(s, f);
465 return s[0] & 1;
471 return 1 if f == 0
472 return 0 if f != 0
474 Preconditions:
475 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
478 int fe_isnonzero(const fe f) {
479 unsigned char s[32];
480 unsigned char r;
482 fe_tobytes(s, f);
484 r = s[0];
485 #define F(i) r |= s[i]
486 F(1);
487 F(2);
488 F(3);
489 F(4);
490 F(5);
491 F(6);
492 F(7);
493 F(8);
494 F(9);
495 F(10);
496 F(11);
497 F(12);
498 F(13);
499 F(14);
500 F(15);
501 F(16);
502 F(17);
503 F(18);
504 F(19);
505 F(20);
506 F(21);
507 F(22);
508 F(23);
509 F(24);
510 F(25);
511 F(26);
512 F(27);
513 F(28);
514 F(29);
515 F(30);
516 F(31);
517 #undef F
519 return r != 0;
525 h = f * g
526 Can overlap h with f or g.
528 Preconditions:
529 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
530 |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
532 Postconditions:
533 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
537 Notes on implementation strategy:
539 Using schoolbook multiplication.
540 Karatsuba would save a little in some cost models.
542 Most multiplications by 2 and 19 are 32-bit precomputations;
543 cheaper than 64-bit postcomputations.
545 There is one remaining multiplication by 19 in the carry chain;
546 one *19 precomputation can be merged into this,
547 but the resulting data flow is considerably less clean.
549 There are 12 carries below.
550 10 of them are 2-way parallelizable and vectorizable.
551 Can get away with 11 carries, but then data flow is much deeper.
553 With tighter constraints on inputs can squeeze carries into int32.
556 void fe_mul(fe h, const fe f, const fe g) {
557 int32_t f0 = f[0];
558 int32_t f1 = f[1];
559 int32_t f2 = f[2];
560 int32_t f3 = f[3];
561 int32_t f4 = f[4];
562 int32_t f5 = f[5];
563 int32_t f6 = f[6];
564 int32_t f7 = f[7];
565 int32_t f8 = f[8];
566 int32_t f9 = f[9];
567 int32_t g0 = g[0];
568 int32_t g1 = g[1];
569 int32_t g2 = g[2];
570 int32_t g3 = g[3];
571 int32_t g4 = g[4];
572 int32_t g5 = g[5];
573 int32_t g6 = g[6];
574 int32_t g7 = g[7];
575 int32_t g8 = g[8];
576 int32_t g9 = g[9];
577 int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
578 int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
579 int32_t g3_19 = 19 * g3;
580 int32_t g4_19 = 19 * g4;
581 int32_t g5_19 = 19 * g5;
582 int32_t g6_19 = 19 * g6;
583 int32_t g7_19 = 19 * g7;
584 int32_t g8_19 = 19 * g8;
585 int32_t g9_19 = 19 * g9;
586 int32_t f1_2 = 2 * f1;
587 int32_t f3_2 = 2 * f3;
588 int32_t f5_2 = 2 * f5;
589 int32_t f7_2 = 2 * f7;
590 int32_t f9_2 = 2 * f9;
591 int64_t f0g0 = f0 * (int64_t) g0;
592 int64_t f0g1 = f0 * (int64_t) g1;
593 int64_t f0g2 = f0 * (int64_t) g2;
594 int64_t f0g3 = f0 * (int64_t) g3;
595 int64_t f0g4 = f0 * (int64_t) g4;
596 int64_t f0g5 = f0 * (int64_t) g5;
597 int64_t f0g6 = f0 * (int64_t) g6;
598 int64_t f0g7 = f0 * (int64_t) g7;
599 int64_t f0g8 = f0 * (int64_t) g8;
600 int64_t f0g9 = f0 * (int64_t) g9;
601 int64_t f1g0 = f1 * (int64_t) g0;
602 int64_t f1g1_2 = f1_2 * (int64_t) g1;
603 int64_t f1g2 = f1 * (int64_t) g2;
604 int64_t f1g3_2 = f1_2 * (int64_t) g3;
605 int64_t f1g4 = f1 * (int64_t) g4;
606 int64_t f1g5_2 = f1_2 * (int64_t) g5;
607 int64_t f1g6 = f1 * (int64_t) g6;
608 int64_t f1g7_2 = f1_2 * (int64_t) g7;
609 int64_t f1g8 = f1 * (int64_t) g8;
610 int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
611 int64_t f2g0 = f2 * (int64_t) g0;
612 int64_t f2g1 = f2 * (int64_t) g1;
613 int64_t f2g2 = f2 * (int64_t) g2;
614 int64_t f2g3 = f2 * (int64_t) g3;
615 int64_t f2g4 = f2 * (int64_t) g4;
616 int64_t f2g5 = f2 * (int64_t) g5;
617 int64_t f2g6 = f2 * (int64_t) g6;
618 int64_t f2g7 = f2 * (int64_t) g7;
619 int64_t f2g8_19 = f2 * (int64_t) g8_19;
620 int64_t f2g9_19 = f2 * (int64_t) g9_19;
621 int64_t f3g0 = f3 * (int64_t) g0;
622 int64_t f3g1_2 = f3_2 * (int64_t) g1;
623 int64_t f3g2 = f3 * (int64_t) g2;
624 int64_t f3g3_2 = f3_2 * (int64_t) g3;
625 int64_t f3g4 = f3 * (int64_t) g4;
626 int64_t f3g5_2 = f3_2 * (int64_t) g5;
627 int64_t f3g6 = f3 * (int64_t) g6;
628 int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
629 int64_t f3g8_19 = f3 * (int64_t) g8_19;
630 int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
631 int64_t f4g0 = f4 * (int64_t) g0;
632 int64_t f4g1 = f4 * (int64_t) g1;
633 int64_t f4g2 = f4 * (int64_t) g2;
634 int64_t f4g3 = f4 * (int64_t) g3;
635 int64_t f4g4 = f4 * (int64_t) g4;
636 int64_t f4g5 = f4 * (int64_t) g5;
637 int64_t f4g6_19 = f4 * (int64_t) g6_19;
638 int64_t f4g7_19 = f4 * (int64_t) g7_19;
639 int64_t f4g8_19 = f4 * (int64_t) g8_19;
640 int64_t f4g9_19 = f4 * (int64_t) g9_19;
641 int64_t f5g0 = f5 * (int64_t) g0;
642 int64_t f5g1_2 = f5_2 * (int64_t) g1;
643 int64_t f5g2 = f5 * (int64_t) g2;
644 int64_t f5g3_2 = f5_2 * (int64_t) g3;
645 int64_t f5g4 = f5 * (int64_t) g4;
646 int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
647 int64_t f5g6_19 = f5 * (int64_t) g6_19;
648 int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
649 int64_t f5g8_19 = f5 * (int64_t) g8_19;
650 int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
651 int64_t f6g0 = f6 * (int64_t) g0;
652 int64_t f6g1 = f6 * (int64_t) g1;
653 int64_t f6g2 = f6 * (int64_t) g2;
654 int64_t f6g3 = f6 * (int64_t) g3;
655 int64_t f6g4_19 = f6 * (int64_t) g4_19;
656 int64_t f6g5_19 = f6 * (int64_t) g5_19;
657 int64_t f6g6_19 = f6 * (int64_t) g6_19;
658 int64_t f6g7_19 = f6 * (int64_t) g7_19;
659 int64_t f6g8_19 = f6 * (int64_t) g8_19;
660 int64_t f6g9_19 = f6 * (int64_t) g9_19;
661 int64_t f7g0 = f7 * (int64_t) g0;
662 int64_t f7g1_2 = f7_2 * (int64_t) g1;
663 int64_t f7g2 = f7 * (int64_t) g2;
664 int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
665 int64_t f7g4_19 = f7 * (int64_t) g4_19;
666 int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
667 int64_t f7g6_19 = f7 * (int64_t) g6_19;
668 int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
669 int64_t f7g8_19 = f7 * (int64_t) g8_19;
670 int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
671 int64_t f8g0 = f8 * (int64_t) g0;
672 int64_t f8g1 = f8 * (int64_t) g1;
673 int64_t f8g2_19 = f8 * (int64_t) g2_19;
674 int64_t f8g3_19 = f8 * (int64_t) g3_19;
675 int64_t f8g4_19 = f8 * (int64_t) g4_19;
676 int64_t f8g5_19 = f8 * (int64_t) g5_19;
677 int64_t f8g6_19 = f8 * (int64_t) g6_19;
678 int64_t f8g7_19 = f8 * (int64_t) g7_19;
679 int64_t f8g8_19 = f8 * (int64_t) g8_19;
680 int64_t f8g9_19 = f8 * (int64_t) g9_19;
681 int64_t f9g0 = f9 * (int64_t) g0;
682 int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
683 int64_t f9g2_19 = f9 * (int64_t) g2_19;
684 int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
685 int64_t f9g4_19 = f9 * (int64_t) g4_19;
686 int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
687 int64_t f9g6_19 = f9 * (int64_t) g6_19;
688 int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
689 int64_t f9g8_19 = f9 * (int64_t) g8_19;
690 int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
691 int64_t h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38;
692 int64_t h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19;
693 int64_t h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38;
694 int64_t h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19;
695 int64_t h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38;
696 int64_t h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19;
697 int64_t h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38;
698 int64_t h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19;
699 int64_t h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38;
700 int64_t h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0 ;
701 int64_t carry0;
702 int64_t carry1;
703 int64_t carry2;
704 int64_t carry3;
705 int64_t carry4;
706 int64_t carry5;
707 int64_t carry6;
708 int64_t carry7;
709 int64_t carry8;
710 int64_t carry9;
712 carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
713 h1 += carry0;
714 h0 -= carry0 << 26;
715 carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
716 h5 += carry4;
717 h4 -= carry4 << 26;
719 carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
720 h2 += carry1;
721 h1 -= carry1 << 25;
722 carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
723 h6 += carry5;
724 h5 -= carry5 << 25;
726 carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
727 h3 += carry2;
728 h2 -= carry2 << 26;
729 carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
730 h7 += carry6;
731 h6 -= carry6 << 26;
733 carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
734 h4 += carry3;
735 h3 -= carry3 << 25;
736 carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
737 h8 += carry7;
738 h7 -= carry7 << 25;
740 carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
741 h5 += carry4;
742 h4 -= carry4 << 26;
743 carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
744 h9 += carry8;
745 h8 -= carry8 << 26;
747 carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
748 h0 += carry9 * 19;
749 h9 -= carry9 << 25;
751 carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
752 h1 += carry0;
753 h0 -= carry0 << 26;
755 h[0] = (int32_t) h0;
756 h[1] = (int32_t) h1;
757 h[2] = (int32_t) h2;
758 h[3] = (int32_t) h3;
759 h[4] = (int32_t) h4;
760 h[5] = (int32_t) h5;
761 h[6] = (int32_t) h6;
762 h[7] = (int32_t) h7;
763 h[8] = (int32_t) h8;
764 h[9] = (int32_t) h9;
769 h = f * 121666
770 Can overlap h with f.
772 Preconditions:
773 |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
775 Postconditions:
776 |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
779 void fe_mul121666(fe h, fe f) {
780 int32_t f0 = f[0];
781 int32_t f1 = f[1];
782 int32_t f2 = f[2];
783 int32_t f3 = f[3];
784 int32_t f4 = f[4];
785 int32_t f5 = f[5];
786 int32_t f6 = f[6];
787 int32_t f7 = f[7];
788 int32_t f8 = f[8];
789 int32_t f9 = f[9];
790 int64_t h0 = f0 * (int64_t) 121666;
791 int64_t h1 = f1 * (int64_t) 121666;
792 int64_t h2 = f2 * (int64_t) 121666;
793 int64_t h3 = f3 * (int64_t) 121666;
794 int64_t h4 = f4 * (int64_t) 121666;
795 int64_t h5 = f5 * (int64_t) 121666;
796 int64_t h6 = f6 * (int64_t) 121666;
797 int64_t h7 = f7 * (int64_t) 121666;
798 int64_t h8 = f8 * (int64_t) 121666;
799 int64_t h9 = f9 * (int64_t) 121666;
800 int64_t carry0;
801 int64_t carry1;
802 int64_t carry2;
803 int64_t carry3;
804 int64_t carry4;
805 int64_t carry5;
806 int64_t carry6;
807 int64_t carry7;
808 int64_t carry8;
809 int64_t carry9;
811 carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
812 carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
813 carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
814 carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
815 carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
817 carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
818 carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
819 carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
820 carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
821 carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
823 h[0] = h0;
824 h[1] = h1;
825 h[2] = h2;
826 h[3] = h3;
827 h[4] = h4;
828 h[5] = h5;
829 h[6] = h6;
830 h[7] = h7;
831 h[8] = h8;
832 h[9] = h9;
837 h = -f
839 Preconditions:
840 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
842 Postconditions:
843 |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
846 void fe_neg(fe h, const fe f) {
847 int32_t f0 = f[0];
848 int32_t f1 = f[1];
849 int32_t f2 = f[2];
850 int32_t f3 = f[3];
851 int32_t f4 = f[4];
852 int32_t f5 = f[5];
853 int32_t f6 = f[6];
854 int32_t f7 = f[7];
855 int32_t f8 = f[8];
856 int32_t f9 = f[9];
857 int32_t h0 = -f0;
858 int32_t h1 = -f1;
859 int32_t h2 = -f2;
860 int32_t h3 = -f3;
861 int32_t h4 = -f4;
862 int32_t h5 = -f5;
863 int32_t h6 = -f6;
864 int32_t h7 = -f7;
865 int32_t h8 = -f8;
866 int32_t h9 = -f9;
868 h[0] = h0;
869 h[1] = h1;
870 h[2] = h2;
871 h[3] = h3;
872 h[4] = h4;
873 h[5] = h5;
874 h[6] = h6;
875 h[7] = h7;
876 h[8] = h8;
877 h[9] = h9;
881 void fe_pow22523(fe out, const fe z) {
882 fe t0;
883 fe t1;
884 fe t2;
885 int i;
886 fe_sq(t0, z);
888 for (i = 1; i < 1; ++i) {
889 fe_sq(t0, t0);
892 fe_sq(t1, t0);
894 for (i = 1; i < 2; ++i) {
895 fe_sq(t1, t1);
898 fe_mul(t1, z, t1);
899 fe_mul(t0, t0, t1);
900 fe_sq(t0, t0);
902 for (i = 1; i < 1; ++i) {
903 fe_sq(t0, t0);
906 fe_mul(t0, t1, t0);
907 fe_sq(t1, t0);
909 for (i = 1; i < 5; ++i) {
910 fe_sq(t1, t1);
913 fe_mul(t0, t1, t0);
914 fe_sq(t1, t0);
916 for (i = 1; i < 10; ++i) {
917 fe_sq(t1, t1);
920 fe_mul(t1, t1, t0);
921 fe_sq(t2, t1);
923 for (i = 1; i < 20; ++i) {
924 fe_sq(t2, t2);
927 fe_mul(t1, t2, t1);
928 fe_sq(t1, t1);
930 for (i = 1; i < 10; ++i) {
931 fe_sq(t1, t1);
934 fe_mul(t0, t1, t0);
935 fe_sq(t1, t0);
937 for (i = 1; i < 50; ++i) {
938 fe_sq(t1, t1);
941 fe_mul(t1, t1, t0);
942 fe_sq(t2, t1);
944 for (i = 1; i < 100; ++i) {
945 fe_sq(t2, t2);
948 fe_mul(t1, t2, t1);
949 fe_sq(t1, t1);
951 for (i = 1; i < 50; ++i) {
952 fe_sq(t1, t1);
955 fe_mul(t0, t1, t0);
956 fe_sq(t0, t0);
958 for (i = 1; i < 2; ++i) {
959 fe_sq(t0, t0);
962 fe_mul(out, t0, z);
963 return;
968 h = f * f
969 Can overlap h with f.
971 Preconditions:
972 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
974 Postconditions:
975 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
979 See fe_mul.c for discussion of implementation strategy.
982 void fe_sq(fe h, const fe f) {
983 int32_t f0 = f[0];
984 int32_t f1 = f[1];
985 int32_t f2 = f[2];
986 int32_t f3 = f[3];
987 int32_t f4 = f[4];
988 int32_t f5 = f[5];
989 int32_t f6 = f[6];
990 int32_t f7 = f[7];
991 int32_t f8 = f[8];
992 int32_t f9 = f[9];
993 int32_t f0_2 = 2 * f0;
994 int32_t f1_2 = 2 * f1;
995 int32_t f2_2 = 2 * f2;
996 int32_t f3_2 = 2 * f3;
997 int32_t f4_2 = 2 * f4;
998 int32_t f5_2 = 2 * f5;
999 int32_t f6_2 = 2 * f6;
1000 int32_t f7_2 = 2 * f7;
1001 int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
1002 int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
1003 int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
1004 int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
1005 int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
1006 int64_t f0f0 = f0 * (int64_t) f0;
1007 int64_t f0f1_2 = f0_2 * (int64_t) f1;
1008 int64_t f0f2_2 = f0_2 * (int64_t) f2;
1009 int64_t f0f3_2 = f0_2 * (int64_t) f3;
1010 int64_t f0f4_2 = f0_2 * (int64_t) f4;
1011 int64_t f0f5_2 = f0_2 * (int64_t) f5;
1012 int64_t f0f6_2 = f0_2 * (int64_t) f6;
1013 int64_t f0f7_2 = f0_2 * (int64_t) f7;
1014 int64_t f0f8_2 = f0_2 * (int64_t) f8;
1015 int64_t f0f9_2 = f0_2 * (int64_t) f9;
1016 int64_t f1f1_2 = f1_2 * (int64_t) f1;
1017 int64_t f1f2_2 = f1_2 * (int64_t) f2;
1018 int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
1019 int64_t f1f4_2 = f1_2 * (int64_t) f4;
1020 int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
1021 int64_t f1f6_2 = f1_2 * (int64_t) f6;
1022 int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
1023 int64_t f1f8_2 = f1_2 * (int64_t) f8;
1024 int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
1025 int64_t f2f2 = f2 * (int64_t) f2;
1026 int64_t f2f3_2 = f2_2 * (int64_t) f3;
1027 int64_t f2f4_2 = f2_2 * (int64_t) f4;
1028 int64_t f2f5_2 = f2_2 * (int64_t) f5;
1029 int64_t f2f6_2 = f2_2 * (int64_t) f6;
1030 int64_t f2f7_2 = f2_2 * (int64_t) f7;
1031 int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
1032 int64_t f2f9_38 = f2 * (int64_t) f9_38;
1033 int64_t f3f3_2 = f3_2 * (int64_t) f3;
1034 int64_t f3f4_2 = f3_2 * (int64_t) f4;
1035 int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
1036 int64_t f3f6_2 = f3_2 * (int64_t) f6;
1037 int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
1038 int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
1039 int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
1040 int64_t f4f4 = f4 * (int64_t) f4;
1041 int64_t f4f5_2 = f4_2 * (int64_t) f5;
1042 int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
1043 int64_t f4f7_38 = f4 * (int64_t) f7_38;
1044 int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
1045 int64_t f4f9_38 = f4 * (int64_t) f9_38;
1046 int64_t f5f5_38 = f5 * (int64_t) f5_38;
1047 int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
1048 int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
1049 int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
1050 int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
1051 int64_t f6f6_19 = f6 * (int64_t) f6_19;
1052 int64_t f6f7_38 = f6 * (int64_t) f7_38;
1053 int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
1054 int64_t f6f9_38 = f6 * (int64_t) f9_38;
1055 int64_t f7f7_38 = f7 * (int64_t) f7_38;
1056 int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
1057 int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
1058 int64_t f8f8_19 = f8 * (int64_t) f8_19;
1059 int64_t f8f9_38 = f8 * (int64_t) f9_38;
1060 int64_t f9f9_38 = f9 * (int64_t) f9_38;
1061 int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
1062 int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
1063 int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
1064 int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
1065 int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
1066 int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
1067 int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
1068 int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
1069 int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
1070 int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
1071 int64_t carry0;
1072 int64_t carry1;
1073 int64_t carry2;
1074 int64_t carry3;
1075 int64_t carry4;
1076 int64_t carry5;
1077 int64_t carry6;
1078 int64_t carry7;
1079 int64_t carry8;
1080 int64_t carry9;
1081 carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
1082 h1 += carry0;
1083 h0 -= carry0 << 26;
1084 carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
1085 h5 += carry4;
1086 h4 -= carry4 << 26;
1087 carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
1088 h2 += carry1;
1089 h1 -= carry1 << 25;
1090 carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
1091 h6 += carry5;
1092 h5 -= carry5 << 25;
1093 carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
1094 h3 += carry2;
1095 h2 -= carry2 << 26;
1096 carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
1097 h7 += carry6;
1098 h6 -= carry6 << 26;
1099 carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
1100 h4 += carry3;
1101 h3 -= carry3 << 25;
1102 carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
1103 h8 += carry7;
1104 h7 -= carry7 << 25;
1105 carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
1106 h5 += carry4;
1107 h4 -= carry4 << 26;
1108 carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
1109 h9 += carry8;
1110 h8 -= carry8 << 26;
1111 carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
1112 h0 += carry9 * 19;
1113 h9 -= carry9 << 25;
1114 carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
1115 h1 += carry0;
1116 h0 -= carry0 << 26;
1117 h[0] = (int32_t) h0;
1118 h[1] = (int32_t) h1;
1119 h[2] = (int32_t) h2;
1120 h[3] = (int32_t) h3;
1121 h[4] = (int32_t) h4;
1122 h[5] = (int32_t) h5;
1123 h[6] = (int32_t) h6;
1124 h[7] = (int32_t) h7;
1125 h[8] = (int32_t) h8;
1126 h[9] = (int32_t) h9;
1131 h = 2 * f * f
1132 Can overlap h with f.
1134 Preconditions:
1135 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
1137 Postconditions:
1138 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
1142 See fe_mul.c for discussion of implementation strategy.
1145 void fe_sq2(fe h, const fe f) {
1146 int32_t f0 = f[0];
1147 int32_t f1 = f[1];
1148 int32_t f2 = f[2];
1149 int32_t f3 = f[3];
1150 int32_t f4 = f[4];
1151 int32_t f5 = f[5];
1152 int32_t f6 = f[6];
1153 int32_t f7 = f[7];
1154 int32_t f8 = f[8];
1155 int32_t f9 = f[9];
1156 int32_t f0_2 = 2 * f0;
1157 int32_t f1_2 = 2 * f1;
1158 int32_t f2_2 = 2 * f2;
1159 int32_t f3_2 = 2 * f3;
1160 int32_t f4_2 = 2 * f4;
1161 int32_t f5_2 = 2 * f5;
1162 int32_t f6_2 = 2 * f6;
1163 int32_t f7_2 = 2 * f7;
1164 int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
1165 int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
1166 int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
1167 int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
1168 int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
1169 int64_t f0f0 = f0 * (int64_t) f0;
1170 int64_t f0f1_2 = f0_2 * (int64_t) f1;
1171 int64_t f0f2_2 = f0_2 * (int64_t) f2;
1172 int64_t f0f3_2 = f0_2 * (int64_t) f3;
1173 int64_t f0f4_2 = f0_2 * (int64_t) f4;
1174 int64_t f0f5_2 = f0_2 * (int64_t) f5;
1175 int64_t f0f6_2 = f0_2 * (int64_t) f6;
1176 int64_t f0f7_2 = f0_2 * (int64_t) f7;
1177 int64_t f0f8_2 = f0_2 * (int64_t) f8;
1178 int64_t f0f9_2 = f0_2 * (int64_t) f9;
1179 int64_t f1f1_2 = f1_2 * (int64_t) f1;
1180 int64_t f1f2_2 = f1_2 * (int64_t) f2;
1181 int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
1182 int64_t f1f4_2 = f1_2 * (int64_t) f4;
1183 int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
1184 int64_t f1f6_2 = f1_2 * (int64_t) f6;
1185 int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
1186 int64_t f1f8_2 = f1_2 * (int64_t) f8;
1187 int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
1188 int64_t f2f2 = f2 * (int64_t) f2;
1189 int64_t f2f3_2 = f2_2 * (int64_t) f3;
1190 int64_t f2f4_2 = f2_2 * (int64_t) f4;
1191 int64_t f2f5_2 = f2_2 * (int64_t) f5;
1192 int64_t f2f6_2 = f2_2 * (int64_t) f6;
1193 int64_t f2f7_2 = f2_2 * (int64_t) f7;
1194 int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
1195 int64_t f2f9_38 = f2 * (int64_t) f9_38;
1196 int64_t f3f3_2 = f3_2 * (int64_t) f3;
1197 int64_t f3f4_2 = f3_2 * (int64_t) f4;
1198 int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
1199 int64_t f3f6_2 = f3_2 * (int64_t) f6;
1200 int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
1201 int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
1202 int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
1203 int64_t f4f4 = f4 * (int64_t) f4;
1204 int64_t f4f5_2 = f4_2 * (int64_t) f5;
1205 int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
1206 int64_t f4f7_38 = f4 * (int64_t) f7_38;
1207 int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
1208 int64_t f4f9_38 = f4 * (int64_t) f9_38;
1209 int64_t f5f5_38 = f5 * (int64_t) f5_38;
1210 int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
1211 int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
1212 int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
1213 int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
1214 int64_t f6f6_19 = f6 * (int64_t) f6_19;
1215 int64_t f6f7_38 = f6 * (int64_t) f7_38;
1216 int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
1217 int64_t f6f9_38 = f6 * (int64_t) f9_38;
1218 int64_t f7f7_38 = f7 * (int64_t) f7_38;
1219 int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
1220 int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
1221 int64_t f8f8_19 = f8 * (int64_t) f8_19;
1222 int64_t f8f9_38 = f8 * (int64_t) f9_38;
1223 int64_t f9f9_38 = f9 * (int64_t) f9_38;
1224 int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
1225 int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
1226 int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
1227 int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
1228 int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
1229 int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
1230 int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
1231 int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
1232 int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
1233 int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
1234 int64_t carry0;
1235 int64_t carry1;
1236 int64_t carry2;
1237 int64_t carry3;
1238 int64_t carry4;
1239 int64_t carry5;
1240 int64_t carry6;
1241 int64_t carry7;
1242 int64_t carry8;
1243 int64_t carry9;
1244 h0 += h0;
1245 h1 += h1;
1246 h2 += h2;
1247 h3 += h3;
1248 h4 += h4;
1249 h5 += h5;
1250 h6 += h6;
1251 h7 += h7;
1252 h8 += h8;
1253 h9 += h9;
1254 carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
1255 h1 += carry0;
1256 h0 -= carry0 << 26;
1257 carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
1258 h5 += carry4;
1259 h4 -= carry4 << 26;
1260 carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
1261 h2 += carry1;
1262 h1 -= carry1 << 25;
1263 carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
1264 h6 += carry5;
1265 h5 -= carry5 << 25;
1266 carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
1267 h3 += carry2;
1268 h2 -= carry2 << 26;
1269 carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
1270 h7 += carry6;
1271 h6 -= carry6 << 26;
1272 carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
1273 h4 += carry3;
1274 h3 -= carry3 << 25;
1275 carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
1276 h8 += carry7;
1277 h7 -= carry7 << 25;
1278 carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
1279 h5 += carry4;
1280 h4 -= carry4 << 26;
1281 carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
1282 h9 += carry8;
1283 h8 -= carry8 << 26;
1284 carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
1285 h0 += carry9 * 19;
1286 h9 -= carry9 << 25;
1287 carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
1288 h1 += carry0;
1289 h0 -= carry0 << 26;
1290 h[0] = (int32_t) h0;
1291 h[1] = (int32_t) h1;
1292 h[2] = (int32_t) h2;
1293 h[3] = (int32_t) h3;
1294 h[4] = (int32_t) h4;
1295 h[5] = (int32_t) h5;
1296 h[6] = (int32_t) h6;
1297 h[7] = (int32_t) h7;
1298 h[8] = (int32_t) h8;
1299 h[9] = (int32_t) h9;
1304 h = f - g
1305 Can overlap h with f or g.
1307 Preconditions:
1308 |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
1309 |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
1311 Postconditions:
1312 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
1315 void fe_sub(fe h, const fe f, const fe g) {
1316 int32_t f0 = f[0];
1317 int32_t f1 = f[1];
1318 int32_t f2 = f[2];
1319 int32_t f3 = f[3];
1320 int32_t f4 = f[4];
1321 int32_t f5 = f[5];
1322 int32_t f6 = f[6];
1323 int32_t f7 = f[7];
1324 int32_t f8 = f[8];
1325 int32_t f9 = f[9];
1326 int32_t g0 = g[0];
1327 int32_t g1 = g[1];
1328 int32_t g2 = g[2];
1329 int32_t g3 = g[3];
1330 int32_t g4 = g[4];
1331 int32_t g5 = g[5];
1332 int32_t g6 = g[6];
1333 int32_t g7 = g[7];
1334 int32_t g8 = g[8];
1335 int32_t g9 = g[9];
1336 int32_t h0 = f0 - g0;
1337 int32_t h1 = f1 - g1;
1338 int32_t h2 = f2 - g2;
1339 int32_t h3 = f3 - g3;
1340 int32_t h4 = f4 - g4;
1341 int32_t h5 = f5 - g5;
1342 int32_t h6 = f6 - g6;
1343 int32_t h7 = f7 - g7;
1344 int32_t h8 = f8 - g8;
1345 int32_t h9 = f9 - g9;
1347 h[0] = h0;
1348 h[1] = h1;
1349 h[2] = h2;
1350 h[3] = h3;
1351 h[4] = h4;
1352 h[5] = h5;
1353 h[6] = h6;
1354 h[7] = h7;
1355 h[8] = h8;
1356 h[9] = h9;
1362 Preconditions:
1363 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
1365 Write p=2^255-19; q=floor(h/p).
1366 Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
1368 Proof:
1369 Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
1370 Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
1372 Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
1373 Then 0<y<1.
1375 Write r=h-pq.
1376 Have 0<=r<=p-1=2^255-20.
1377 Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
1379 Write x=r+19(2^-255)r+y.
1380 Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
1382 Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
1383 so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
1386 void fe_tobytes(unsigned char *s, const fe h) {
1387 int32_t h0 = h[0];
1388 int32_t h1 = h[1];
1389 int32_t h2 = h[2];
1390 int32_t h3 = h[3];
1391 int32_t h4 = h[4];
1392 int32_t h5 = h[5];
1393 int32_t h6 = h[6];
1394 int32_t h7 = h[7];
1395 int32_t h8 = h[8];
1396 int32_t h9 = h[9];
1397 int32_t q;
1398 int32_t carry0;
1399 int32_t carry1;
1400 int32_t carry2;
1401 int32_t carry3;
1402 int32_t carry4;
1403 int32_t carry5;
1404 int32_t carry6;
1405 int32_t carry7;
1406 int32_t carry8;
1407 int32_t carry9;
1408 q = (19 * h9 + (((int32_t) 1) << 24)) >> 25;
1409 q = (h0 + q) >> 26;
1410 q = (h1 + q) >> 25;
1411 q = (h2 + q) >> 26;
1412 q = (h3 + q) >> 25;
1413 q = (h4 + q) >> 26;
1414 q = (h5 + q) >> 25;
1415 q = (h6 + q) >> 26;
1416 q = (h7 + q) >> 25;
1417 q = (h8 + q) >> 26;
1418 q = (h9 + q) >> 25;
1419 /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
1420 h0 += 19 * q;
1421 /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
1422 carry0 = h0 >> 26;
1423 h1 += carry0;
1424 h0 -= carry0 << 26;
1425 carry1 = h1 >> 25;
1426 h2 += carry1;
1427 h1 -= carry1 << 25;
1428 carry2 = h2 >> 26;
1429 h3 += carry2;
1430 h2 -= carry2 << 26;
1431 carry3 = h3 >> 25;
1432 h4 += carry3;
1433 h3 -= carry3 << 25;
1434 carry4 = h4 >> 26;
1435 h5 += carry4;
1436 h4 -= carry4 << 26;
1437 carry5 = h5 >> 25;
1438 h6 += carry5;
1439 h5 -= carry5 << 25;
1440 carry6 = h6 >> 26;
1441 h7 += carry6;
1442 h6 -= carry6 << 26;
1443 carry7 = h7 >> 25;
1444 h8 += carry7;
1445 h7 -= carry7 << 25;
1446 carry8 = h8 >> 26;
1447 h9 += carry8;
1448 h8 -= carry8 << 26;
1449 carry9 = h9 >> 25;
1450 h9 -= carry9 << 25;
1452 /* h10 = carry9 */
1454 Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
1455 Have h0+...+2^230 h9 between 0 and 2^255-1;
1456 evidently 2^255 h10-2^255 q = 0.
1457 Goal: Output h0+...+2^230 h9.
1459 s[0] = (unsigned char) (h0 >> 0);
1460 s[1] = (unsigned char) (h0 >> 8);
1461 s[2] = (unsigned char) (h0 >> 16);
1462 s[3] = (unsigned char) ((h0 >> 24) | (h1 << 2));
1463 s[4] = (unsigned char) (h1 >> 6);
1464 s[5] = (unsigned char) (h1 >> 14);
1465 s[6] = (unsigned char) ((h1 >> 22) | (h2 << 3));
1466 s[7] = (unsigned char) (h2 >> 5);
1467 s[8] = (unsigned char) (h2 >> 13);
1468 s[9] = (unsigned char) ((h2 >> 21) | (h3 << 5));
1469 s[10] = (unsigned char) (h3 >> 3);
1470 s[11] = (unsigned char) (h3 >> 11);
1471 s[12] = (unsigned char) ((h3 >> 19) | (h4 << 6));
1472 s[13] = (unsigned char) (h4 >> 2);
1473 s[14] = (unsigned char) (h4 >> 10);
1474 s[15] = (unsigned char) (h4 >> 18);
1475 s[16] = (unsigned char) (h5 >> 0);
1476 s[17] = (unsigned char) (h5 >> 8);
1477 s[18] = (unsigned char) (h5 >> 16);
1478 s[19] = (unsigned char) ((h5 >> 24) | (h6 << 1));
1479 s[20] = (unsigned char) (h6 >> 7);
1480 s[21] = (unsigned char) (h6 >> 15);
1481 s[22] = (unsigned char) ((h6 >> 23) | (h7 << 3));
1482 s[23] = (unsigned char) (h7 >> 5);
1483 s[24] = (unsigned char) (h7 >> 13);
1484 s[25] = (unsigned char) ((h7 >> 21) | (h8 << 4));
1485 s[26] = (unsigned char) (h8 >> 4);
1486 s[27] = (unsigned char) (h8 >> 12);
1487 s[28] = (unsigned char) ((h8 >> 20) | (h9 << 6));
1488 s[29] = (unsigned char) (h9 >> 2);
1489 s[30] = (unsigned char) (h9 >> 10);
1490 s[31] = (unsigned char) (h9 >> 18);