libsodium: Needed for Dnscrypto-proxy Release 1.3.0
[tomato.git] / release / src / router / libsodium / src / libsodium / crypto_onetimeauth / poly1305 / 53 / auth_poly1305_53.c
blob a24698f315a2b855d904fbdda3293d073f3b0701
/*
20080910
D. J. Bernstein
Public domain.
*/
#ifdef HAVE_FENV_H
# include <fenv.h>
#endif
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "api.h"
#include "crypto_onetimeauth_poly1305_53.h"
#include "utils.h"
#ifdef HAVE_FENV_H
# pragma STDC FENV_ACCESS ON
#endif

typedef uint8_t uchar;
typedef int32_t int32;
typedef uint32_t uint32;
typedef int64_t int64;
typedef uint64_t uint64;

/* Positions of the named values inside poly1305_53_constants. */
enum {
    POLY1305_53_ALPHAM80 = 0,
    POLY1305_53_ALPHAM48,
    POLY1305_53_ALPHAM16,
    POLY1305_53_ALPHA0,
    POLY1305_53_ALPHA18,
    POLY1305_53_ALPHA32,
    POLY1305_53_ALPHA50,
    POLY1305_53_ALPHA64,
    POLY1305_53_ALPHA82,
    POLY1305_53_ALPHA96,
    POLY1305_53_ALPHA112,
    POLY1305_53_ALPHA130,
    POLY1305_53_SCALE,
    POLY1305_53_OFFSET0,
    POLY1305_53_OFFSET1,
    POLY1305_53_OFFSET2,
    POLY1305_53_OFFSET3
};

static const double poly1305_53_constants[] = {
  0.00000000558793544769287109375 /* alpham80 = 3 2^(-29) */
, 24.0 /* alpham48 = 3 2^3 */
, 103079215104.0 /* alpham16 = 3 2^35 */
, 6755399441055744.0 /* alpha0 = 3 2^51 */
, 1770887431076116955136.0 /* alpha18 = 3 2^69 */
, 29014219670751100192948224.0 /* alpha32 = 3 2^83 */
, 7605903601369376408980219232256.0 /* alpha50 = 3 2^101 */
, 124615124604835863084731911901282304.0 /* alpha64 = 3 2^115 */
, 32667107224410092492483962313449748299776.0 /* alpha82 = 3 2^133 */
, 535217884764734955396857238543560676143529984.0 /* alpha96 = 3 2^147 */
, 35076039295941670036888435985190792471742381031424.0 /* alpha112 = 3 2^163 */
, 9194973245195333150150082162901855101712434733101613056.0 /* alpha130 = 3 2^181 */
, 0.0000000000000000000000000000000000000036734198463196484624023016788195177431833298649127735047148490821200539357960224151611328125 /* scale = 5 2^(-130) */
, 6755408030990331.0 /* offset0 = alpha0 + 2^33 - 5 */
, 29014256564239239022116864.0 /* offset1 = alpha32 + 2^65 - 2^33 */
, 124615283061160854719918951570079744.0 /* offset2 = alpha64 + 2^97 - 2^65 */
, 535219245894202480694386063513315216128475136.0 /* offset3 = alpha96 + 2^130 - 2^97 */
} ;

/*
 * The multiplier r, split into eight doubles (a low and a high part for each
 * of its four 32-bit limbs), plus the same split applied to scale * r1,
 * scale * r2 and scale * r3.  Filled in by poly1305_53_expand_key().
 */
typedef struct {
    double r0low;
    double r0high;
    double r1low;
    double r1high;
    double r2low;
    double r2high;
    double r3low;
    double r3high;
    double sr1low;
    double sr1high;
    double sr2low;
    double sr2high;
    double sr3low;
    double sr3high;
} poly1305_53_key;

/* Returns the double whose object representation is the 64 bits of `bits`.
 * memcpy is used so that no object is accessed through an incompatible
 * pointer type. */
static double
poly1305_53_double_from_bits(int64 bits)
{
    double value;

    memcpy(&value, &bits, sizeof value);
    return value;
}

/* Returns the 64 bits that make up the object representation of `value`. */
static uint64
poly1305_53_bits_from_double(double value)
{
    uint64 bits;

    memcpy(&bits, &value, sizeof bits);
    return bits;
}

/* Reads four bytes at p as a little-endian 32-bit word. */
static uint32
poly1305_53_load32(const unsigned char *p)
{
    uint32 word;

    word  = (uint32) p[0];
    word += (uint32) p[1] << 8;
    word += (uint32) p[2] << 16;
    word += (uint32) p[3] << 24;
    return word;
}

/* Writes the low 32 bits of word to out[0..3], least significant byte first. */
static void
poly1305_53_store32(unsigned char *out, uint64 word)
{
    out[0] = (unsigned char) (word & 255);
    out[1] = (unsigned char) ((word >> 8) & 255);
    out[2] = (unsigned char) ((word >> 16) & 255);
    out[3] = (unsigned char) ((word >> 24) & 255);
}

/*
 * Converts an integer word into the double used for limb number `limb`
 * (0..3).  The word is placed in the low mantissa bits of a double whose
 * sign/exponent/top-mantissa bits are 2151, 2215, 2279 or 2343 shifted left
 * by 51; that bit pattern is then read back as a double and the matching
 * alpha0 / alpha32 / alpha64 / alpha96 constant is subtracted.
 * `word` must be small enough not to carry into bit 51 (callers pass at most
 * 33 bits).
 */
static double
poly1305_53_limb(unsigned int limb, uint64 word)
{
    static const int64 exponent_bits[4] = { 2151, 2215, 2279, 2343 };
    static const int   alpha_index[4]   = {
        POLY1305_53_ALPHA0, POLY1305_53_ALPHA32,
        POLY1305_53_ALPHA64, POLY1305_53_ALPHA96
    };
    const int64 bits = (exponent_bits[limb] << 51) + (int64) word;

    return poly1305_53_double_from_bits(bits) -
           poly1305_53_constants[alpha_index[limb]];
}

/*
 * Adds alpha to value and subtracts it again.  With round-to-nearest in
 * effect the sum drops the low-order bits of value, so the result is value
 * rounded to the granularity selected by alpha.  The two steps are kept as
 * separate statements on purpose.
 */
static double
poly1305_53_round_to(double value, double alpha)
{
    double rounded;

    rounded = value + alpha;
    rounded -= alpha;
    return rounded;
}

/*
 * Builds the split multiplier from the 16 bytes at r.  The bytes are masked
 * exactly as the key setup requires (top four bits of bytes 3, 7, 11, 15 and
 * low two bits of bytes 4, 8, 12 cleared) before being converted to limbs.
 */
static void
poly1305_53_expand_key(poly1305_53_key *key, const unsigned char *r)
{
    const double *c = poly1305_53_constants;
    const double  scale = c[POLY1305_53_SCALE];
    double        r0;
    double        r1;
    double        r2;
    double        r3;
    double        sr1;
    double        sr2;
    double        sr3;

    r0 = poly1305_53_limb(0, poly1305_53_load32(r + 0) & 0x0fffffffU);
    r1 = poly1305_53_limb(1, poly1305_53_load32(r + 4) & 0x0ffffffcU);
    r2 = poly1305_53_limb(2, poly1305_53_load32(r + 8) & 0x0ffffffcU);
    r3 = poly1305_53_limb(3, poly1305_53_load32(r + 12) & 0x0ffffffcU);

    /* The scaled copies are derived from the full limbs, before splitting. */
    sr1 = scale * r1;
    sr2 = scale * r2;
    sr3 = scale * r3;

    key->r0high = poly1305_53_round_to(r0, c[POLY1305_53_ALPHA18]);
    key->r0low  = r0 - key->r0high;
    key->r1high = poly1305_53_round_to(r1, c[POLY1305_53_ALPHA50]);
    key->r1low  = r1 - key->r1high;
    key->r2high = poly1305_53_round_to(r2, c[POLY1305_53_ALPHA82]);
    key->r2low  = r2 - key->r2high;
    key->r3high = poly1305_53_round_to(r3, c[POLY1305_53_ALPHA112]);
    key->r3low  = r3 - key->r3high;

    key->sr1high = poly1305_53_round_to(sr1, c[POLY1305_53_ALPHAM80]);
    key->sr1low  = sr1 - key->sr1high;
    key->sr2high = poly1305_53_round_to(sr2, c[POLY1305_53_ALPHAM48]);
    key->sr2low  = sr2 - key->sr2high;
    key->sr3high = poly1305_53_round_to(sr3, c[POLY1305_53_ALPHAM16]);
    key->sr3low  = sr3 - key->sr3high;
}

/*
 * Adds one 16-byte block to the accumulator h.  The four little-endian
 * words go into h[0], h[1], h[3] and h[5].  `hibit` is 1 for a complete
 * message block (adds 2^32 to the top word) and 0 for the padded final
 * block, whose padding byte is already part of `block`.
 */
static void
poly1305_53_absorb(double h[8], const unsigned char *block, uint64 hibit)
{
    h[0] += poly1305_53_limb(0, poly1305_53_load32(block + 0));
    h[1] += poly1305_53_limb(1, poly1305_53_load32(block + 4));
    h[3] += poly1305_53_limb(2, poly1305_53_load32(block + 8));
    h[5] += poly1305_53_limb(3, (uint64) poly1305_53_load32(block + 12) +
                                    (hibit << 32));
}

/*
 * Carries the accumulator h into four limbs x0, x2, x4, x6 (folding the
 * top carries back in through `scale`) and replaces h with the product of
 * those limbs and the split multiplier in `key`.
 *
 * Every product is stored in a temporary before it is added, and the terms
 * of each h[i] are added in a fixed order, so the sequence of floating-point
 * operations per accumulator never changes.
 */
static void
poly1305_53_mulmod(double h[8], const poly1305_53_key *key)
{
    const double *c = poly1305_53_constants;
    const double  scale    = c[POLY1305_53_SCALE];
    const double  alpha32  = c[POLY1305_53_ALPHA32];
    const double  alpha64  = c[POLY1305_53_ALPHA64];
    const double  alpha96  = c[POLY1305_53_ALPHA96];
    const double  alpha130 = c[POLY1305_53_ALPHA130];
    double        x0, x1, x2, x3, x4, x5, x6, x7;
    double        y0, y1, y2, y3, y4, y5, y6, y7;
    double        p;
    double        q;

    /* Carry extraction: y is the part of h at or above the next limb. */
    y7 = poly1305_53_round_to(h[7], alpha130);
    y6 = poly1305_53_round_to(h[6], alpha130);
    y1 = poly1305_53_round_to(h[1], alpha32);
    y0 = poly1305_53_round_to(h[0], alpha32);
    y5 = poly1305_53_round_to(h[5], alpha96);
    y4 = poly1305_53_round_to(h[4], alpha96);
    y3 = poly1305_53_round_to(h[3], alpha64);
    y2 = poly1305_53_round_to(h[2], alpha64);

    x7 = h[7] - y7;
    y7 *= scale;
    x6 = h[6] - y6;
    y6 *= scale;
    x1 = h[1] - y1;
    x0 = h[0] - y0;

    x1 += y7;
    x0 += y6;
    x7 += y5;
    x6 += y4;

    x0 += x1;
    x6 += x7;

    x5 = h[5] - y5;
    x4 = h[4] - y4;
    x3 = h[3] - y3;
    x2 = h[2] - y2;

    x5 += y3;
    x4 += y2;
    x3 += y1;
    x2 += y0;

    x4 += x5;
    x2 += x3;

    /* Terms in x0 and x6 start each accumulator. */
    p = key->r3low * x0;
    q = key->r0low * x6;
    h[6] = p + q;
    p = key->r3high * x0;
    q = key->r0high * x6;
    h[7] = p + q;
    p = key->r0low * x0;
    q = key->sr1low * x6;
    h[0] = p + q;
    p = key->r0high * x0;
    q = key->sr1high * x6;
    h[1] = p + q;
    p = key->r1low * x0;
    q = key->sr2low * x6;
    h[2] = p + q;
    p = key->r1high * x0;
    q = key->sr2high * x6;
    h[3] = p + q;
    p = key->r2low * x0;
    q = key->sr3low * x6;
    h[4] = p + q;
    p = key->r2high * x0;
    q = key->sr3high * x6;
    h[5] = p + q;

    /* Terms in x4. */
    p = key->r1high * x4;
    h[7] += p;
    p = key->r1low * x4;
    h[6] += p;
    p = key->r0high * x4;
    h[5] += p;
    p = key->r0low * x4;
    h[4] += p;
    p = key->sr3high * x4;
    h[3] += p;
    p = key->sr3low * x4;
    h[2] += p;
    p = key->sr2high * x4;
    h[1] += p;
    p = key->sr2low * x4;
    h[0] += p;

    /* Terms in x2. */
    p = key->r0low * x2;
    h[2] += p;
    p = key->r0high * x2;
    h[3] += p;
    p = key->r1low * x2;
    h[4] += p;
    p = key->r1high * x2;
    h[5] += p;
    p = key->r2low * x2;
    h[6] += p;
    p = key->r2high * x2;
    h[7] += p;
    p = key->sr3low * x2;
    h[0] += p;
    p = key->sr3high * x2;
    h[1] += p;
}

/*
 * Turns the accumulator h into the 16-byte tag: carries h into four limbs,
 * adds offset0..offset3 so each limb can be read straight out of the double's
 * bit pattern, selects between that value and the value plus 5 (minus 2^130)
 * depending on the sign of the latter's top word, adds the 16 bytes at s
 * as a little-endian number and stores the low 128 bits in out[0..15].
 */
static void
poly1305_53_finish(unsigned char *out, const double h[8],
                   const unsigned char *s)
{
    const double *c = poly1305_53_constants;
    const double  scale    = c[POLY1305_53_SCALE];
    const double  alpha32  = c[POLY1305_53_ALPHA32];
    const double  alpha64  = c[POLY1305_53_ALPHA64];
    const double  alpha96  = c[POLY1305_53_ALPHA96];
    const double  alpha130 = c[POLY1305_53_ALPHA130];
    const uint64  bits32   = (uint64) 0xffffffffU;
    double        h6;
    double        x0, x1, x2, x3, x4, x5, x6, x7;
    double        y0, y1, y2, y3, y4, y5, y6, y7;
    uint64        f0, f1, f2, f3, f4;
    uint64        g0, g1, g2, g3, g4;
    uint64        carry;
    uint64        g;
    uint64        keep_f;

    y7 = poly1305_53_round_to(h[7], alpha130);
    y0 = poly1305_53_round_to(h[0], alpha32);
    y1 = poly1305_53_round_to(h[1], alpha32);
    y2 = poly1305_53_round_to(h[2], alpha64);
    y3 = poly1305_53_round_to(h[3], alpha64);
    y4 = poly1305_53_round_to(h[4], alpha96);
    y5 = poly1305_53_round_to(h[5], alpha96);

    x7 = h[7] - y7;
    y7 *= scale;

    /* h[6] absorbs the low part of h[7] before its own carry is taken. */
    h6 = h[6] + x7;
    y6 = poly1305_53_round_to(h6, alpha130);

    x0 = h[0] - y0;
    x1 = h[1] - y1;
    x2 = h[2] - y2;
    x3 = h[3] - y3;
    x4 = h[4] - y4;
    x5 = h[5] - y5;
    x6 = h6 - y6;
    y6 *= scale;

    x0 += y7;
    x2 += y0;
    x3 += y1;
    x4 += y2;
    x0 += y6;
    x5 += y3;
    x6 += y4;
    x2 += x3;
    x0 += x1;
    x4 += x5;
    x6 += y5;

    x2 += c[POLY1305_53_OFFSET1];
    x0 += c[POLY1305_53_OFFSET0];
    x4 += c[POLY1305_53_OFFSET2];
    x6 += c[POLY1305_53_OFFSET3];

    f0 = poly1305_53_bits_from_double(x0);
    f1 = poly1305_53_bits_from_double(x2);
    f2 = poly1305_53_bits_from_double(x4);
    f3 = poly1305_53_bits_from_double(x6);

    /* f: 32-bit words with carries propagated; g: the same value plus 5. */
    carry = (f0 >> 32) & 255;
    f0 &= bits32;
    f1 += carry;
    g0 = f0 + 5;
    g = g0 >> 32;
    g0 &= bits32;

    carry = (f1 >> 32) & 255;
    f1 &= bits32;
    g1 = f1 + g;
    g = g1 >> 32;
    g1 &= bits32;
    f2 += carry;

    carry = (f2 >> 32) & 255;
    f2 &= bits32;
    f3 += carry;
    g2 = f2 + g;
    g = g2 >> 32;
    g2 &= bits32;

    f4 = (f3 >> 32) & 255;
    f3 &= bits32;
    g3 = f3 + g;
    g = g3 >> 32;
    g3 &= bits32;

    g4 = f4 + g;
    g4 = g4 - 4;

    /* All ones when g4 wrapped below zero (keep f), zero otherwise (take g).
     * Built from the top bit with unsigned arithmetic only. */
    keep_f = (uint64) 0 - (g4 >> 63);

    f0 = (f0 & keep_f) | (g0 & ~keep_f);
    f1 = (f1 & keep_f) | (g1 & ~keep_f);
    f2 = (f2 & keep_f) | (g2 & ~keep_f);
    f3 = (f3 & keep_f) | (g3 & ~keep_f);

    f0 += poly1305_53_load32(s + 0);
    f1 += poly1305_53_load32(s + 4);
    f2 += poly1305_53_load32(s + 8);
    f3 += poly1305_53_load32(s + 12);

    poly1305_53_store32(out + 0, f0);
    f1 += f0 >> 32;
    poly1305_53_store32(out + 4, f1);
    f2 += f1 >> 32;
    poly1305_53_store32(out + 8, f2);
    f3 += f2 >> 32;
    poly1305_53_store32(out + 12, f3);
}

/*
 * Computes the 16-byte Poly1305 authenticator of a message.
 *
 *   out  receives the 16-byte tag (out[0..15] are written).
 *   m    message bytes.
 *   l    number of message bytes; may be 0.
 *   k    32-byte one-time key: k[0..15] is the multiplier r (masked
 *        internally), k[16..31] is added to the result at the end.
 *
 * Returns 0 on success.  When HAVE_FENV_H is defined the rounding mode is
 * forced to FE_TONEAREST for the duration of the call; -1 is returned if it
 * cannot be set, and abort() is called if the caller's mode cannot be
 * restored afterwards.
 */
int crypto_onetimeauth(unsigned char *out,const unsigned char *m,unsigned long long l,const unsigned char *k)
{
    poly1305_53_key key;
    double          h[8];
    unsigned char   last[16];
    unsigned int    i;

#ifdef HAVE_FENV_H
    const int previous_rounding_mode = fegetround();
    if (previous_rounding_mode != FE_TONEAREST) {
        if (fesetround(FE_TONEAREST) != 0) {
            return -1;
        }
    }
#endif

    poly1305_53_expand_key(&key, k);

    for (i = 0; i < 8; i++) {
        h[i] = 0.0;
    }

    while (l >= 16) {
        poly1305_53_absorb(h, m, 1);
        poly1305_53_mulmod(h, &key);
        m += 16;
        l -= 16;
    }

    if (l > 0) {
        /* Final short block: message bytes, then a single 1 byte, then
         * zeros.  It is assembled in a local buffer so that no address
         * outside the caller's message is ever formed or read. */
        memset(last, 0, sizeof last);
        memcpy(last, m, (size_t) l);
        last[l] = 1;
        poly1305_53_absorb(h, last, 0);
        poly1305_53_mulmod(h, &key);
    }

    poly1305_53_finish(out, h, k + 16);

#ifdef HAVE_FENV_H
    if (previous_rounding_mode != FE_TONEAREST &&
        fesetround(previous_rounding_mode) != 0) {
        abort();
    }
#endif

    return 0;
}
/* Returns the name of this Poly1305 implementation, the constant
 * NUL-terminated string "53".  The caller must not modify or free it. */
const char *
crypto_onetimeauth_poly1305_implementation_name(void)
{
    return "53";
}
1652 struct crypto_onetimeauth_poly1305_implementation
1653 crypto_onetimeauth_poly1305_53_implementation = {
1654 _SODIUM_C99(.implementation_name =) crypto_onetimeauth_poly1305_implementation_name,
1655 _SODIUM_C99(.onetimeauth =) crypto_onetimeauth,
1656 _SODIUM_C99(.onetimeauth_verify =) crypto_onetimeauth_verify