/* release/src/router/dnscrypt/src/libnacl/crypto_onetimeauth/poly1305/53/auth.c (tomato.git) */
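/*
 * Poly1305 one-time authenticator, floating-point ("53") implementation
 * from D. J. Bernstein's NaCl, bundled with dnscrypt-proxy in Tomato RAF.
 */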
/*
20080910
D. J. Bernstein
Public domain.
*/

#include "crypto_onetimeauth.h"

typedef unsigned char uchar;
typedef int int32;
typedef unsigned int uint32;
typedef long long int64;
typedef unsigned long long uint64;
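/*
 * Constants for evaluating Poly1305 mod 2^130 - 5 in double precision.
 * The alpha* values (3 * 2^k) exploit IEEE-754 rounding to split doubles
 * into limbs at chosen bit positions; scale = 5 * 2^-130 folds the top
 * limb back into the bottom (since 2^130 = 5 mod 2^130 - 5); the offset*
 * values are used when converting the accumulator back to integers.
 */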
static const double poly1305_53_constants[] = {
  0.00000000558793544769287109375 /* alpham80 = 3 2^(-29) */
, 24.0 /* alpham48 = 3 2^3 */
, 103079215104.0 /* alpham16 = 3 2^35 */
, 6755399441055744.0 /* alpha0 = 3 2^51 */
, 1770887431076116955136.0 /* alpha18 = 3 2^69 */
, 29014219670751100192948224.0 /* alpha32 = 3 2^83 */
, 7605903601369376408980219232256.0 /* alpha50 = 3 2^101 */
, 124615124604835863084731911901282304.0 /* alpha64 = 3 2^115 */
, 32667107224410092492483962313449748299776.0 /* alpha82 = 3 2^133 */
, 535217884764734955396857238543560676143529984.0 /* alpha96 = 3 2^147 */
, 35076039295941670036888435985190792471742381031424.0 /* alpha112 = 3 2^163 */
, 9194973245195333150150082162901855101712434733101613056.0 /* alpha130 = 3 2^181 */
, 0.0000000000000000000000000000000000000036734198463196484624023016788195177431833298649127735047148490821200539357960224151611328125 /* scale = 5 2^(-130) */
, 6755408030990331.0 /* offset0 = alpha0 + 2^33 - 5 */
, 29014256564239239022116864.0 /* offset1 = alpha32 + 2^65 - 2^33 */
, 124615283061160854719918951570079744.0 /* offset2 = alpha64 + 2^97 - 2^65 */
, 535219245894202480694386063513315216128475136.0 /* offset3 = alpha96 + 2^130 - 2^97 */
} ;
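/*
 * crypto_onetimeauth: write the 16-byte Poly1305 tag of the l-byte message m
 * to out, using the 32-byte key k (r = first 16 bytes, clamped below; s = last 16).
 */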
int crypto_onetimeauth(unsigned char *out,const unsigned char *m,unsigned long long l,const unsigned char *k)
{
  register const unsigned char *r = k;
  register const unsigned char *s = k + 16;
  double r0high_stack;
  double r1high_stack;
  double r1low_stack;
  double sr1high_stack;
  double r2low_stack;
  double sr2high_stack;
  double r0low_stack;
  double sr1low_stack;
  double r2high_stack;
  double sr2low_stack;
  double r3high_stack;
  double sr3high_stack;
  double r3low_stack;
  double sr3low_stack;
  int64 d0;
  int64 d1;
  int64 d2;
  int64 d3;
  register double scale;
  register double alpha0;
  register double alpha32;
  register double alpha64;
  register double alpha96;
  register double alpha130;
  register double h0;
  register double h1;
  register double h2;
  register double h3;
  register double h4;
  register double h5;
  register double h6;
  register double h7;
  register double y7;
  register double y6;
  register double y1;
  register double y0;
  register double y5;
  register double y4;
  register double x7;
  register double x6;
  register double x1;
  register double x0;
  register double y3;
  register double y2;
  register double r3low;
  register double r0low;
  register double r3high;
  register double r0high;
  register double sr1low;
  register double x5;
  register double r3lowx0;
  register double sr1high;
  register double x4;
  register double r0lowx6;
  register double r1low;
  register double x3;
  register double r3highx0;
  register double r1high;
  register double x2;
  register double r0highx6;
  register double sr2low;
  register double r0lowx0;
  register double sr2high;
  register double sr1lowx6;
  register double r2low;
  register double r0highx0;
  register double r2high;
  register double sr1highx6;
  register double sr3low;
  register double r1lowx0;
  register double sr3high;
  register double sr2lowx6;
  register double r1highx0;
  register double sr2highx6;
  register double r2lowx0;
  register double sr3lowx6;
  register double r2highx0;
  register double sr3highx6;
  register double r1highx4;
  register double r1lowx4;
  register double r0highx4;
  register double r0lowx4;
  register double sr3highx4;
  register double sr3lowx4;
  register double sr2highx4;
  register double sr2lowx4;
  register double r0lowx2;
  register double r0highx2;
  register double r1lowx2;
  register double r1highx2;
  register double r2lowx2;
  register double r2highx2;
  register double sr3lowx2;
  register double sr3highx2;
  register double z0;
  register double z1;
  register double z2;
  register double z3;
  register int64 r0;
  register int64 r1;
  register int64 r2;
  register int64 r3;
  register uint32 r00;
  register uint32 r01;
  register uint32 r02;
  register uint32 r03;
  register uint32 r10;
  register uint32 r11;
  register uint32 r12;
  register uint32 r13;
  register uint32 r20;
  register uint32 r21;
  register uint32 r22;
  register uint32 r23;
  register uint32 r30;
  register uint32 r31;
  register uint32 r32;
  register uint32 r33;
  register int64 m0;
  register int64 m1;
  register int64 m2;
  register int64 m3;
  register uint32 m00;
  register uint32 m01;
  register uint32 m02;
  register uint32 m03;
  register uint32 m10;
  register uint32 m11;
  register uint32 m12;
  register uint32 m13;
  register uint32 m20;
  register uint32 m21;
  register uint32 m22;
  register uint32 m23;
  register uint32 m30;
  register uint32 m31;
  register uint32 m32;
  register uint64 m33;
  register char *constants;
  register int32 lbelow2;
  register int32 lbelow3;
  register int32 lbelow4;
  register int32 lbelow5;
  register int32 lbelow6;
  register int32 lbelow7;
  register int32 lbelow8;
  register int32 lbelow9;
  register int32 lbelow10;
  register int32 lbelow11;
  register int32 lbelow12;
  register int32 lbelow13;
  register int32 lbelow14;
  register int32 lbelow15;
  register double alpham80;
  register double alpham48;
  register double alpham16;
  register double alpha18;
  register double alpha50;
  register double alpha82;
  register double alpha112;
  register double offset0;
  register double offset1;
  register double offset2;
  register double offset3;
  register uint32 s00;
  register uint32 s01;
  register uint32 s02;
  register uint32 s03;
  register uint32 s10;
  register uint32 s11;
  register uint32 s12;
  register uint32 s13;
  register uint32 s20;
  register uint32 s21;
  register uint32 s22;
  register uint32 s23;
  register uint32 s30;
  register uint32 s31;
  register uint32 s32;
  register uint32 s33;
  register uint64 bits32;
  register uint64 f;
  register uint64 f0;
  register uint64 f1;
  register uint64 f2;
  register uint64 f3;
  register uint64 f4;
  register uint64 g;
  register uint64 g0;
  register uint64 g1;
  register uint64 g2;
  register uint64 g3;
  register uint64 g4;

  r00 = *(uchar *) (r + 0);
  constants = (char *) &poly1305_53_constants;

  r01 = *(uchar *) (r + 1);

  r02 = *(uchar *) (r + 2);
  r0 = 2151;

  r03 = *(uchar *) (r + 3); r03 &= 15;
  r0 <<= 51;

  r10 = *(uchar *) (r + 4); r10 &= 252;
  r01 <<= 8;
  r0 += r00;

  r11 = *(uchar *) (r + 5);
  r02 <<= 16;
  r0 += r01;

  r12 = *(uchar *) (r + 6);
  r03 <<= 24;
  r0 += r02;

  r13 = *(uchar *) (r + 7); r13 &= 15;
  r1 = 2215;
  r0 += r03;

  d0 = r0;
  r1 <<= 51;
  r2 = 2279;

  r20 = *(uchar *) (r + 8); r20 &= 252;
  r11 <<= 8;
  r1 += r10;

  r21 = *(uchar *) (r + 9);
  r12 <<= 16;
  r1 += r11;

  r22 = *(uchar *) (r + 10);
  r13 <<= 24;
  r1 += r12;

  r23 = *(uchar *) (r + 11); r23 &= 15;
  r2 <<= 51;
  r1 += r13;

  d1 = r1;
  r21 <<= 8;
  r2 += r20;

  r30 = *(uchar *) (r + 12); r30 &= 252;
  r22 <<= 16;
  r2 += r21;

  r31 = *(uchar *) (r + 13);
  r23 <<= 24;
  r2 += r22;

  r32 = *(uchar *) (r + 14);
  r2 += r23;
  r3 = 2343;

  d2 = r2;
  r3 <<= 51;
  alpha32 = *(double *) (constants + 40);

  r33 = *(uchar *) (r + 15); r33 &= 15;
  r31 <<= 8;
  r3 += r30;

  r32 <<= 16;
  r3 += r31;

  r33 <<= 24;
  r3 += r32;

  r3 += r33;
  h0 = alpha32 - alpha32;

  d3 = r3;
  h1 = alpha32 - alpha32;

  alpha0 = *(double *) (constants + 24);
  h2 = alpha32 - alpha32;

  alpha64 = *(double *) (constants + 56);
  h3 = alpha32 - alpha32;

  alpha18 = *(double *) (constants + 32);
  h4 = alpha32 - alpha32;

  r0low = *(double *) &d0;
  h5 = alpha32 - alpha32;

  r1low = *(double *) &d1;
  h6 = alpha32 - alpha32;

  r2low = *(double *) &d2;
  h7 = alpha32 - alpha32;

  alpha50 = *(double *) (constants + 48);
  r0low -= alpha0;

  alpha82 = *(double *) (constants + 64);
  r1low -= alpha32;

  scale = *(double *) (constants + 96);
  r2low -= alpha64;

  alpha96 = *(double *) (constants + 72);
  r0high = r0low + alpha18;

  r3low = *(double *) &d3;

  alpham80 = *(double *) (constants + 0);
  r1high = r1low + alpha50;
  sr1low = scale * r1low;

  alpham48 = *(double *) (constants + 8);
  r2high = r2low + alpha82;
  sr2low = scale * r2low;

  r0high -= alpha18;
  r0high_stack = r0high;

  r3low -= alpha96;

  r1high -= alpha50;
  r1high_stack = r1high;

  sr1high = sr1low + alpham80;

  alpha112 = *(double *) (constants + 80);
  r0low -= r0high;

  alpham16 = *(double *) (constants + 16);
  r2high -= alpha82;
  sr3low = scale * r3low;

  alpha130 = *(double *) (constants + 88);
  sr2high = sr2low + alpham48;

  r1low -= r1high;
  r1low_stack = r1low;

  sr1high -= alpham80;
  sr1high_stack = sr1high;

  r2low -= r2high;
  r2low_stack = r2low;

  sr2high -= alpham48;
  sr2high_stack = sr2high;

  r3high = r3low + alpha112;
  r0low_stack = r0low;

  sr1low -= sr1high;
  sr1low_stack = sr1low;

  sr3high = sr3low + alpham16;
  r2high_stack = r2high;

  sr2low -= sr2high;
  sr2low_stack = sr2low;

  r3high -= alpha112;
  r3high_stack = r3high;

  sr3high -= alpham16;
  sr3high_stack = sr3high;

  r3low -= r3high;
  r3low_stack = r3low;

  sr3low -= sr3high;
  sr3low_stack = sr3low;
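  /*
   * At this point r has been clamped and split into double-precision limbs
   * (r0..r3, low/high halves), with sr* = (5 * 2^-130) * r* precomputed for
   * the wraparound modulo 2^130 - 5.  The accumulator h0..h7 is zero.
   */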
  if (l < 16) goto addatmost15bytes;

  m00 = *(uchar *) (m + 0);
  m0 = 2151;

  m0 <<= 51;
  m1 = 2215;
  m01 = *(uchar *) (m + 1);

  m1 <<= 51;
  m2 = 2279;
  m02 = *(uchar *) (m + 2);

  m2 <<= 51;
  m3 = 2343;
  m03 = *(uchar *) (m + 3);

  m10 = *(uchar *) (m + 4);
  m01 <<= 8;
  m0 += m00;

  m11 = *(uchar *) (m + 5);
  m02 <<= 16;
  m0 += m01;

  m12 = *(uchar *) (m + 6);
  m03 <<= 24;
  m0 += m02;

  m13 = *(uchar *) (m + 7);
  m3 <<= 51;
  m0 += m03;

  m20 = *(uchar *) (m + 8);
  m11 <<= 8;
  m1 += m10;

  m21 = *(uchar *) (m + 9);
  m12 <<= 16;
  m1 += m11;

  m22 = *(uchar *) (m + 10);
  m13 <<= 24;
  m1 += m12;

  m23 = *(uchar *) (m + 11);
  m1 += m13;

  m30 = *(uchar *) (m + 12);
  m21 <<= 8;
  m2 += m20;

  m31 = *(uchar *) (m + 13);
  m22 <<= 16;
  m2 += m21;

  m32 = *(uchar *) (m + 14);
  m23 <<= 24;
  m2 += m22;

  m33 = *(uchar *) (m + 15);
  m2 += m23;

  d0 = m0;
  m31 <<= 8;
  m3 += m30;

  d1 = m1;
  m32 <<= 16;
  m3 += m31;

  d2 = m2;
  m33 += 256;

  m33 <<= 24;
  m3 += m32;

  m3 += m33;
  d3 = m3;

  m += 16;
  l -= 16;

  z0 = *(double *) &d0;

  z1 = *(double *) &d1;

  z2 = *(double *) &d2;

  z3 = *(double *) &d3;

  z0 -= alpha0;

  z1 -= alpha32;

  z2 -= alpha64;

  z3 -= alpha96;

  h0 += z0;

  h1 += z1;

  h3 += z2;

  h5 += z3;

  if (l < 16) goto multiplyaddatmost15bytes;
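  /*
   * Main loop: while at least 16 bytes remain, multiply the accumulator by r
   * modulo 2^130 - 5 (entirely in double precision) and add the next block.
   */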
multiplyaddatleast16bytes:;

  m2 = 2279;
  m20 = *(uchar *) (m + 8);
  y7 = h7 + alpha130;

  m2 <<= 51;
  m3 = 2343;
  m21 = *(uchar *) (m + 9);
  y6 = h6 + alpha130;

  m3 <<= 51;
  m0 = 2151;
  m22 = *(uchar *) (m + 10);
  y1 = h1 + alpha32;

  m0 <<= 51;
  m1 = 2215;
  m23 = *(uchar *) (m + 11);
  y0 = h0 + alpha32;

  m1 <<= 51;
  m30 = *(uchar *) (m + 12);
  y7 -= alpha130;

  m21 <<= 8;
  m2 += m20;
  m31 = *(uchar *) (m + 13);
  y6 -= alpha130;

  m22 <<= 16;
  m2 += m21;
  m32 = *(uchar *) (m + 14);
  y1 -= alpha32;

  m23 <<= 24;
  m2 += m22;
  m33 = *(uchar *) (m + 15);
  y0 -= alpha32;

  m2 += m23;
  m00 = *(uchar *) (m + 0);
  y5 = h5 + alpha96;

  m31 <<= 8;
  m3 += m30;
  m01 = *(uchar *) (m + 1);
  y4 = h4 + alpha96;

  m32 <<= 16;
  m02 = *(uchar *) (m + 2);
  x7 = h7 - y7;
  y7 *= scale;

  m33 += 256;
  m03 = *(uchar *) (m + 3);
  x6 = h6 - y6;
  y6 *= scale;

  m33 <<= 24;
  m3 += m31;
  m10 = *(uchar *) (m + 4);
  x1 = h1 - y1;

  m01 <<= 8;
  m3 += m32;
  m11 = *(uchar *) (m + 5);
  x0 = h0 - y0;

  m3 += m33;
  m0 += m00;
  m12 = *(uchar *) (m + 6);
  y5 -= alpha96;

  m02 <<= 16;
  m0 += m01;
  m13 = *(uchar *) (m + 7);
  y4 -= alpha96;

  m03 <<= 24;
  m0 += m02;
  d2 = m2;
  x1 += y7;

  m0 += m03;
  d3 = m3;
  x0 += y6;

  m11 <<= 8;
  m1 += m10;
  d0 = m0;
  x7 += y5;

  m12 <<= 16;
  m1 += m11;
  x6 += y4;

  m13 <<= 24;
  m1 += m12;
  y3 = h3 + alpha64;

  m1 += m13;
  d1 = m1;
  y2 = h2 + alpha64;

  x0 += x1;

  x6 += x7;

  y3 -= alpha64;
  r3low = r3low_stack;

  y2 -= alpha64;
  r0low = r0low_stack;

  x5 = h5 - y5;
  r3lowx0 = r3low * x0;
  r3high = r3high_stack;

  x4 = h4 - y4;
  r0lowx6 = r0low * x6;
  r0high = r0high_stack;

  x3 = h3 - y3;
  r3highx0 = r3high * x0;
  sr1low = sr1low_stack;

  x2 = h2 - y2;
  r0highx6 = r0high * x6;
  sr1high = sr1high_stack;

  x5 += y3;
  r0lowx0 = r0low * x0;
  r1low = r1low_stack;

  h6 = r3lowx0 + r0lowx6;
  sr1lowx6 = sr1low * x6;
  r1high = r1high_stack;

  x4 += y2;
  r0highx0 = r0high * x0;
  sr2low = sr2low_stack;

  h7 = r3highx0 + r0highx6;
  sr1highx6 = sr1high * x6;
  sr2high = sr2high_stack;

  x3 += y1;
  r1lowx0 = r1low * x0;
  r2low = r2low_stack;

  h0 = r0lowx0 + sr1lowx6;
  sr2lowx6 = sr2low * x6;
  r2high = r2high_stack;

  x2 += y0;
  r1highx0 = r1high * x0;
  sr3low = sr3low_stack;

  h1 = r0highx0 + sr1highx6;
  sr2highx6 = sr2high * x6;
  sr3high = sr3high_stack;

  x4 += x5;
  r2lowx0 = r2low * x0;
  z2 = *(double *) &d2;

  h2 = r1lowx0 + sr2lowx6;
  sr3lowx6 = sr3low * x6;

  x2 += x3;
  r2highx0 = r2high * x0;
  z3 = *(double *) &d3;

  h3 = r1highx0 + sr2highx6;
  sr3highx6 = sr3high * x6;

  r1highx4 = r1high * x4;
  z2 -= alpha64;

  h4 = r2lowx0 + sr3lowx6;
  r1lowx4 = r1low * x4;

  r0highx4 = r0high * x4;
  z3 -= alpha96;

  h5 = r2highx0 + sr3highx6;
  r0lowx4 = r0low * x4;

  h7 += r1highx4;
  sr3highx4 = sr3high * x4;

  h6 += r1lowx4;
  sr3lowx4 = sr3low * x4;

  h5 += r0highx4;
  sr2highx4 = sr2high * x4;

  h4 += r0lowx4;
  sr2lowx4 = sr2low * x4;

  h3 += sr3highx4;
  r0lowx2 = r0low * x2;

  h2 += sr3lowx4;
  r0highx2 = r0high * x2;

  h1 += sr2highx4;
  r1lowx2 = r1low * x2;

  h0 += sr2lowx4;
  r1highx2 = r1high * x2;

  h2 += r0lowx2;
  r2lowx2 = r2low * x2;

  h3 += r0highx2;
  r2highx2 = r2high * x2;

  h4 += r1lowx2;
  sr3lowx2 = sr3low * x2;

  h5 += r1highx2;
  sr3highx2 = sr3high * x2;
  alpha0 = *(double *) (constants + 24);

  m += 16;
  h6 += r2lowx2;

  l -= 16;
  h7 += r2highx2;

  z1 = *(double *) &d1;
  h0 += sr3lowx2;

  z0 = *(double *) &d0;
  h1 += sr3highx2;

  z1 -= alpha32;

  z0 -= alpha0;

  h5 += z3;

  h3 += z2;

  h1 += z1;

  h0 += z0;

  if (l >= 16) goto multiplyaddatleast16bytes;
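  /*
   * Fewer than 16 bytes remain: multiply the accumulator by r once more for
   * the block that was just added, without loading another full block.
   */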
multiplyaddatmost15bytes:;

  y7 = h7 + alpha130;

  y6 = h6 + alpha130;

  y1 = h1 + alpha32;

  y0 = h0 + alpha32;

  y7 -= alpha130;

  y6 -= alpha130;

  y1 -= alpha32;

  y0 -= alpha32;

  y5 = h5 + alpha96;

  y4 = h4 + alpha96;

  x7 = h7 - y7;
  y7 *= scale;

  x6 = h6 - y6;
  y6 *= scale;

  x1 = h1 - y1;

  x0 = h0 - y0;

  y5 -= alpha96;

  y4 -= alpha96;

  x1 += y7;

  x0 += y6;

  x7 += y5;

  x6 += y4;

  y3 = h3 + alpha64;

  y2 = h2 + alpha64;

  x0 += x1;

  x6 += x7;

  y3 -= alpha64;
  r3low = r3low_stack;

  y2 -= alpha64;
  r0low = r0low_stack;

  x5 = h5 - y5;
  r3lowx0 = r3low * x0;
  r3high = r3high_stack;

  x4 = h4 - y4;
  r0lowx6 = r0low * x6;
  r0high = r0high_stack;

  x3 = h3 - y3;
  r3highx0 = r3high * x0;
  sr1low = sr1low_stack;

  x2 = h2 - y2;
  r0highx6 = r0high * x6;
  sr1high = sr1high_stack;

  x5 += y3;
  r0lowx0 = r0low * x0;
  r1low = r1low_stack;

  h6 = r3lowx0 + r0lowx6;
  sr1lowx6 = sr1low * x6;
  r1high = r1high_stack;

  x4 += y2;
  r0highx0 = r0high * x0;
  sr2low = sr2low_stack;

  h7 = r3highx0 + r0highx6;
  sr1highx6 = sr1high * x6;
  sr2high = sr2high_stack;

  x3 += y1;
  r1lowx0 = r1low * x0;
  r2low = r2low_stack;

  h0 = r0lowx0 + sr1lowx6;
  sr2lowx6 = sr2low * x6;
  r2high = r2high_stack;

  x2 += y0;
  r1highx0 = r1high * x0;
  sr3low = sr3low_stack;

  h1 = r0highx0 + sr1highx6;
  sr2highx6 = sr2high * x6;
  sr3high = sr3high_stack;

  x4 += x5;
  r2lowx0 = r2low * x0;

  h2 = r1lowx0 + sr2lowx6;
  sr3lowx6 = sr3low * x6;

  x2 += x3;
  r2highx0 = r2high * x0;

  h3 = r1highx0 + sr2highx6;
  sr3highx6 = sr3high * x6;

  r1highx4 = r1high * x4;

  h4 = r2lowx0 + sr3lowx6;
  r1lowx4 = r1low * x4;

  r0highx4 = r0high * x4;

  h5 = r2highx0 + sr3highx6;
  r0lowx4 = r0low * x4;

  h7 += r1highx4;
  sr3highx4 = sr3high * x4;

  h6 += r1lowx4;
  sr3lowx4 = sr3low * x4;

  h5 += r0highx4;
  sr2highx4 = sr2high * x4;

  h4 += r0lowx4;
  sr2lowx4 = sr2low * x4;

  h3 += sr3highx4;
  r0lowx2 = r0low * x2;

  h2 += sr3lowx4;
  r0highx2 = r0high * x2;

  h1 += sr2highx4;
  r1lowx2 = r1low * x2;

  h0 += sr2lowx4;
  r1highx2 = r1high * x2;

  h2 += r0lowx2;
  r2lowx2 = r2low * x2;

  h3 += r0highx2;
  r2highx2 = r2high * x2;

  h4 += r1lowx2;
  sr3lowx2 = sr3low * x2;

  h5 += r1highx2;
  sr3highx2 = sr3high * x2;

  h6 += r2lowx2;

  h7 += r2highx2;

  h0 += sr3lowx2;

  h1 += sr3highx2;
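  /*
   * Final partial block of 1..15 bytes: the lbelow* masks (sign bits of l - n)
   * zero out bytes past the end of the message and supply the padding 1 byte,
   * so the same load-and-accumulate sequence can be reused branch-free.
   */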
addatmost15bytes:;

  if (l == 0) goto nomorebytes;

  lbelow2 = l - 2;

  lbelow3 = l - 3;

  lbelow2 >>= 31;
  lbelow4 = l - 4;

  m00 = *(uchar *) (m + 0);
  lbelow3 >>= 31;
  m += lbelow2;

  m01 = *(uchar *) (m + 1);
  lbelow4 >>= 31;
  m += lbelow3;

  m02 = *(uchar *) (m + 2);
  m += lbelow4;
  m0 = 2151;

  m03 = *(uchar *) (m + 3);
  m0 <<= 51;
  m1 = 2215;

  m0 += m00;
  m01 &= ~lbelow2;

  m02 &= ~lbelow3;
  m01 -= lbelow2;

  m01 <<= 8;
  m03 &= ~lbelow4;

  m0 += m01;
  lbelow2 -= lbelow3;

  m02 += lbelow2;
  lbelow3 -= lbelow4;

  m02 <<= 16;
  m03 += lbelow3;

  m03 <<= 24;
  m0 += m02;

  m0 += m03;
  lbelow5 = l - 5;

  lbelow6 = l - 6;
  lbelow7 = l - 7;

  lbelow5 >>= 31;
  lbelow8 = l - 8;

  lbelow6 >>= 31;
  m += lbelow5;

  m10 = *(uchar *) (m + 4);
  lbelow7 >>= 31;
  m += lbelow6;

  m11 = *(uchar *) (m + 5);
  lbelow8 >>= 31;
  m += lbelow7;

  m12 = *(uchar *) (m + 6);
  m1 <<= 51;
  m += lbelow8;

  m13 = *(uchar *) (m + 7);
  m10 &= ~lbelow5;
  lbelow4 -= lbelow5;

  m10 += lbelow4;
  lbelow5 -= lbelow6;

  m11 &= ~lbelow6;
  m11 += lbelow5;

  m11 <<= 8;
  m1 += m10;

  m1 += m11;
  m12 &= ~lbelow7;

  lbelow6 -= lbelow7;
  m13 &= ~lbelow8;

  m12 += lbelow6;
  lbelow7 -= lbelow8;

  m12 <<= 16;
  m13 += lbelow7;

  m13 <<= 24;
  m1 += m12;

  m1 += m13;
  m2 = 2279;

  lbelow9 = l - 9;
  m3 = 2343;

  lbelow10 = l - 10;
  lbelow11 = l - 11;

  lbelow9 >>= 31;
  lbelow12 = l - 12;

  lbelow10 >>= 31;
  m += lbelow9;

  m20 = *(uchar *) (m + 8);
  lbelow11 >>= 31;
  m += lbelow10;

  m21 = *(uchar *) (m + 9);
  lbelow12 >>= 31;
  m += lbelow11;

  m22 = *(uchar *) (m + 10);
  m2 <<= 51;
  m += lbelow12;

  m23 = *(uchar *) (m + 11);
  m20 &= ~lbelow9;
  lbelow8 -= lbelow9;

  m20 += lbelow8;
  lbelow9 -= lbelow10;

  m21 &= ~lbelow10;
  m21 += lbelow9;

  m21 <<= 8;
  m2 += m20;

  m2 += m21;
  m22 &= ~lbelow11;

  lbelow10 -= lbelow11;
  m23 &= ~lbelow12;

  m22 += lbelow10;
  lbelow11 -= lbelow12;

  m22 <<= 16;
  m23 += lbelow11;

  m23 <<= 24;
  m2 += m22;

  m3 <<= 51;
  lbelow13 = l - 13;

  lbelow13 >>= 31;
  lbelow14 = l - 14;

  lbelow14 >>= 31;
  m += lbelow13;
  lbelow15 = l - 15;

  m30 = *(uchar *) (m + 12);
  lbelow15 >>= 31;
  m += lbelow14;

  m31 = *(uchar *) (m + 13);
  m += lbelow15;
  m2 += m23;

  m32 = *(uchar *) (m + 14);
  m30 &= ~lbelow13;
  lbelow12 -= lbelow13;

  m30 += lbelow12;
  lbelow13 -= lbelow14;

  m3 += m30;
  m31 &= ~lbelow14;

  m31 += lbelow13;
  m32 &= ~lbelow15;

  m31 <<= 8;
  lbelow14 -= lbelow15;

  m3 += m31;
  m32 += lbelow14;
  d0 = m0;

  m32 <<= 16;
  m33 = lbelow15 + 1;
  d1 = m1;

  m33 <<= 24;
  m3 += m32;
  d2 = m2;

  m3 += m33;
  d3 = m3;

  alpha0 = *(double *) (constants + 24);

  z3 = *(double *) &d3;

  z2 = *(double *) &d2;

  z1 = *(double *) &d1;

  z0 = *(double *) &d0;

  z3 -= alpha96;

  z2 -= alpha64;

  z1 -= alpha32;

  z0 -= alpha0;

  h5 += z3;

  h3 += z2;

  h1 += z1;

  h0 += z0;

  y7 = h7 + alpha130;

  y6 = h6 + alpha130;

  y1 = h1 + alpha32;

  y0 = h0 + alpha32;

  y7 -= alpha130;

  y6 -= alpha130;

  y1 -= alpha32;

  y0 -= alpha32;

  y5 = h5 + alpha96;

  y4 = h4 + alpha96;

  x7 = h7 - y7;
  y7 *= scale;

  x6 = h6 - y6;
  y6 *= scale;

  x1 = h1 - y1;

  x0 = h0 - y0;

  y5 -= alpha96;

  y4 -= alpha96;

  x1 += y7;

  x0 += y6;

  x7 += y5;

  x6 += y4;

  y3 = h3 + alpha64;

  y2 = h2 + alpha64;

  x0 += x1;

  x6 += x7;

  y3 -= alpha64;
  r3low = r3low_stack;

  y2 -= alpha64;
  r0low = r0low_stack;

  x5 = h5 - y5;
  r3lowx0 = r3low * x0;
  r3high = r3high_stack;

  x4 = h4 - y4;
  r0lowx6 = r0low * x6;
  r0high = r0high_stack;

  x3 = h3 - y3;
  r3highx0 = r3high * x0;
  sr1low = sr1low_stack;

  x2 = h2 - y2;
  r0highx6 = r0high * x6;
  sr1high = sr1high_stack;

  x5 += y3;
  r0lowx0 = r0low * x0;
  r1low = r1low_stack;

  h6 = r3lowx0 + r0lowx6;
  sr1lowx6 = sr1low * x6;
  r1high = r1high_stack;

  x4 += y2;
  r0highx0 = r0high * x0;
  sr2low = sr2low_stack;

  h7 = r3highx0 + r0highx6;
  sr1highx6 = sr1high * x6;
  sr2high = sr2high_stack;

  x3 += y1;
  r1lowx0 = r1low * x0;
  r2low = r2low_stack;

  h0 = r0lowx0 + sr1lowx6;
  sr2lowx6 = sr2low * x6;
  r2high = r2high_stack;

  x2 += y0;
  r1highx0 = r1high * x0;
  sr3low = sr3low_stack;

  h1 = r0highx0 + sr1highx6;
  sr2highx6 = sr2high * x6;
  sr3high = sr3high_stack;

  x4 += x5;
  r2lowx0 = r2low * x0;

  h2 = r1lowx0 + sr2lowx6;
  sr3lowx6 = sr3low * x6;

  x2 += x3;
  r2highx0 = r2high * x0;

  h3 = r1highx0 + sr2highx6;
  sr3highx6 = sr3high * x6;

  r1highx4 = r1high * x4;

  h4 = r2lowx0 + sr3lowx6;
  r1lowx4 = r1low * x4;

  r0highx4 = r0high * x4;

  h5 = r2highx0 + sr3highx6;
  r0lowx4 = r0low * x4;

  h7 += r1highx4;
  sr3highx4 = sr3high * x4;

  h6 += r1lowx4;
  sr3lowx4 = sr3low * x4;

  h5 += r0highx4;
  sr2highx4 = sr2high * x4;

  h4 += r0lowx4;
  sr2lowx4 = sr2low * x4;

  h3 += sr3highx4;
  r0lowx2 = r0low * x2;

  h2 += sr3lowx4;
  r0highx2 = r0high * x2;

  h1 += sr2highx4;
  r1lowx2 = r1low * x2;

  h0 += sr2lowx4;
  r1highx2 = r1high * x2;

  h2 += r0lowx2;
  r2lowx2 = r2low * x2;

  h3 += r0highx2;
  r2highx2 = r2high * x2;

  h4 += r1lowx2;
  sr3lowx2 = sr3low * x2;

  h5 += r1highx2;
  sr3highx2 = sr3high * x2;

  h6 += r2lowx2;

  h7 += r2highx2;

  h0 += sr3lowx2;

  h1 += sr3highx2;
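  /*
   * All message bytes have been absorbed.  Convert the accumulator back to
   * integers, carry and reduce modulo 2^130 - 5, add the 16-byte pad s, and
   * write the tag in little-endian order.
   */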
nomorebytes:;

  offset0 = *(double *) (constants + 104);
  y7 = h7 + alpha130;

  offset1 = *(double *) (constants + 112);
  y0 = h0 + alpha32;

  offset2 = *(double *) (constants + 120);
  y1 = h1 + alpha32;

  offset3 = *(double *) (constants + 128);
  y2 = h2 + alpha64;

  y7 -= alpha130;

  y3 = h3 + alpha64;

  y4 = h4 + alpha96;

  y5 = h5 + alpha96;

  x7 = h7 - y7;
  y7 *= scale;

  y0 -= alpha32;

  y1 -= alpha32;

  y2 -= alpha64;

  h6 += x7;

  y3 -= alpha64;

  y4 -= alpha96;

  y5 -= alpha96;

  y6 = h6 + alpha130;

  x0 = h0 - y0;

  x1 = h1 - y1;

  x2 = h2 - y2;

  y6 -= alpha130;

  x0 += y7;

  x3 = h3 - y3;

  x4 = h4 - y4;

  x5 = h5 - y5;

  x6 = h6 - y6;

  y6 *= scale;

  x2 += y0;

  x3 += y1;

  x4 += y2;

  x0 += y6;

  x5 += y3;

  x6 += y4;

  x2 += x3;

  x0 += x1;

  x4 += x5;

  x6 += y5;

  x2 += offset1;
  *(double *) &d1 = x2;

  x0 += offset0;
  *(double *) &d0 = x0;

  x4 += offset2;
  *(double *) &d2 = x4;

  x6 += offset3;
  *(double *) &d3 = x6;

  f0 = d0;

  f1 = d1;
  bits32 = -1;

  f2 = d2;
  bits32 >>= 32;

  f3 = d3;
  f = f0 >> 32;

  f0 &= bits32;
  f &= 255;

  f1 += f;
  g0 = f0 + 5;

  g = g0 >> 32;
  g0 &= bits32;

  f = f1 >> 32;
  f1 &= bits32;

  f &= 255;
  g1 = f1 + g;

  g = g1 >> 32;
  f2 += f;

  f = f2 >> 32;
  g1 &= bits32;

  f2 &= bits32;
  f &= 255;

  f3 += f;
  g2 = f2 + g;

  g = g2 >> 32;
  g2 &= bits32;

  f4 = f3 >> 32;
  f3 &= bits32;

  f4 &= 255;
  g3 = f3 + g;

  g = g3 >> 32;
  g3 &= bits32;

  g4 = f4 + g;

  g4 = g4 - 4;
  s00 = *(uchar *) (s + 0);

  f = (int64) g4 >> 63;
  s01 = *(uchar *) (s + 1);

  f0 &= f;
  g0 &= ~f;
  s02 = *(uchar *) (s + 2);

  f1 &= f;
  f0 |= g0;
  s03 = *(uchar *) (s + 3);

  g1 &= ~f;
  f2 &= f;
  s10 = *(uchar *) (s + 4);

  f3 &= f;
  g2 &= ~f;
  s11 = *(uchar *) (s + 5);

  g3 &= ~f;
  f1 |= g1;
  s12 = *(uchar *) (s + 6);

  f2 |= g2;
  f3 |= g3;
  s13 = *(uchar *) (s + 7);

  s01 <<= 8;
  f0 += s00;
  s20 = *(uchar *) (s + 8);

  s02 <<= 16;
  f0 += s01;
  s21 = *(uchar *) (s + 9);

  s03 <<= 24;
  f0 += s02;
  s22 = *(uchar *) (s + 10);

  s11 <<= 8;
  f1 += s10;
  s23 = *(uchar *) (s + 11);

  s12 <<= 16;
  f1 += s11;
  s30 = *(uchar *) (s + 12);

  s13 <<= 24;
  f1 += s12;
  s31 = *(uchar *) (s + 13);

  f0 += s03;
  f1 += s13;
  s32 = *(uchar *) (s + 14);

  s21 <<= 8;
  f2 += s20;
  s33 = *(uchar *) (s + 15);

  s22 <<= 16;
  f2 += s21;

  s23 <<= 24;
  f2 += s22;

  s31 <<= 8;
  f3 += s30;

  s32 <<= 16;
  f3 += s31;

  s33 <<= 24;
  f3 += s32;

  f2 += s23;
  f3 += s33;

  *(uchar *) (out + 0) = f0;
  f0 >>= 8;
  *(uchar *) (out + 1) = f0;
  f0 >>= 8;
  *(uchar *) (out + 2) = f0;
  f0 >>= 8;
  *(uchar *) (out + 3) = f0;
  f0 >>= 8;
  f1 += f0;

  *(uchar *) (out + 4) = f1;
  f1 >>= 8;
  *(uchar *) (out + 5) = f1;
  f1 >>= 8;
  *(uchar *) (out + 6) = f1;
  f1 >>= 8;
  *(uchar *) (out + 7) = f1;
  f1 >>= 8;
  f2 += f1;

  *(uchar *) (out + 8) = f2;
  f2 >>= 8;
  *(uchar *) (out + 9) = f2;
  f2 >>= 8;
  *(uchar *) (out + 10) = f2;
  f2 >>= 8;
  *(uchar *) (out + 11) = f2;
  f2 >>= 8;
  f3 += f2;

  *(uchar *) (out + 12) = f3;
  f3 >>= 8;
  *(uchar *) (out + 13) = f3;
  f3 >>= 8;
  *(uchar *) (out + 14) = f3;
  f3 >>= 8;
  *(uchar *) (out + 15) = f3;
  return 0;
}
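/*
 * Usage sketch (not part of this file): with the standard NaCl
 * crypto_onetimeauth API declared in "crypto_onetimeauth.h", a 16-byte tag
 * is produced from a message and a fresh 32-byte one-time key, and checked
 * with crypto_onetimeauth_verify.  Sizes below assume the usual
 * crypto_onetimeauth_BYTES = 16 and crypto_onetimeauth_KEYBYTES = 32.
 *
 *   unsigned char key[32];   // fresh one-time key (r || s); r is clamped internally
 *   unsigned char tag[16];
 *   const unsigned char msg[] = "hello";
 *
 *   crypto_onetimeauth(tag, msg, sizeof msg - 1, key);
 *   if (crypto_onetimeauth_verify(tag, msg, sizeof msg - 1, key) == 0) {
 *       // tag is valid for msg under key
 *   }
 */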