/*
 * Public Domain poly1305 from Andrew Moon
 * poly1305-donna-unrolled.c from https://github.com/floodyberry/poly1305-donna
 */
6 /* $OpenBSD: poly1305.c,v 1.3 2013/12/19 22:57:13 djm Exp $ */
10 #include <sys/types.h>
/*
 * Widening multiply: the left operand is converted to uint64_t before the
 * multiplication, so the product of two 32-bit values is computed in
 * 64 bits and cannot wrap.
 */
#define mul32x32_64(lhs, rhs) ((uint64_t)(lhs) * (rhs))
/*
 * Assemble a uint32_t from the four consecutive bytes starting at `bytes`,
 * least-significant byte first. Each byte is widened to uint32_t before it
 * is shifted into place. The argument is expanded four times, so it must
 * not have side effects.
 */
#define U8TO32_LE(bytes) \
	(((uint32_t)((bytes)[3]) << 24) | \
	 ((uint32_t)((bytes)[2]) << 16) | \
	 ((uint32_t)((bytes)[1]) << 8) | \
	 ((uint32_t)((bytes)[0])))
/*
 * Store the low 32 bits of `v` at `p` as four bytes, least-significant
 * byte first.
 *
 * The four stores are wrapped in do { ... } while (0) so that an invocation
 * followed by ';' forms exactly one statement and stays correct inside an
 * unbraced if/else. The macro ends on the `while (0)` line with no trailing
 * continuation, so it cannot absorb the source line that follows it.
 * Both arguments are expanded four times, so they must not have side effects.
 */
#define U32TO8_LE(p, v) \
	do { \
		(p)[0] = (uint8_t)((v)); \
		(p)[1] = (uint8_t)((v) >> 8); \
		(p)[2] = (uint8_t)((v) >> 16); \
		(p)[3] = (uint8_t)((v) >> 24); \
	} while (0)
34 poly1305_auth(unsigned char out
[POLY1305_TAGLEN
], const unsigned char *m
, size_t inlen
, const unsigned char key
[POLY1305_KEYLEN
]) {
36 uint32_t h0
,h1
,h2
,h3
,h4
;
37 uint32_t r0
,r1
,r2
,r3
,r4
;
43 uint32_t g0
,g1
,g2
,g3
,g4
;
48 t0
= U8TO32_LE(key
+0);
49 t1
= U8TO32_LE(key
+4);
50 t2
= U8TO32_LE(key
+8);
51 t3
= U8TO32_LE(key
+12);
53 /* precompute multipliers */
54 r0
= t0
& 0x3ffffff; t0
>>= 26; t0
|= t1
<< 6;
55 r1
= t0
& 0x3ffff03; t1
>>= 20; t1
|= t2
<< 12;
56 r2
= t1
& 0x3ffc0ff; t2
>>= 14; t2
|= t3
<< 18;
57 r3
= t2
& 0x3f03fff; t3
>>= 8;
73 if (inlen
< 16) goto poly1305_donna_atmost15bytes
;
74 poly1305_donna_16bytes
:
84 h1
+= ((((uint64_t)t1
<< 32) | t0
) >> 26) & 0x3ffffff;
85 h2
+= ((((uint64_t)t2
<< 32) | t1
) >> 20) & 0x3ffffff;
86 h3
+= ((((uint64_t)t3
<< 32) | t2
) >> 14) & 0x3ffffff;
87 h4
+= (t3
>> 8) | (1 << 24);
91 t
[0] = mul32x32_64(h0
,r0
) + mul32x32_64(h1
,s4
) + mul32x32_64(h2
,s3
) + mul32x32_64(h3
,s2
) + mul32x32_64(h4
,s1
);
92 t
[1] = mul32x32_64(h0
,r1
) + mul32x32_64(h1
,r0
) + mul32x32_64(h2
,s4
) + mul32x32_64(h3
,s3
) + mul32x32_64(h4
,s2
);
93 t
[2] = mul32x32_64(h0
,r2
) + mul32x32_64(h1
,r1
) + mul32x32_64(h2
,r0
) + mul32x32_64(h3
,s4
) + mul32x32_64(h4
,s3
);
94 t
[3] = mul32x32_64(h0
,r3
) + mul32x32_64(h1
,r2
) + mul32x32_64(h2
,r1
) + mul32x32_64(h3
,r0
) + mul32x32_64(h4
,s4
);
95 t
[4] = mul32x32_64(h0
,r4
) + mul32x32_64(h1
,r3
) + mul32x32_64(h2
,r2
) + mul32x32_64(h3
,r1
) + mul32x32_64(h4
,r0
);
97 h0
= (uint32_t)t
[0] & 0x3ffffff; c
= (t
[0] >> 26);
98 t
[1] += c
; h1
= (uint32_t)t
[1] & 0x3ffffff; b
= (uint32_t)(t
[1] >> 26);
99 t
[2] += b
; h2
= (uint32_t)t
[2] & 0x3ffffff; b
= (uint32_t)(t
[2] >> 26);
100 t
[3] += b
; h3
= (uint32_t)t
[3] & 0x3ffffff; b
= (uint32_t)(t
[3] >> 26);
101 t
[4] += b
; h4
= (uint32_t)t
[4] & 0x3ffffff; b
= (uint32_t)(t
[4] >> 26);
104 if (inlen
>= 16) goto poly1305_donna_16bytes
;
107 poly1305_donna_atmost15bytes
:
108 if (!inlen
) goto poly1305_donna_finish
;
110 for (j
= 0; j
< inlen
; j
++) mp
[j
] = m
[j
];
112 for (; j
< 16; j
++) mp
[j
] = 0;
115 t0
= U8TO32_LE(mp
+0);
116 t1
= U8TO32_LE(mp
+4);
117 t2
= U8TO32_LE(mp
+8);
118 t3
= U8TO32_LE(mp
+12);
120 h0
+= t0
& 0x3ffffff;
121 h1
+= ((((uint64_t)t1
<< 32) | t0
) >> 26) & 0x3ffffff;
122 h2
+= ((((uint64_t)t2
<< 32) | t1
) >> 20) & 0x3ffffff;
123 h3
+= ((((uint64_t)t3
<< 32) | t2
) >> 14) & 0x3ffffff;
126 goto poly1305_donna_mul
;
128 poly1305_donna_finish
:
129 b
= h0
>> 26; h0
= h0
& 0x3ffffff;
130 h1
+= b
; b
= h1
>> 26; h1
= h1
& 0x3ffffff;
131 h2
+= b
; b
= h2
>> 26; h2
= h2
& 0x3ffffff;
132 h3
+= b
; b
= h3
>> 26; h3
= h3
& 0x3ffffff;
133 h4
+= b
; b
= h4
>> 26; h4
= h4
& 0x3ffffff;
134 h0
+= b
* 5; b
= h0
>> 26; h0
= h0
& 0x3ffffff;
137 g0
= h0
+ 5; b
= g0
>> 26; g0
&= 0x3ffffff;
138 g1
= h1
+ b
; b
= g1
>> 26; g1
&= 0x3ffffff;
139 g2
= h2
+ b
; b
= g2
>> 26; g2
&= 0x3ffffff;
140 g3
= h3
+ b
; b
= g3
>> 26; g3
&= 0x3ffffff;
141 g4
= h4
+ b
- (1 << 26);
145 h0
= (h0
& nb
) | (g0
& b
);
146 h1
= (h1
& nb
) | (g1
& b
);
147 h2
= (h2
& nb
) | (g2
& b
);
148 h3
= (h3
& nb
) | (g3
& b
);
149 h4
= (h4
& nb
) | (g4
& b
);
151 f0
= ((h0
) | (h1
<< 26)) + (uint64_t)U8TO32_LE(&key
[16]);
152 f1
= ((h1
>> 6) | (h2
<< 20)) + (uint64_t)U8TO32_LE(&key
[20]);
153 f2
= ((h2
>> 12) | (h3
<< 14)) + (uint64_t)U8TO32_LE(&key
[24]);
154 f3
= ((h3
>> 18) | (h4
<< 8)) + (uint64_t)U8TO32_LE(&key
[28]);
156 U32TO8_LE(&out
[ 0], f0
); f1
+= (f0
>> 32);
157 U32TO8_LE(&out
[ 4], f1
); f2
+= (f1
>> 32);
158 U32TO8_LE(&out
[ 8], f2
); f3
+= (f2
>> 32);
159 U32TO8_LE(&out
[12], f3
);