busybox: update to 1.23.2
[tomato.git] / release/src-rt-6.x.4708/router/busybox/libbb/hash_md5_sha.c
blob 1f63ccdeed917fcf092e3dd1185bfc9deef843f3

/* vi: set sw=4 ts=4: */
/*
 * Utility routines.
 *
 * Copyright (C) 2010 Denys Vlasenko
 *
 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
 */
#include "libbb.h"

/* gcc 4.2.1 optimizes rotr64 better with inline than with macro
 * (for rotX32, there is no difference). Why? My guess is that
 * macro requires clever common subexpression elimination heuristics
 * in gcc, while inline basically forces it to happen.
 */
//#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
{
	return (x << n) | (x >> (32 - n));
}
//#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
{
	return (x >> n) | (x << (32 - n));
}
/* rotr64 is needed for sha512 only: */
//#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
{
	return (x >> n) | (x << (64 - n));
}

/* rotl64 only used for sha3 currently */
static ALWAYS_INLINE uint64_t rotl64(uint64_t x, unsigned n)
{
	return (x << n) | (x >> (64 - n));
}

/* Feed data through a temporary buffer.
 * The internal buffer remembers previous data until it has 64
 * bytes worth to pass on.
 */
static void FAST_FUNC common64_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
{
	unsigned bufpos = ctx->total64 & 63;

	ctx->total64 += len;

	while (1) {
		unsigned remaining = 64 - bufpos;
		if (remaining > len)
			remaining = len;
		/* Copy data into aligned buffer */
		memcpy(ctx->wbuffer + bufpos, buffer, remaining);
		len -= remaining;
		buffer = (const char *)buffer + remaining;
		bufpos += remaining;
		/* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
		bufpos -= 64;
		if (bufpos != 0)
			break;
		/* Buffer is filled up, process it */
		ctx->process_block(ctx);
		/*bufpos = 0; - already is */
	}
}

/* Process the remaining bytes in the buffer */
static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed)
{
	unsigned bufpos = ctx->total64 & 63;
	/* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
	ctx->wbuffer[bufpos++] = 0x80;

	/* This loop iterates either once or twice, no more, no less */
	while (1) {
		unsigned remaining = 64 - bufpos;
		memset(ctx->wbuffer + bufpos, 0, remaining);
		/* Do we have enough space for the length count? */
		if (remaining >= 8) {
			/* Store the 64-bit counter of bits in the buffer */
			uint64_t t = ctx->total64 << 3;
			if (swap_needed)
				t = bb_bswap_64(t);
			/* wbuffer is suitably aligned for this */
			*(bb__aliased_uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
		}
		ctx->process_block(ctx);
		if (remaining >= 8)
			break;
		bufpos = 0;
	}
}
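
/*
 * Added worked example of the padding above (illustrative comment, not
 * part of the original source): for a 3-byte message, 0x80 is written
 * at offset 3, offsets 4..55 are zeroed, the bit count (3*8 = 24) lands
 * in the last 8 bytes, and the loop runs once. For a 60-byte message,
 * 0x80 is written at offset 60 and only 3 bytes remain (< 8), so the
 * first pass just zero-pads and processes that block; the second pass
 * zeroes a fresh block and stores the count - the loop runs twice.
 */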

/*
 * Compute MD5 checksum of strings according to the
 * definition of MD5 in RFC 1321 from April 1992.
 *
 * Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
 *
 * Copyright (C) 1995-1999 Free Software Foundation, Inc.
 * Copyright (C) 2001 Manuel Novoa III
 * Copyright (C) 2003 Glenn L. McGrath
 * Copyright (C) 2003 Erik Andersen
 *
 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
 */

/* 0: fastest, 3: smallest */
#if CONFIG_MD5_SMALL < 0
# define MD5_SMALL 0
#elif CONFIG_MD5_SMALL > 3
# define MD5_SMALL 3
#else
# define MD5_SMALL CONFIG_MD5_SMALL
#endif

/* These are the four functions used in the four steps of the MD5 algorithm
 * and defined in RFC 1321. The first function is a little bit optimized
 * (as found in Colin Plumb's public domain implementation).
 * #define FF(b, c, d) ((b & c) | (~b & d))
 */
#undef FF
#undef FG
#undef FH
#undef FI
#define FF(b, c, d) (d ^ (b & (c ^ d)))
#define FG(b, c, d) FF(d, b, c)
#define FH(b, c, d) (b ^ c ^ d)
#define FI(b, c, d) (c ^ (b | ~d))

/* Hash a single block, 64 bytes long and 4-byte aligned */
static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
{
#if MD5_SMALL > 0
	/* Before we start, one word about the strange constants.
	 * They are defined in RFC 1321 as
	 *   T[i] = (int)(4294967296.0 * fabs(sin(i))), i=1..64
	 */
	static const uint32_t C_array[] = {
		/* round 1 */
		0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
		0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
		0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
		0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
		/* round 2 */
		0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
		0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
		0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
		0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
		/* round 3 */
		0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
		0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
		0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
		0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
		/* round 4 */
		0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
		0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
		0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
		0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
	};
	static const char P_array[] ALIGN1 = {
# if MD5_SMALL > 1
		0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */
# endif
		1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, /* 2 */
		5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, /* 3 */
		0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9  /* 4 */
	};
#endif
	uint32_t *words = (void*) ctx->wbuffer;
	uint32_t A = ctx->hash[0];
	uint32_t B = ctx->hash[1];
	uint32_t C = ctx->hash[2];
	uint32_t D = ctx->hash[3];

#if MD5_SMALL >= 2  /* 2 or 3 */

	static const char S_array[] ALIGN1 = {
		7, 12, 17, 22,
		5, 9, 14, 20,
		4, 11, 16, 23,
		6, 10, 15, 21
	};
	const uint32_t *pc;
	const char *pp;
	const char *ps;
	int i;
	uint32_t temp;

	if (BB_BIG_ENDIAN)
		for (i = 0; i < 16; i++)
			words[i] = SWAP_LE32(words[i]);

# if MD5_SMALL == 3
	pc = C_array;
	pp = P_array;
	ps = S_array - 4;

	for (i = 0; i < 64; i++) {
		if ((i & 0x0f) == 0)
			ps += 4;
		temp = A;
		switch (i >> 4) {
		case 0:
			temp += FF(B, C, D);
			break;
		case 1:
			temp += FG(B, C, D);
			break;
		case 2:
			temp += FH(B, C, D);
			break;
		case 3:
			temp += FI(B, C, D);
		}
		temp += words[(int) (*pp++)] + *pc++;
		temp = rotl32(temp, ps[i & 3]);
		temp += B;
		A = D;
		D = C;
		C = B;
		B = temp;
	}
# else  /* MD5_SMALL == 2 */
	pc = C_array;
	pp = P_array;
	ps = S_array;

	for (i = 0; i < 16; i++) {
		temp = A + FF(B, C, D) + words[(int) (*pp++)] + *pc++;
		temp = rotl32(temp, ps[i & 3]);
		temp += B;
		A = D;
		D = C;
		C = B;
		B = temp;
	}
	ps += 4;
	for (i = 0; i < 16; i++) {
		temp = A + FG(B, C, D) + words[(int) (*pp++)] + *pc++;
		temp = rotl32(temp, ps[i & 3]);
		temp += B;
		A = D;
		D = C;
		C = B;
		B = temp;
	}
	ps += 4;
	for (i = 0; i < 16; i++) {
		temp = A + FH(B, C, D) + words[(int) (*pp++)] + *pc++;
		temp = rotl32(temp, ps[i & 3]);
		temp += B;
		A = D;
		D = C;
		C = B;
		B = temp;
	}
	ps += 4;
	for (i = 0; i < 16; i++) {
		temp = A + FI(B, C, D) + words[(int) (*pp++)] + *pc++;
		temp = rotl32(temp, ps[i & 3]);
		temp += B;
		A = D;
		D = C;
		C = B;
		B = temp;
	}
# endif
	/* Add checksum to the starting values */
	ctx->hash[0] += A;
	ctx->hash[1] += B;
	ctx->hash[2] += C;
	ctx->hash[3] += D;

#else  /* MD5_SMALL == 0 or 1 */

	uint32_t A_save = A;
	uint32_t B_save = B;
	uint32_t C_save = C;
	uint32_t D_save = D;
# if MD5_SMALL == 1
	const uint32_t *pc;
	const char *pp;
	int i;
# endif

	/* First round: using the given function, the context and a constant
	   the next context is computed. Because the algorithm processes
	   32-bit words that are defined to be in little-endian byte order,
	   we may have to change the byte order before the computation.
	   To reduce the work for the next steps we save the swapped words
	   in the WORDS array. */
# undef OP
# define OP(a, b, c, d, s, T) \
	do { \
		a += FF(b, c, d) + (*words IF_BIG_ENDIAN(= SWAP_LE32(*words))) + T; \
		words++; \
		a = rotl32(a, s); \
		a += b; \
	} while (0)

	/* Round 1 */
# if MD5_SMALL == 1
	pc = C_array;
	for (i = 0; i < 4; i++) {
		OP(A, B, C, D, 7, *pc++);
		OP(D, A, B, C, 12, *pc++);
		OP(C, D, A, B, 17, *pc++);
		OP(B, C, D, A, 22, *pc++);
	}
# else
	OP(A, B, C, D, 7, 0xd76aa478);
	OP(D, A, B, C, 12, 0xe8c7b756);
	OP(C, D, A, B, 17, 0x242070db);
	OP(B, C, D, A, 22, 0xc1bdceee);
	OP(A, B, C, D, 7, 0xf57c0faf);
	OP(D, A, B, C, 12, 0x4787c62a);
	OP(C, D, A, B, 17, 0xa8304613);
	OP(B, C, D, A, 22, 0xfd469501);
	OP(A, B, C, D, 7, 0x698098d8);
	OP(D, A, B, C, 12, 0x8b44f7af);
	OP(C, D, A, B, 17, 0xffff5bb1);
	OP(B, C, D, A, 22, 0x895cd7be);
	OP(A, B, C, D, 7, 0x6b901122);
	OP(D, A, B, C, 12, 0xfd987193);
	OP(C, D, A, B, 17, 0xa679438e);
	OP(B, C, D, A, 22, 0x49b40821);
# endif
	words -= 16;

	/* For the second to fourth round we have the possibly swapped words
	   in WORDS. Redefine the macro to take an additional first
	   argument specifying the function to use. */
# undef OP
# define OP(f, a, b, c, d, k, s, T) \
	do { \
		a += f(b, c, d) + words[k] + T; \
		a = rotl32(a, s); \
		a += b; \
	} while (0)

	/* Round 2 */
# if MD5_SMALL == 1
	pp = P_array;
	for (i = 0; i < 4; i++) {
		OP(FG, A, B, C, D, (int) (*pp++), 5, *pc++);
		OP(FG, D, A, B, C, (int) (*pp++), 9, *pc++);
		OP(FG, C, D, A, B, (int) (*pp++), 14, *pc++);
		OP(FG, B, C, D, A, (int) (*pp++), 20, *pc++);
	}
# else
	OP(FG, A, B, C, D, 1, 5, 0xf61e2562);
	OP(FG, D, A, B, C, 6, 9, 0xc040b340);
	OP(FG, C, D, A, B, 11, 14, 0x265e5a51);
	OP(FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
	OP(FG, A, B, C, D, 5, 5, 0xd62f105d);
	OP(FG, D, A, B, C, 10, 9, 0x02441453);
	OP(FG, C, D, A, B, 15, 14, 0xd8a1e681);
	OP(FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
	OP(FG, A, B, C, D, 9, 5, 0x21e1cde6);
	OP(FG, D, A, B, C, 14, 9, 0xc33707d6);
	OP(FG, C, D, A, B, 3, 14, 0xf4d50d87);
	OP(FG, B, C, D, A, 8, 20, 0x455a14ed);
	OP(FG, A, B, C, D, 13, 5, 0xa9e3e905);
	OP(FG, D, A, B, C, 2, 9, 0xfcefa3f8);
	OP(FG, C, D, A, B, 7, 14, 0x676f02d9);
	OP(FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
# endif

	/* Round 3 */
# if MD5_SMALL == 1
	for (i = 0; i < 4; i++) {
		OP(FH, A, B, C, D, (int) (*pp++), 4, *pc++);
		OP(FH, D, A, B, C, (int) (*pp++), 11, *pc++);
		OP(FH, C, D, A, B, (int) (*pp++), 16, *pc++);
		OP(FH, B, C, D, A, (int) (*pp++), 23, *pc++);
	}
# else
	OP(FH, A, B, C, D, 5, 4, 0xfffa3942);
	OP(FH, D, A, B, C, 8, 11, 0x8771f681);
	OP(FH, C, D, A, B, 11, 16, 0x6d9d6122);
	OP(FH, B, C, D, A, 14, 23, 0xfde5380c);
	OP(FH, A, B, C, D, 1, 4, 0xa4beea44);
	OP(FH, D, A, B, C, 4, 11, 0x4bdecfa9);
	OP(FH, C, D, A, B, 7, 16, 0xf6bb4b60);
	OP(FH, B, C, D, A, 10, 23, 0xbebfbc70);
	OP(FH, A, B, C, D, 13, 4, 0x289b7ec6);
	OP(FH, D, A, B, C, 0, 11, 0xeaa127fa);
	OP(FH, C, D, A, B, 3, 16, 0xd4ef3085);
	OP(FH, B, C, D, A, 6, 23, 0x04881d05);
	OP(FH, A, B, C, D, 9, 4, 0xd9d4d039);
	OP(FH, D, A, B, C, 12, 11, 0xe6db99e5);
	OP(FH, C, D, A, B, 15, 16, 0x1fa27cf8);
	OP(FH, B, C, D, A, 2, 23, 0xc4ac5665);
# endif

	/* Round 4 */
# if MD5_SMALL == 1
	for (i = 0; i < 4; i++) {
		OP(FI, A, B, C, D, (int) (*pp++), 6, *pc++);
		OP(FI, D, A, B, C, (int) (*pp++), 10, *pc++);
		OP(FI, C, D, A, B, (int) (*pp++), 15, *pc++);
		OP(FI, B, C, D, A, (int) (*pp++), 21, *pc++);
	}
# else
	OP(FI, A, B, C, D, 0, 6, 0xf4292244);
	OP(FI, D, A, B, C, 7, 10, 0x432aff97);
	OP(FI, C, D, A, B, 14, 15, 0xab9423a7);
	OP(FI, B, C, D, A, 5, 21, 0xfc93a039);
	OP(FI, A, B, C, D, 12, 6, 0x655b59c3);
	OP(FI, D, A, B, C, 3, 10, 0x8f0ccc92);
	OP(FI, C, D, A, B, 10, 15, 0xffeff47d);
	OP(FI, B, C, D, A, 1, 21, 0x85845dd1);
	OP(FI, A, B, C, D, 8, 6, 0x6fa87e4f);
	OP(FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
	OP(FI, C, D, A, B, 6, 15, 0xa3014314);
	OP(FI, B, C, D, A, 13, 21, 0x4e0811a1);
	OP(FI, A, B, C, D, 4, 6, 0xf7537e82);
	OP(FI, D, A, B, C, 11, 10, 0xbd3af235);
	OP(FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
	OP(FI, B, C, D, A, 9, 21, 0xeb86d391);
# undef OP
# endif
	/* Add checksum to the starting values */
	ctx->hash[0] = A_save + A;
	ctx->hash[1] = B_save + B;
	ctx->hash[2] = C_save + C;
	ctx->hash[3] = D_save + D;
#endif
}
#undef FF
#undef FG
#undef FH
#undef FI

/* Initialize structure containing state of computation.
 * (RFC 1321, 3.3: Step 3)
 */
void FAST_FUNC md5_begin(md5_ctx_t *ctx)
{
	ctx->hash[0] = 0x67452301;
	ctx->hash[1] = 0xefcdab89;
	ctx->hash[2] = 0x98badcfe;
	ctx->hash[3] = 0x10325476;
	ctx->total64 = 0;
	ctx->process_block = md5_process_block64;
}

/* Used also for sha1 and sha256 */
void FAST_FUNC md5_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
{
	common64_hash(ctx, buffer, len);
}

/* Process the remaining bytes in the buffer and put the result from CTX
 * in the first 16 bytes following RESBUF. The result is always in little
 * endian byte order, so that a byte-wise output yields the expected
 * ASCII representation of the message digest.
 */
void FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf)
{
	/* MD5 stores total in LE, need to swap on BE arches: */
	common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN);

	/* The MD5 result is in little endian byte order */
	if (BB_BIG_ENDIAN) {
		ctx->hash[0] = SWAP_LE32(ctx->hash[0]);
		ctx->hash[1] = SWAP_LE32(ctx->hash[1]);
		ctx->hash[2] = SWAP_LE32(ctx->hash[2]);
		ctx->hash[3] = SWAP_LE32(ctx->hash[3]);
	}

	memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4);
}
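
/*
 * Minimal usage sketch of the MD5 streaming API above. Kept out of the
 * build on purpose; the helper name and the hex printing are purely
 * illustrative, only md5_begin/md5_hash/md5_end come from this file.
 */
#if 0
static void example_md5_of_buffer(const void *data, size_t len)
{
	md5_ctx_t ctx;
	uint8_t digest[16];
	unsigned i;

	md5_begin(&ctx);
	md5_hash(&ctx, data, len); /* may be called repeatedly to stream data */
	md5_end(&ctx, digest);     /* digest[] now holds the 16-byte result */

	for (i = 0; i < 16; i++)
		printf("%02x", digest[i]);
	printf("\n");
}
#endif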

/*
 * SHA1 part is:
 * Copyright 2007 Rob Landley <rob@landley.net>
 *
 * Based on the public domain SHA-1 in C by Steve Reid <steve@edmweb.com>
 * from http://www.mirrors.wiretapped.net/security/cryptography/hashes/sha1/
 *
 * Licensed under GPLv2, see file LICENSE in this source tree.
 *
 * ---------------------------------------------------------------------------
 *
 * SHA256 and SHA512 parts are:
 * Released into the Public Domain by Ulrich Drepper <drepper@redhat.com>.
 * Shrank by Denys Vlasenko.
 *
 * ---------------------------------------------------------------------------
 *
 * The best way to test random blocksizes is to go to coreutils/md5_sha1_sum.c
 * and replace "4096" with something like "2000 + time(NULL) % 2097",
 * then rebuild and compare "shaNNNsum bigfile" results.
 */

static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
{
	static const uint32_t rconsts[] = {
		0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
	};
	int i, j;
	int cnt;
	uint32_t W[16+16];
	uint32_t a, b, c, d, e;

	/* On-stack work buffer frees up one register in the main loop
	 * which would otherwise be needed to hold the ctx pointer */
	for (i = 0; i < 16; i++)
		W[i] = W[i+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[i]);

	a = ctx->hash[0];
	b = ctx->hash[1];
	c = ctx->hash[2];
	d = ctx->hash[3];
	e = ctx->hash[4];

	/* 4 rounds of 20 operations each */
	cnt = 0;
	for (i = 0; i < 4; i++) {
		j = 19;
		do {
			uint32_t work;

			work = c ^ d;
			if (i == 0) {
				work = (work & b) ^ d;
				if (j <= 3)
					goto ge16;
				/* Used to do SWAP_BE32 here, but this
				 * requires ctx (see comment above) */
				work += W[cnt];
			} else {
				if (i == 2)
					work = ((b | c) & d) | (b & c);
				else /* i = 1 or 3 */
					work ^= b;
 ge16:
				W[cnt] = W[cnt+16] = rotl32(W[cnt+13] ^ W[cnt+8] ^ W[cnt+2] ^ W[cnt], 1);
				work += W[cnt];
			}
			work += e + rotl32(a, 5) + rconsts[i];

			/* Rotate by one for next time */
			e = d;
			d = c;
			c = /* b = */ rotl32(b, 30);
			b = a;
			a = work;
			cnt = (cnt + 1) & 15;
		} while (--j >= 0);
	}

	ctx->hash[0] += a;
	ctx->hash[1] += b;
	ctx->hash[2] += c;
	ctx->hash[3] += d;
	ctx->hash[4] += e;
}
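
/*
 * Added note on the W[16+16] layout above (not from the original
 * source): W acts as a 16-word circular buffer, and every store also
 * mirrors the value 16 slots later, so W[cnt+2], W[cnt+8] and
 * W[cnt+13] can be read without a "% 16". E.g. with cnt = 5,
 * W[5+13] = W[18] holds the same value as W[2], which is the word
 * the message schedule actually needs.
 */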

/* Constants for SHA512 from FIPS 180-2:4.2.3.
 * SHA256 constants from FIPS 180-2:4.2.2
 * are the most significant half of first 64 elements
 * of the same array.
 */
static const uint64_t sha_K[80] = {
	0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
	0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
	0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
	0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
	0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
	0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
	0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
	0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
	0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
	0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
	0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
	0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
	0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
	0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
	0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
	0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
	0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
	0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
	0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
	0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
	0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
	0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
	0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
	0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
	0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
	0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
	0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
	0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
	0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
	0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
	0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
	0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
	0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, /* [64]+ are used for sha512 only */
	0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
	0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
	0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
	0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
	0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
	0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
	0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
};

#undef Ch
#undef Maj
#undef S0
#undef S1
#undef R0
#undef R1

static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
{
	unsigned t;
	uint32_t W[64], a, b, c, d, e, f, g, h;
	const uint32_t *words = (uint32_t*) ctx->wbuffer;

	/* Operators defined in FIPS 180-2:4.1.2. */
#define Ch(x, y, z) ((x & y) ^ (~x & z))
#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
#define S0(x) (rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22))
#define S1(x) (rotr32(x, 6) ^ rotr32(x, 11) ^ rotr32(x, 25))
#define R0(x) (rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3))
#define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))

	/* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */
	for (t = 0; t < 16; ++t)
		W[t] = SWAP_BE32(words[t]);
	for (/*t = 16*/; t < 64; ++t)
		W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];

	a = ctx->hash[0];
	b = ctx->hash[1];
	c = ctx->hash[2];
	d = ctx->hash[3];
	e = ctx->hash[4];
	f = ctx->hash[5];
	g = ctx->hash[6];
	h = ctx->hash[7];

	/* The actual computation according to FIPS 180-2:6.2.2 step 3. */
	for (t = 0; t < 64; ++t) {
		/* Need to fetch upper half of sha_K[t]
		 * (I hope compiler is clever enough to just fetch
		 * upper half)
		 */
		uint32_t K_t = sha_K[t] >> 32;
		uint32_t T1 = h + S1(e) + Ch(e, f, g) + K_t + W[t];
		uint32_t T2 = S0(a) + Maj(a, b, c);
		h = g;
		g = f;
		f = e;
		e = d + T1;
		d = c;
		c = b;
		b = a;
		a = T1 + T2;
	}
#undef Ch
#undef Maj
#undef S0
#undef S1
#undef R0
#undef R1
	/* Add the starting values of the context according to FIPS 180-2:6.2.2
	   step 4. */
	ctx->hash[0] += a;
	ctx->hash[1] += b;
	ctx->hash[2] += c;
	ctx->hash[3] += d;
	ctx->hash[4] += e;
	ctx->hash[5] += f;
	ctx->hash[6] += g;
	ctx->hash[7] += h;
}
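
/*
 * Added example of the sha_K sharing described above: FIPS 180-2 gives
 * K[0] = 0x428a2f98 for SHA-256, which is exactly the upper half of
 * the 64-bit SHA-512 constant sha_K[0] = 0x428a2f98d728ae22, hence
 * the "sha_K[t] >> 32" in the round loop above.
 */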

static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
{
	unsigned t;
	uint64_t W[80];
	/* On i386, having assignments here (not later as sha256 does)
	 * produces 99 bytes smaller code with gcc 4.3.1
	 */
	uint64_t a = ctx->hash[0];
	uint64_t b = ctx->hash[1];
	uint64_t c = ctx->hash[2];
	uint64_t d = ctx->hash[3];
	uint64_t e = ctx->hash[4];
	uint64_t f = ctx->hash[5];
	uint64_t g = ctx->hash[6];
	uint64_t h = ctx->hash[7];
	const uint64_t *words = (uint64_t*) ctx->wbuffer;

	/* Operators defined in FIPS 180-2:4.1.2. */
#define Ch(x, y, z) ((x & y) ^ (~x & z))
#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
#define S0(x) (rotr64(x, 28) ^ rotr64(x, 34) ^ rotr64(x, 39))
#define S1(x) (rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41))
#define R0(x) (rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7))
#define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))

	/* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */
	for (t = 0; t < 16; ++t)
		W[t] = SWAP_BE64(words[t]);
	for (/*t = 16*/; t < 80; ++t)
		W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];

	/* The actual computation according to FIPS 180-2:6.3.2 step 3. */
	for (t = 0; t < 80; ++t) {
		uint64_t T1 = h + S1(e) + Ch(e, f, g) + sha_K[t] + W[t];
		uint64_t T2 = S0(a) + Maj(a, b, c);
		h = g;
		g = f;
		f = e;
		e = d + T1;
		d = c;
		c = b;
		b = a;
		a = T1 + T2;
	}
#undef Ch
#undef Maj
#undef S0
#undef S1
#undef R0
#undef R1
	/* Add the starting values of the context according to FIPS 180-2:6.3.2
	   step 4. */
	ctx->hash[0] += a;
	ctx->hash[1] += b;
	ctx->hash[2] += c;
	ctx->hash[3] += d;
	ctx->hash[4] += e;
	ctx->hash[5] += f;
	ctx->hash[6] += g;
	ctx->hash[7] += h;
}

void FAST_FUNC sha1_begin(sha1_ctx_t *ctx)
{
	ctx->hash[0] = 0x67452301;
	ctx->hash[1] = 0xefcdab89;
	ctx->hash[2] = 0x98badcfe;
	ctx->hash[3] = 0x10325476;
	ctx->hash[4] = 0xc3d2e1f0;
	ctx->total64 = 0;
	ctx->process_block = sha1_process_block64;
}

static const uint32_t init256[] = {
	0,
	0,
	0x6a09e667,
	0xbb67ae85,
	0x3c6ef372,
	0xa54ff53a,
	0x510e527f,
	0x9b05688c,
	0x1f83d9ab,
	0x5be0cd19,
};
static const uint32_t init512_lo[] = {
	0,
	0,
	0xf3bcc908,
	0x84caa73b,
	0xfe94f82b,
	0x5f1d36f1,
	0xade682d1,
	0x2b3e6c1f,
	0xfb41bd6b,
	0x137e2179,
};

/* Initialize structure containing state of computation.
   (FIPS 180-2:5.3.2) */
void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
{
	memcpy(&ctx->total64, init256, sizeof(init256));
	/*ctx->total64 = 0; - done by prepending two 32-bit zeros to init256 */
	ctx->process_block = sha256_process_block64;
}

/* Initialize structure containing state of computation.
   (FIPS 180-2:5.3.3) */
void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
{
	int i;
	/* Two extra iterations zero out ctx->total64[2] */
	uint64_t *tp = ctx->total64;
	for (i = 0; i < 2+8; i++)
		tp[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
	/*ctx->total64[0] = ctx->total64[1] = 0; - already done */
}
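
/*
 * Added worked example of the loop above: at i = 2 it pairs
 * init256[2] = 0x6a09e667 with init512_lo[2] = 0xf3bcc908, producing
 * ctx->hash[0] = 0x6a09e667f3bcc908, the first SHA-512 initial hash
 * value from FIPS 180-2:5.3.3; i = 0 and i = 1 pair zeros with zeros
 * and clear both words of ctx->total64.
 */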

void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
{
	unsigned bufpos = ctx->total64[0] & 127;
	unsigned remaining;

	/* First increment the byte count. FIPS 180-2 specifies the possible
	   length of the file up to 2^128 _bits_.
	   We compute the number of _bytes_ and convert to bits later. */
	ctx->total64[0] += len;
	if (ctx->total64[0] < len)
		ctx->total64[1]++;
#if 0
	remaining = 128 - bufpos;

	/* Hash whole blocks */
	while (len >= remaining) {
		memcpy(ctx->wbuffer + bufpos, buffer, remaining);
		buffer = (const char *)buffer + remaining;
		len -= remaining;
		remaining = 128;
		bufpos = 0;
		sha512_process_block128(ctx);
	}

	/* Save last, partial block */
	memcpy(ctx->wbuffer + bufpos, buffer, len);
#else
	while (1) {
		remaining = 128 - bufpos;
		if (remaining > len)
			remaining = len;
		/* Copy data into aligned buffer */
		memcpy(ctx->wbuffer + bufpos, buffer, remaining);
		len -= remaining;
		buffer = (const char *)buffer + remaining;
		bufpos += remaining;
		/* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
		bufpos -= 128;
		if (bufpos != 0)
			break;
		/* Buffer is filled up, process it */
		sha512_process_block128(ctx);
		/*bufpos = 0; - already is */
	}
#endif
}

/* Used also for sha256 */
void FAST_FUNC sha1_end(sha1_ctx_t *ctx, void *resbuf)
{
	unsigned hash_size;

	/* SHA stores total in BE, need to swap on LE arches: */
	common64_end(ctx, /*swap_needed:*/ BB_LITTLE_ENDIAN);

	hash_size = (ctx->process_block == sha1_process_block64) ? 5 : 8;
	/* This way we do not impose alignment constraints on resbuf: */
	if (BB_LITTLE_ENDIAN) {
		unsigned i;
		for (i = 0; i < hash_size; ++i)
			ctx->hash[i] = SWAP_BE32(ctx->hash[i]);
	}
	memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * hash_size);
}

void FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
{
	unsigned bufpos = ctx->total64[0] & 127;

	/* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0... */
	ctx->wbuffer[bufpos++] = 0x80;

	while (1) {
		unsigned remaining = 128 - bufpos;
		memset(ctx->wbuffer + bufpos, 0, remaining);
		if (remaining >= 16) {
			/* Store the 128-bit counter of bits in the buffer in BE format */
			uint64_t t;
			t = ctx->total64[0] << 3;
			t = SWAP_BE64(t);
			*(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 8]) = t;
			t = (ctx->total64[1] << 3) | (ctx->total64[0] >> 61);
			t = SWAP_BE64(t);
			*(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 16]) = t;
		}
		sha512_process_block128(ctx);
		if (remaining >= 16)
			break;
		bufpos = 0;
	}

	if (BB_LITTLE_ENDIAN) {
		unsigned i;
		for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i)
			ctx->hash[i] = SWAP_BE64(ctx->hash[i]);
	}
	memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
}
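
/*
 * Added worked example for the 128-bit length stored above: the bit
 * count is (total64[1]:total64[0]) * 8, i.e. the 128-bit byte count
 * shifted left by 3. The low 64 bits are total64[0] << 3; the high
 * 64 bits are total64[1] << 3 plus the three bits shifted out of
 * total64[0], recovered with total64[0] >> 61 - exactly the two
 * big-endian stores in the loop above.
 */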

/*
 * The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
 * Michael Peeters and Gilles Van Assche. For more information, feedback or
 * questions, please refer to our website: http://keccak.noekeon.org/
 *
 * Implementation by Ronny Van Keer,
 * hereby denoted as "the implementer".
 *
 * To the extent possible under law, the implementer has waived all copyright
 * and related or neighboring rights to the source code in this file.
 * http://creativecommons.org/publicdomain/zero/1.0/
 *
 * Busybox modifications (C) Lauri Kasanen, under the GPLv2.
 */

#if CONFIG_SHA3_SMALL < 0
# define SHA3_SMALL 0
#elif CONFIG_SHA3_SMALL > 1
# define SHA3_SMALL 1
#else
# define SHA3_SMALL CONFIG_SHA3_SMALL
#endif

#define OPTIMIZE_SHA3_FOR_32 0
/*
 * SHA3 can be optimized for 32-bit CPUs with bit-slicing:
 * every 64-bit word of state[] can be split into two 32-bit words
 * by even/odd bits. In this form, all rotations of the sha3 round
 * are 32-bit - and there are lots of them.
 * However, it requires either splitting/combining the state words
 * before/after the sha3 round (the code does this now)
 * or shuffling bits before xor'ing them into the state and in sha3_end.
 * Without shuffling, bit-slicing saves about 130 bytes of code and gives
 * a marginal speedup (but of course it produces the wrong result).
 * With shuffling it works, but costs about 260 extra code bytes and is slower.
 * Disabled for now:
 */
#if 0 /* LONG_MAX == 0x7fffffff */
# undef OPTIMIZE_SHA3_FOR_32
# define OPTIMIZE_SHA3_FOR_32 1
#endif
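
/*
 * Added concrete example of the bit-slicing idea above: once a lane is
 * split into its even-bit and odd-bit halves, a 64-bit rotate by an odd
 * amount such as 43 becomes one 32-bit rotate by 22 of one half and one
 * by 21 of the other (with the halves swapped), which is what the RhoPi
 * macro in the OPTIMIZE_SHA3_FOR_32 path below implements.
 */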

enum {
	SHA3_IBLK_BYTES = 72, /* 576 bits / 8 */
};

#if OPTIMIZE_SHA3_FOR_32
/* This splits every 64-bit word into a pair of 32-bit words,
 * even bits go into first word, odd bits go to second one.
 * The conversion is done in-place.
 */
static void split_halves(uint64_t *state)
{
	/* Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
	uint32_t *s32 = (uint32_t*)state;
	uint32_t t, x0, x1;
	int i;
	for (i = 24; i >= 0; --i) {
		x0 = s32[0];
		t = (x0 ^ (x0 >> 1)) & 0x22222222; x0 = x0 ^ t ^ (t << 1);
		t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0C; x0 = x0 ^ t ^ (t << 2);
		t = (x0 ^ (x0 >> 4)) & 0x00F000F0; x0 = x0 ^ t ^ (t << 4);
		t = (x0 ^ (x0 >> 8)) & 0x0000FF00; x0 = x0 ^ t ^ (t << 8);
		x1 = s32[1];
		t = (x1 ^ (x1 >> 1)) & 0x22222222; x1 = x1 ^ t ^ (t << 1);
		t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0C; x1 = x1 ^ t ^ (t << 2);
		t = (x1 ^ (x1 >> 4)) & 0x00F000F0; x1 = x1 ^ t ^ (t << 4);
		t = (x1 ^ (x1 >> 8)) & 0x0000FF00; x1 = x1 ^ t ^ (t << 8);
		*s32++ = (x0 & 0x0000FFFF) | (x1 << 16);
		*s32++ = (x0 >> 16) | (x1 & 0xFFFF0000);
	}
}
/* The reverse operation */
static void combine_halves(uint64_t *state)
{
	uint32_t *s32 = (uint32_t*)state;
	uint32_t t, x0, x1;
	int i;
	for (i = 24; i >= 0; --i) {
		x0 = s32[0];
		x1 = s32[1];
		t = (x0 & 0x0000FFFF) | (x1 << 16);
		x1 = (x0 >> 16) | (x1 & 0xFFFF0000);
		x0 = t;
		t = (x0 ^ (x0 >> 8)) & 0x0000FF00; x0 = x0 ^ t ^ (t << 8);
		t = (x0 ^ (x0 >> 4)) & 0x00F000F0; x0 = x0 ^ t ^ (t << 4);
		t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0C; x0 = x0 ^ t ^ (t << 2);
		t = (x0 ^ (x0 >> 1)) & 0x22222222; x0 = x0 ^ t ^ (t << 1);
		*s32++ = x0;
		t = (x1 ^ (x1 >> 8)) & 0x0000FF00; x1 = x1 ^ t ^ (t << 8);
		t = (x1 ^ (x1 >> 4)) & 0x00F000F0; x1 = x1 ^ t ^ (t << 4);
		t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0C; x1 = x1 ^ t ^ (t << 2);
		t = (x1 ^ (x1 >> 1)) & 0x22222222; x1 = x1 ^ t ^ (t << 1);
		*s32++ = x1;
	}
}
#endif

/*
 * In the crypto literature this function is usually called Keccak-f().
 */
static void sha3_process_block72(uint64_t *state)
{
	enum { NROUNDS = 24 };

#if OPTIMIZE_SHA3_FOR_32
	/*
	static const uint32_t IOTA_CONST_0[NROUNDS] = {
		0x00000001UL,
		0x00000000UL,
		0x00000000UL,
		0x00000000UL,
		0x00000001UL,
		0x00000001UL,
		0x00000001UL,
		0x00000001UL,
		0x00000000UL,
		0x00000000UL,
		0x00000001UL,
		0x00000000UL,
		0x00000001UL,
		0x00000001UL,
		0x00000001UL,
		0x00000001UL,
		0x00000000UL,
		0x00000000UL,
		0x00000000UL,
		0x00000000UL,
		0x00000001UL,
		0x00000000UL,
		0x00000001UL,
		0x00000000UL,
	};
	** bits are in lsb: 0101 0000 1111 0100 1111 0001
	*/
	uint32_t IOTA_CONST_0bits = (uint32_t)(0x0050f4f1);
	static const uint32_t IOTA_CONST_1[NROUNDS] = {
		0x00000000UL,
		0x00000089UL,
		0x8000008bUL,
		0x80008080UL,
		0x0000008bUL,
		0x00008000UL,
		0x80008088UL,
		0x80000082UL,
		0x0000000bUL,
		0x0000000aUL,
		0x00008082UL,
		0x00008003UL,
		0x0000808bUL,
		0x8000000bUL,
		0x8000008aUL,
		0x80000081UL,
		0x80000081UL,
		0x80000008UL,
		0x00000083UL,
		0x80008003UL,
		0x80008088UL,
		0x80000088UL,
		0x00008000UL,
		0x80008082UL,
	};

	uint32_t *const s32 = (uint32_t*)state;
	unsigned round;

	split_halves(state);

	for (round = 0; round < NROUNDS; round++) {
		unsigned x;

		/* Theta */
		{
			uint32_t BC[20];
			for (x = 0; x < 10; ++x) {
				BC[x+10] = BC[x] = s32[x]^s32[x+10]^s32[x+20]^s32[x+30]^s32[x+40];
			}
			for (x = 0; x < 10; x += 2) {
				uint32_t ta, tb;
				ta = BC[x+8] ^ rotl32(BC[x+3], 1);
				tb = BC[x+9] ^ BC[x+2];
				s32[x+0] ^= ta;
				s32[x+1] ^= tb;
				s32[x+10] ^= ta;
				s32[x+11] ^= tb;
				s32[x+20] ^= ta;
				s32[x+21] ^= tb;
				s32[x+30] ^= ta;
				s32[x+31] ^= tb;
				s32[x+40] ^= ta;
				s32[x+41] ^= tb;
			}
		}
		/* RhoPi */
		{
			uint32_t t0a,t0b, t1a,t1b;
			t1a = s32[1*2+0];
			t1b = s32[1*2+1];

#define RhoPi(PI_LANE, ROT_CONST) \
	t0a = s32[PI_LANE*2+0];\
	t0b = s32[PI_LANE*2+1];\
	if (ROT_CONST & 1) {\
		s32[PI_LANE*2+0] = rotl32(t1b, ROT_CONST/2+1);\
		s32[PI_LANE*2+1] = ROT_CONST == 1 ? t1a : rotl32(t1a, ROT_CONST/2+0);\
	} else {\
		s32[PI_LANE*2+0] = rotl32(t1a, ROT_CONST/2);\
		s32[PI_LANE*2+1] = rotl32(t1b, ROT_CONST/2);\
	}\
	t1a = t0a; t1b = t0b;

			RhoPi(10, 1)
			RhoPi( 7, 3)
			RhoPi(11, 6)
			RhoPi(17,10)
			RhoPi(18,15)
			RhoPi( 3,21)
			RhoPi( 5,28)
			RhoPi(16,36)
			RhoPi( 8,45)
			RhoPi(21,55)
			RhoPi(24, 2)
			RhoPi( 4,14)
			RhoPi(15,27)
			RhoPi(23,41)
			RhoPi(19,56)
			RhoPi(13, 8)
			RhoPi(12,25)
			RhoPi( 2,43)
			RhoPi(20,62)
			RhoPi(14,18)
			RhoPi(22,39)
			RhoPi( 9,61)
			RhoPi( 6,20)
			RhoPi( 1,44)
#undef RhoPi
		}
		/* Chi */
		for (x = 0; x <= 40;) {
			uint32_t BC0, BC1, BC2, BC3, BC4;
			BC0 = s32[x + 0*2];
			BC1 = s32[x + 1*2];
			BC2 = s32[x + 2*2];
			s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
			BC3 = s32[x + 3*2];
			s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
			BC4 = s32[x + 4*2];
			s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
			s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
			s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
			x++;
			BC0 = s32[x + 0*2];
			BC1 = s32[x + 1*2];
			BC2 = s32[x + 2*2];
			s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
			BC3 = s32[x + 3*2];
			s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
			BC4 = s32[x + 4*2];
			s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
			s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
			s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
			x += 9;
		}
		/* Iota */
		s32[0] ^= IOTA_CONST_0bits & 1;
		IOTA_CONST_0bits >>= 1;
		s32[1] ^= IOTA_CONST_1[round];
	}

	combine_halves(state);
#else
	/* Native 64-bit algorithm */
	static const uint16_t IOTA_CONST[NROUNDS] = {
		/* Elements should be 64-bit, but top half is always zero
		 * or 0x80000000. We encode 63rd bits in a separate word below.
		 * Same is true for 31st bits, which lets us use a 16-bit table
		 * instead of a 64-bit one. The speed penalty is lost in the noise.
		 */
		0x0001,
		0x8082,
		0x808a,
		0x8000,
		0x808b,
		0x0001,
		0x8081,
		0x8009,
		0x008a,
		0x0088,
		0x8009,
		0x000a,
		0x808b,
		0x008b,
		0x8089,
		0x8003,
		0x8002,
		0x0080,
		0x800a,
		0x000a,
		0x8081,
		0x8080,
		0x0001,
		0x8008,
	};
	/* bit for CONST[0] is in msb: 0011 0011 0000 0111 1101 1101 */
	const uint32_t IOTA_CONST_bit63 = (uint32_t)(0x3307dd00);
	/* bit for CONST[0] is in msb: 0001 0110 0011 1000 0001 1011 */
	const uint32_t IOTA_CONST_bit31 = (uint32_t)(0x16381b00);
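
	/*
	 * Added worked example of the encoding above: the full Keccak round
	 * constant for round 2 is 0x800000000000808a. Its low 16 bits, 0x808a,
	 * are IOTA_CONST[2]; its bit 31 is clear and its bit 63 is set, so the
	 * third bit from the msb of IOTA_CONST_bit63 is 1 while the same bit of
	 * IOTA_CONST_bit31 is 0. The Iota step below reassembles the 64-bit
	 * constant from these three pieces.
	 */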

	static const uint8_t ROT_CONST[24] = {
		1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
		27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
	};
	static const uint8_t PI_LANE[24] = {
		10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
		15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
	};
	/*static const uint8_t MOD5[10] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, };*/

	unsigned x;
	unsigned round;

	if (BB_BIG_ENDIAN) {
		for (x = 0; x < 25; x++) {
			state[x] = SWAP_LE64(state[x]);
		}
	}

	for (round = 0; round < NROUNDS; ++round) {
		/* Theta */
		{
			uint64_t BC[10];
			for (x = 0; x < 5; ++x) {
				BC[x + 5] = BC[x] = state[x]
					^ state[x + 5] ^ state[x + 10]
					^ state[x + 15] ^ state[x + 20];
			}
			/* Using 2x5 vector above eliminates the need to use
			 * BC[MOD5[x+N]] trick below to fetch BC[(x+N) % 5],
			 * and the code is a bit _smaller_.
			 */
			for (x = 0; x < 5; ++x) {
				uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1);
				state[x] ^= temp;
				state[x + 5] ^= temp;
				state[x + 10] ^= temp;
				state[x + 15] ^= temp;
				state[x + 20] ^= temp;
			}
		}

		/* Rho Pi */
		if (SHA3_SMALL) {
			uint64_t t1 = state[1];
			for (x = 0; x < 24; ++x) {
				uint64_t t0 = state[PI_LANE[x]];
				state[PI_LANE[x]] = rotl64(t1, ROT_CONST[x]);
				t1 = t0;
			}
		} else {
			/* Especially large benefit for 32-bit arch (75% faster):
			 * 64-bit rotations by non-constant usually are SLOW on those.
			 * We resort to unrolling here.
			 * This optimizes out PI_LANE[] and ROT_CONST[],
			 * but generates 300-500 more bytes of code.
			 */
			uint64_t t0;
			uint64_t t1 = state[1];
#define RhoPi_twice(x) \
	t0 = state[PI_LANE[x  ]]; \
	state[PI_LANE[x  ]] = rotl64(t1, ROT_CONST[x  ]); \
	t1 = state[PI_LANE[x+1]]; \
	state[PI_LANE[x+1]] = rotl64(t0, ROT_CONST[x+1]);
			RhoPi_twice(0); RhoPi_twice(2);
			RhoPi_twice(4); RhoPi_twice(6);
			RhoPi_twice(8); RhoPi_twice(10);
			RhoPi_twice(12); RhoPi_twice(14);
			RhoPi_twice(16); RhoPi_twice(18);
			RhoPi_twice(20); RhoPi_twice(22);
#undef RhoPi_twice
		}
		/* Chi */
# if LONG_MAX > 0x7fffffff
		for (x = 0; x <= 20; x += 5) {
			uint64_t BC0, BC1, BC2, BC3, BC4;
			BC0 = state[x + 0];
			BC1 = state[x + 1];
			BC2 = state[x + 2];
			state[x + 0] = BC0 ^ ((~BC1) & BC2);
			BC3 = state[x + 3];
			state[x + 1] = BC1 ^ ((~BC2) & BC3);
			BC4 = state[x + 4];
			state[x + 2] = BC2 ^ ((~BC3) & BC4);
			state[x + 3] = BC3 ^ ((~BC4) & BC0);
			state[x + 4] = BC4 ^ ((~BC0) & BC1);
		}
# else
		/* Reduced register pressure version
		 * for register-starved 32-bit arches
		 * (i386: -95 bytes, and it is _faster_)
		 */
		for (x = 0; x <= 40;) {
			uint32_t BC0, BC1, BC2, BC3, BC4;
			uint32_t *const s32 = (uint32_t*)state;
#  if SHA3_SMALL
 do_half:
#  endif
			BC0 = s32[x + 0*2];
			BC1 = s32[x + 1*2];
			BC2 = s32[x + 2*2];
			s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
			BC3 = s32[x + 3*2];
			s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
			BC4 = s32[x + 4*2];
			s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
			s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
			s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
			x++;
#  if SHA3_SMALL
			if (x & 1)
				goto do_half;
			x += 8;
#  else
			BC0 = s32[x + 0*2];
			BC1 = s32[x + 1*2];
			BC2 = s32[x + 2*2];
			s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
			BC3 = s32[x + 3*2];
			s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
			BC4 = s32[x + 4*2];
			s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
			s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
			s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
			x += 9;
#  endif
		}
# endif /* long is 32-bit */
		/* Iota */
		state[0] ^= IOTA_CONST[round]
			| (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000)
			| (uint64_t)((IOTA_CONST_bit63 << round) & 0x80000000) << 32;
	}

	if (BB_BIG_ENDIAN) {
		for (x = 0; x < 25; x++) {
			state[x] = SWAP_LE64(state[x]);
		}
	}
#endif
}

void FAST_FUNC sha3_begin(sha3_ctx_t *ctx)
{
	memset(ctx, 0, sizeof(*ctx));
}

void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buffer, size_t len)
{
#if SHA3_SMALL
	const uint8_t *data = buffer;
	unsigned bufpos = ctx->bytes_queued;

	while (1) {
		unsigned remaining = SHA3_IBLK_BYTES - bufpos;
		if (remaining > len)
			remaining = len;
		len -= remaining;
		/* XOR data into buffer */
		while (remaining != 0) {
			uint8_t *buf = (uint8_t*)ctx->state;
			buf[bufpos] ^= *data++;
			bufpos++;
			remaining--;
		}
		/* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
		bufpos -= SHA3_IBLK_BYTES;
		if (bufpos != 0)
			break;
		/* Buffer is filled up, process it */
		sha3_process_block72(ctx->state);
		/*bufpos = 0; - already is */
	}
	ctx->bytes_queued = bufpos + SHA3_IBLK_BYTES;
#else
	/* +50 bytes code size, but a bit faster because of long-sized XORs */
	const uint8_t *data = buffer;
	unsigned bufpos = ctx->bytes_queued;

	/* If already data in queue, continue queuing first */
	while (len != 0 && bufpos != 0) {
		uint8_t *buf = (uint8_t*)ctx->state;
		buf[bufpos] ^= *data++;
		len--;
		bufpos++;
		if (bufpos == SHA3_IBLK_BYTES) {
			bufpos = 0;
			goto do_block;
		}
	}

	/* Absorb complete blocks */
	while (len >= SHA3_IBLK_BYTES) {
		/* XOR data onto beginning of state[].
		 * We try to be efficient - operate one word at a time, not byte.
		 * Careful wrt unaligned access: can't just use "*(long*)data"!
		 */
		unsigned count = SHA3_IBLK_BYTES / sizeof(long);
		long *buf = (long*)ctx->state;
		do {
			long v;
			move_from_unaligned_long(v, (long*)data);
			*buf++ ^= v;
			data += sizeof(long);
		} while (--count);
		len -= SHA3_IBLK_BYTES;
 do_block:
		sha3_process_block72(ctx->state);
	}

	/* Queue remaining data bytes */
	while (len != 0) {
		uint8_t *buf = (uint8_t*)ctx->state;
		buf[bufpos] ^= *data++;
		bufpos++;
		len--;
	}

	ctx->bytes_queued = bufpos;
#endif
}

void FAST_FUNC sha3_end(sha3_ctx_t *ctx, void *resbuf)
{
	/* Padding */
	uint8_t *buf = (uint8_t*)ctx->state;
	buf[ctx->bytes_queued] ^= 1;
	buf[SHA3_IBLK_BYTES - 1] ^= 0x80;

	sha3_process_block72(ctx->state);

	/* Output */
	memcpy(resbuf, ctx->state, 64);
}
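
/*
 * Minimal usage sketch of the SHA3 API above. Kept out of the build;
 * the helper name is illustrative only. With SHA3_IBLK_BYTES = 72
 * (rate 576 bits) this is the Keccak variant used by this version's
 * sha3sum applet, and sha3_end() copies out a 64-byte digest.
 */
#if 0
static void example_sha3_of_buffer(const void *data, size_t len)
{
	sha3_ctx_t ctx;
	uint8_t digest[64];

	sha3_begin(&ctx);
	sha3_hash(&ctx, data, len); /* may be called repeatedly to stream data */
	sha3_end(&ctx, digest);     /* first 64 bytes of the state are the digest */
}
#endif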