release/src/router/busybox/libbb/hash_md5_sha.c

   1 /* vi: set sw=4 ts=4: */
   2 /*
   3  * Utility routines.
   4  *
   5  * Copyright (C) 2010 Denys Vlasenko
   6  *
   7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
   8  */
   9
  10 #include "libbb.h"
  11
  12 /* gcc 4.2.1 optimizes rotr64 better with inline than with macro
  13  * (for rotX32, there is no difference). Why? My guess is that
  14  * macro requires clever common subexpression elimination heuristics
  15  * in gcc, while inline basically forces it to happen.
  16  */
  17 //#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
  18 static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
  19 {
  20         return (x << n) | (x >> (32 - n));
  21 }
  22 //#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
  23 static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
  24 {
  25         return (x >> n) | (x << (32 - n));
  26 }
  27 /* rotr64 in needed for sha512 only: */
  28 //#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
  29 static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
  30 {
  31         return (x >> n) | (x << (64 - n));
  32 }
  33
  34
  35 /* Feed data through a temporary buffer.
  36  * The internal buffer remembers previous data until it has 64
  37  * bytes worth to pass on.
  38  */
  39 static void FAST_FUNC common64_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
  40 {
  41         unsigned bufpos = ctx->total64 & 63;
  42
  43         ctx->total64 += len;
  44
  45         while (1) {
  46                 unsigned remaining = 64 - bufpos;
  47                 if (remaining > len)
  48                         remaining = len;
  49                 /* Copy data into aligned buffer */
  50                 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
  51                 len -= remaining;
  52                 buffer = (const char *)buffer + remaining;
  53                 bufpos += remaining;
  54                 /* clever way to do "if (bufpos != 64) break; ... ; bufpos = 0;" */
  55                 bufpos -= 64;
  56                 if (bufpos != 0)
  57                         break;
  58                 /* Buffer is filled up, process it */
  59                 ctx->process_block(ctx);
  60                 /*bufpos = 0; - already is */
  61         }
  62 }
  63
  64 /* Process the remaining bytes in the buffer */
  65 static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed)
  66 {
  67         unsigned bufpos = ctx->total64 & 63;
  68         /* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
  69         ctx->wbuffer[bufpos++] = 0x80;
  70
  71         /* This loop iterates either once or twice, no more, no less */
  72         while (1) {
  73                 unsigned remaining = 64 - bufpos;
  74                 memset(ctx->wbuffer + bufpos, 0, remaining);
  75                 /* Do we have enough space for the length count? */
  76                 if (remaining >= 8) {
  77                         /* Store the 64-bit counter of bits in the buffer */
  78                         uint64_t t = ctx->total64 << 3;
  79                         if (swap_needed)
  80                                 t = bb_bswap_64(t);
  81                         /* wbuffer is suitably aligned for this */
  82                         *(uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
  83                 }
  84                 ctx->process_block(ctx);
  85                 if (remaining >= 8)
  86                         break;
  87                 bufpos = 0;
  88         }
  89 }
  90
  91
  92 /*
  93  * Compute MD5 checksum of strings according to the
  94  * definition of MD5 in RFC 1321 from April 1992.
  95  *
  96  * Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
  97  *
  98  * Copyright (C) 1995-1999 Free Software Foundation, Inc.
  99  * Copyright (C) 2001 Manuel Novoa III
 100  * Copyright (C) 2003 Glenn L. McGrath
 101  * Copyright (C) 2003 Erik Andersen
 102  *
 103  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
 104  */
 105
 106 /* 0: fastest, 3: smallest */
 107 #if CONFIG_MD5_SIZE_VS_SPEED < 0
 108 # define MD5_SIZE_VS_SPEED 0
 109 #elif CONFIG_MD5_SIZE_VS_SPEED > 3
 110 # define MD5_SIZE_VS_SPEED 3
 111 #else
 112 # define MD5_SIZE_VS_SPEED CONFIG_MD5_SIZE_VS_SPEED
 113 #endif
 114
 115 /* These are the four functions used in the four steps of the MD5 algorithm
 116  * and defined in the RFC 1321.  The first function is a little bit optimized
 117  * (as found in Colin Plumbs public domain implementation).
 118  * #define FF(b, c, d) ((b & c) | (~b & d))
 119  */
 120 #undef FF
 121 #undef FG
 122 #undef FH
 123 #undef FI
 124 #define FF(b, c, d) (d ^ (b & (c ^ d)))
 125 #define FG(b, c, d) FF(d, b, c)
 126 #define FH(b, c, d) (b ^ c ^ d)
 127 #define FI(b, c, d) (c ^ (b | ~d))
 128
 129 /* Hash a single block, 64 bytes long and 4-byte aligned */
 130 static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
 131 {
 132 #if MD5_SIZE_VS_SPEED > 0
 133         /* Before we start, one word to the strange constants.
 134            They are defined in RFC 1321 as
 135            T[i] = (int)(4294967296.0 * fabs(sin(i))), i=1..64
 136          */
 137         static const uint32_t C_array[] = {
 138                 /* round 1 */
 139                 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
 140                 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
 141                 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
 142                 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
 143                 /* round 2 */
 144                 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
 145                 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
 146                 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
 147                 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
 148                 /* round 3 */
 149                 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
 150                 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
 151                 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05,
 152                 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
 153                 /* round 4 */
 154                 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
 155                 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
 156                 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
 157                 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
 158         };
 159         static const char P_array[] ALIGN1 = {
 160 # if MD5_SIZE_VS_SPEED > 1
 161                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */
 162 # endif
 163                 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, /* 2 */
 164                 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, /* 3 */
 165                 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9  /* 4 */
 166         };
 167 #endif
 168         uint32_t *words = (void*) ctx->wbuffer;
 169         uint32_t A = ctx->hash[0];
 170         uint32_t B = ctx->hash[1];
 171         uint32_t C = ctx->hash[2];
 172         uint32_t D = ctx->hash[3];
 173
 174 #if MD5_SIZE_VS_SPEED >= 2  /* 2 or 3 */
 175
 176         static const char S_array[] ALIGN1 = {
 177                 7, 12, 17, 22,
 178                 5, 9, 14, 20,
 179                 4, 11, 16, 23,
 180                 6, 10, 15, 21
 181         };
 182         const uint32_t *pc;
 183         const char *pp;
 184         const char *ps;
 185         int i;
 186         uint32_t temp;
 187
 188 # if BB_BIG_ENDIAN
 189         for (i = 0; i < 16; i++)
 190                 words[i] = SWAP_LE32(words[i]);
 191 # endif
 192
 193 # if MD5_SIZE_VS_SPEED == 3
 194         pc = C_array;
 195         pp = P_array;
 196         ps = S_array - 4;
 197
 198         for (i = 0; i < 64; i++) {
 199                 if ((i & 0x0f) == 0)
 200                         ps += 4;
 201                 temp = A;
 202                 switch (i >> 4) {
 203                 case 0:
 204                         temp += FF(B, C, D);
 205                         break;
 206                 case 1:
 207                         temp += FG(B, C, D);
 208                         break;
 209                 case 2:
 210                         temp += FH(B, C, D);
 211                         break;
 212                 case 3:
 213                         temp += FI(B, C, D);
 214                 }
 215                 temp += words[(int) (*pp++)] + *pc++;
 216                 temp = rotl32(temp, ps[i & 3]);
 217                 temp += B;
 218                 A = D;
 219                 D = C;
 220                 C = B;
 221                 B = temp;
 222         }
 223 # else  /* MD5_SIZE_VS_SPEED == 2 */
 224         pc = C_array;
 225         pp = P_array;
 226         ps = S_array;
 227
 228         for (i = 0; i < 16; i++) {
 229                 temp = A + FF(B, C, D) + words[(int) (*pp++)] + *pc++;
 230                 temp = rotl32(temp, ps[i & 3]);
 231                 temp += B;
 232                 A = D;
 233                 D = C;
 234                 C = B;
 235                 B = temp;
 236         }
 237         ps += 4;
 238         for (i = 0; i < 16; i++) {
 239                 temp = A + FG(B, C, D) + words[(int) (*pp++)] + *pc++;
 240                 temp = rotl32(temp, ps[i & 3]);
 241                 temp += B;
 242                 A = D;
 243                 D = C;
 244                 C = B;
 245                 B = temp;
 246         }
 247         ps += 4;
 248         for (i = 0; i < 16; i++) {
 249                 temp = A + FH(B, C, D) + words[(int) (*pp++)] + *pc++;
 250                 temp = rotl32(temp, ps[i & 3]);
 251                 temp += B;
 252                 A = D;
 253                 D = C;
 254                 C = B;
 255                 B = temp;
 256         }
 257         ps += 4;
 258         for (i = 0; i < 16; i++) {
 259                 temp = A + FI(B, C, D) + words[(int) (*pp++)] + *pc++;
 260                 temp = rotl32(temp, ps[i & 3]);
 261                 temp += B;
 262                 A = D;
 263                 D = C;
 264                 C = B;
 265                 B = temp;
 266         }
 267 # endif
 268         /* Add checksum to the starting values */
 269         ctx->hash[0] += A;
 270         ctx->hash[1] += B;
 271         ctx->hash[2] += C;
 272         ctx->hash[3] += D;
 273
 274 #else  /* MD5_SIZE_VS_SPEED == 0 or 1 */
 275
 276         uint32_t A_save = A;
 277         uint32_t B_save = B;
 278         uint32_t C_save = C;
 279         uint32_t D_save = D;
 280 # if MD5_SIZE_VS_SPEED == 1
 281         const uint32_t *pc;
 282         const char *pp;
 283         int i;
 284 # endif
 285
 286         /* First round: using the given function, the context and a constant
 287            the next context is computed.  Because the algorithm's processing
 288            unit is a 32-bit word and it is determined to work on words in
 289            little endian byte order we perhaps have to change the byte order
 290            before the computation.  To reduce the work for the next steps
 291            we save swapped words in WORDS array.  */
 292 # undef OP
 293 # define OP(a, b, c, d, s, T) \
 294         do { \
 295                 a += FF(b, c, d) + (*words IF_BIG_ENDIAN(= SWAP_LE32(*words))) + T; \
 296                 words++; \
 297                 a = rotl32(a, s); \
 298                 a += b; \
 299         } while (0)
 300
 301         /* Round 1 */
 302 # if MD5_SIZE_VS_SPEED == 1
 303         pc = C_array;
 304         for (i = 0; i < 4; i++) {
 305                 OP(A, B, C, D, 7, *pc++);
 306                 OP(D, A, B, C, 12, *pc++);
 307                 OP(C, D, A, B, 17, *pc++);
 308                 OP(B, C, D, A, 22, *pc++);
 309         }
 310 # else
 311         OP(A, B, C, D, 7, 0xd76aa478);
 312         OP(D, A, B, C, 12, 0xe8c7b756);
 313         OP(C, D, A, B, 17, 0x242070db);
 314         OP(B, C, D, A, 22, 0xc1bdceee);
 315         OP(A, B, C, D, 7, 0xf57c0faf);
 316         OP(D, A, B, C, 12, 0x4787c62a);
 317         OP(C, D, A, B, 17, 0xa8304613);
 318         OP(B, C, D, A, 22, 0xfd469501);
 319         OP(A, B, C, D, 7, 0x698098d8);
 320         OP(D, A, B, C, 12, 0x8b44f7af);
 321         OP(C, D, A, B, 17, 0xffff5bb1);
 322         OP(B, C, D, A, 22, 0x895cd7be);
 323         OP(A, B, C, D, 7, 0x6b901122);
 324         OP(D, A, B, C, 12, 0xfd987193);
 325         OP(C, D, A, B, 17, 0xa679438e);
 326         OP(B, C, D, A, 22, 0x49b40821);
 327 # endif
 328         words -= 16;
 329
 330         /* For the second to fourth round we have the possibly swapped words
 331            in WORDS.  Redefine the macro to take an additional first
 332            argument specifying the function to use.  */
 333 # undef OP
 334 # define OP(f, a, b, c, d, k, s, T) \
 335         do { \
 336                 a += f(b, c, d) + words[k] + T; \
 337                 a = rotl32(a, s); \
 338                 a += b; \
 339         } while (0)
 340
 341         /* Round 2 */
 342 # if MD5_SIZE_VS_SPEED == 1
 343         pp = P_array;
 344         for (i = 0; i < 4; i++) {
 345                 OP(FG, A, B, C, D, (int) (*pp++), 5, *pc++);
 346                 OP(FG, D, A, B, C, (int) (*pp++), 9, *pc++);
 347                 OP(FG, C, D, A, B, (int) (*pp++), 14, *pc++);
 348                 OP(FG, B, C, D, A, (int) (*pp++), 20, *pc++);
 349         }
 350 # else
 351         OP(FG, A, B, C, D, 1, 5, 0xf61e2562);
 352         OP(FG, D, A, B, C, 6, 9, 0xc040b340);
 353         OP(FG, C, D, A, B, 11, 14, 0x265e5a51);
 354         OP(FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
 355         OP(FG, A, B, C, D, 5, 5, 0xd62f105d);
 356         OP(FG, D, A, B, C, 10, 9, 0x02441453);
 357         OP(FG, C, D, A, B, 15, 14, 0xd8a1e681);
 358         OP(FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
 359         OP(FG, A, B, C, D, 9, 5, 0x21e1cde6);
 360         OP(FG, D, A, B, C, 14, 9, 0xc33707d6);
 361         OP(FG, C, D, A, B, 3, 14, 0xf4d50d87);
 362         OP(FG, B, C, D, A, 8, 20, 0x455a14ed);
 363         OP(FG, A, B, C, D, 13, 5, 0xa9e3e905);
 364         OP(FG, D, A, B, C, 2, 9, 0xfcefa3f8);
 365         OP(FG, C, D, A, B, 7, 14, 0x676f02d9);
 366         OP(FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
 367 # endif
 368
 369         /* Round 3 */
 370 # if MD5_SIZE_VS_SPEED == 1
 371         for (i = 0; i < 4; i++) {
 372                 OP(FH, A, B, C, D, (int) (*pp++), 4, *pc++);
 373                 OP(FH, D, A, B, C, (int) (*pp++), 11, *pc++);
 374                 OP(FH, C, D, A, B, (int) (*pp++), 16, *pc++);
 375                 OP(FH, B, C, D, A, (int) (*pp++), 23, *pc++);
 376         }
 377 # else
 378         OP(FH, A, B, C, D, 5, 4, 0xfffa3942);
 379         OP(FH, D, A, B, C, 8, 11, 0x8771f681);
 380         OP(FH, C, D, A, B, 11, 16, 0x6d9d6122);
 381         OP(FH, B, C, D, A, 14, 23, 0xfde5380c);
 382         OP(FH, A, B, C, D, 1, 4, 0xa4beea44);
 383         OP(FH, D, A, B, C, 4, 11, 0x4bdecfa9);
 384         OP(FH, C, D, A, B, 7, 16, 0xf6bb4b60);
 385         OP(FH, B, C, D, A, 10, 23, 0xbebfbc70);
 386         OP(FH, A, B, C, D, 13, 4, 0x289b7ec6);
 387         OP(FH, D, A, B, C, 0, 11, 0xeaa127fa);
 388         OP(FH, C, D, A, B, 3, 16, 0xd4ef3085);
 389         OP(FH, B, C, D, A, 6, 23, 0x04881d05);
 390         OP(FH, A, B, C, D, 9, 4, 0xd9d4d039);
 391         OP(FH, D, A, B, C, 12, 11, 0xe6db99e5);
 392         OP(FH, C, D, A, B, 15, 16, 0x1fa27cf8);
 393         OP(FH, B, C, D, A, 2, 23, 0xc4ac5665);
 394 # endif
 395
 396         /* Round 4 */
 397 # if MD5_SIZE_VS_SPEED == 1
 398         for (i = 0; i < 4; i++) {
 399                 OP(FI, A, B, C, D, (int) (*pp++), 6, *pc++);
 400                 OP(FI, D, A, B, C, (int) (*pp++), 10, *pc++);
 401                 OP(FI, C, D, A, B, (int) (*pp++), 15, *pc++);
 402                 OP(FI, B, C, D, A, (int) (*pp++), 21, *pc++);
 403         }
 404 # else
 405         OP(FI, A, B, C, D, 0, 6, 0xf4292244);
 406         OP(FI, D, A, B, C, 7, 10, 0x432aff97);
 407         OP(FI, C, D, A, B, 14, 15, 0xab9423a7);
 408         OP(FI, B, C, D, A, 5, 21, 0xfc93a039);
 409         OP(FI, A, B, C, D, 12, 6, 0x655b59c3);
 410         OP(FI, D, A, B, C, 3, 10, 0x8f0ccc92);
 411         OP(FI, C, D, A, B, 10, 15, 0xffeff47d);
 412         OP(FI, B, C, D, A, 1, 21, 0x85845dd1);
 413         OP(FI, A, B, C, D, 8, 6, 0x6fa87e4f);
 414         OP(FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
 415         OP(FI, C, D, A, B, 6, 15, 0xa3014314);
 416         OP(FI, B, C, D, A, 13, 21, 0x4e0811a1);
 417         OP(FI, A, B, C, D, 4, 6, 0xf7537e82);
 418         OP(FI, D, A, B, C, 11, 10, 0xbd3af235);
 419         OP(FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
 420         OP(FI, B, C, D, A, 9, 21, 0xeb86d391);
 421 # undef OP
 422 # endif
 423         /* Add checksum to the starting values */
 424         ctx->hash[0] = A_save + A;
 425         ctx->hash[1] = B_save + B;
 426         ctx->hash[2] = C_save + C;
 427         ctx->hash[3] = D_save + D;
 428 #endif
 429 }
 430 #undef FF
 431 #undef FG
 432 #undef FH
 433 #undef FI
 434
 435 /* Initialize structure containing state of computation.
 436  * (RFC 1321, 3.3: Step 3)
 437  */
 438 void FAST_FUNC md5_begin(md5_ctx_t *ctx)
 439 {
 440         ctx->hash[0] = 0x67452301;
 441         ctx->hash[1] = 0xefcdab89;
 442         ctx->hash[2] = 0x98badcfe;
 443         ctx->hash[3] = 0x10325476;
 444         ctx->total64 = 0;
 445         ctx->process_block = md5_process_block64;
 446 }
 447
 448 /* Used also for sha1 and sha256 */
 449 void FAST_FUNC md5_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
 450 {
 451         common64_hash(ctx, buffer, len);
 452 }
 453
 454 /* Process the remaining bytes in the buffer and put result from CTX
 455  * in first 16 bytes following RESBUF.  The result is always in little
 456  * endian byte order, so that a byte-wise output yields to the wanted
 457  * ASCII representation of the message digest.
 458  */
 459 void FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf)
 460 {
 461         /* MD5 stores total in LE, need to swap on BE arches: */
 462         common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN);
 463
 464         /* The MD5 result is in little endian byte order */
 465 #if BB_BIG_ENDIAN
 466         ctx->hash[0] = SWAP_LE32(ctx->hash[0]);
 467         ctx->hash[1] = SWAP_LE32(ctx->hash[1]);
 468         ctx->hash[2] = SWAP_LE32(ctx->hash[2]);
 469         ctx->hash[3] = SWAP_LE32(ctx->hash[3]);
 470 #endif
 471         memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4);
 472 }
 473
 474
 475 /*
 476  * SHA1 part is:
 477  * Copyright 2007 Rob Landley <rob@landley.net>
 478  *
 479  * Based on the public domain SHA-1 in C by Steve Reid <steve@edmweb.com>
 480  * from http://www.mirrors.wiretapped.net/security/cryptography/hashes/sha1/
 481  *
 482  * Licensed under GPLv2, see file LICENSE in this source tree.
 483  *
 484  * ---------------------------------------------------------------------------
 485  *
 486  * SHA256 and SHA512 parts are:
 487  * Released into the Public Domain by Ulrich Drepper <drepper@redhat.com>.
 488  * Shrank by Denys Vlasenko.
 489  *
 490  * ---------------------------------------------------------------------------
 491  *
 492  * The best way to test random blocksizes is to go to coreutils/md5_sha1_sum.c
 493  * and replace "4096" with something like "2000 + time(NULL) % 2097",
 494  * then rebuild and compare "shaNNNsum bigfile" results.
 495  */
 496
 497 static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
 498 {
 499         static const uint32_t rconsts[] = {
 500                 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
 501         };
 502         int i, j;
 503         int cnt;
 504         uint32_t W[16+16];
 505         uint32_t a, b, c, d, e;
 506
 507         /* On-stack work buffer frees up one register in the main loop
 508          * which otherwise will be needed to hold ctx pointer */
 509         for (i = 0; i < 16; i++)
 510                 W[i] = W[i+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[i]);
 511
 512         a = ctx->hash[0];
 513         b = ctx->hash[1];
 514         c = ctx->hash[2];
 515         d = ctx->hash[3];
 516         e = ctx->hash[4];
 517
 518         /* 4 rounds of 20 operations each */
 519         cnt = 0;
 520         for (i = 0; i < 4; i++) {
 521                 j = 19;
 522                 do {
 523                         uint32_t work;
 524
 525                         work = c ^ d;
 526                         if (i == 0) {
 527                                 work = (work & b) ^ d;
 528                                 if (j <= 3)
 529                                         goto ge16;
 530                                 /* Used to do SWAP_BE32 here, but this
 531                                  * requires ctx (see comment above) */
 532                                 work += W[cnt];
 533                         } else {
 534                                 if (i == 2)
 535                                         work = ((b | c) & d) | (b & c);
 536                                 else /* i = 1 or 3 */
 537                                         work ^= b;
 538  ge16:
 539                                 W[cnt] = W[cnt+16] = rotl32(W[cnt+13] ^ W[cnt+8] ^ W[cnt+2] ^ W[cnt], 1);
 540                                 work += W[cnt];
 541                         }
 542                         work += e + rotl32(a, 5) + rconsts[i];
 543
 544                         /* Rotate by one for next time */
 545                         e = d;
 546                         d = c;
 547                         c = /* b = */ rotl32(b, 30);
 548                         b = a;
 549                         a = work;
 550                         cnt = (cnt + 1) & 15;
 551                 } while (--j >= 0);
 552         }
 553
 554         ctx->hash[0] += a;
 555         ctx->hash[1] += b;
 556         ctx->hash[2] += c;
 557         ctx->hash[3] += d;
 558         ctx->hash[4] += e;
 559 }
 560
 561 /* Constants for SHA512 from FIPS 180-2:4.2.3.
 562  * SHA256 constants from FIPS 180-2:4.2.2
 563  * are the most significant half of first 64 elements
 564  * of the same array.
 565  */
 566 static const uint64_t sha_K[80] = {
 567         0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
 568         0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
 569         0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
 570         0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
 571         0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
 572         0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
 573         0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
 574         0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
 575         0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
 576         0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
 577         0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
 578         0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
 579         0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
 580         0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
 581         0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
 582         0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
 583         0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
 584         0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
 585         0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
 586         0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
 587         0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
 588         0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
 589         0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
 590         0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
 591         0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
 592         0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
 593         0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
 594         0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
 595         0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
 596         0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
 597         0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
 598         0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
 599         0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, /* [64]+ are used for sha512 only */
 600         0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
 601         0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
 602         0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
 603         0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
 604         0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
 605         0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
 606         0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
 607 };
 608
 609 #undef Ch
 610 #undef Maj
 611 #undef S0
 612 #undef S1
 613 #undef R0
 614 #undef R1
 615
 616 static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
 617 {
 618         unsigned t;
 619         uint32_t W[64], a, b, c, d, e, f, g, h;
 620         const uint32_t *words = (uint32_t*) ctx->wbuffer;
 621
 622         /* Operators defined in FIPS 180-2:4.1.2.  */
 623 #define Ch(x, y, z) ((x & y) ^ (~x & z))
 624 #define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
 625 #define S0(x) (rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22))
 626 #define S1(x) (rotr32(x, 6) ^ rotr32(x, 11) ^ rotr32(x, 25))
 627 #define R0(x) (rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3))
 628 #define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))
 629
 630         /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2.  */
 631         for (t = 0; t < 16; ++t)
 632                 W[t] = SWAP_BE32(words[t]);
 633         for (/*t = 16*/; t < 64; ++t)
 634                 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
 635
 636         a = ctx->hash[0];
 637         b = ctx->hash[1];
 638         c = ctx->hash[2];
 639         d = ctx->hash[3];
 640         e = ctx->hash[4];
 641         f = ctx->hash[5];
 642         g = ctx->hash[6];
 643         h = ctx->hash[7];
 644
 645         /* The actual computation according to FIPS 180-2:6.2.2 step 3.  */
 646         for (t = 0; t < 64; ++t) {
 647                 /* Need to fetch upper half of sha_K[t]
 648                  * (I hope compiler is clever enough to just fetch
 649                  * upper half)
 650                  */
 651                 uint32_t K_t = sha_K[t] >> 32;
 652                 uint32_t T1 = h + S1(e) + Ch(e, f, g) + K_t + W[t];
 653                 uint32_t T2 = S0(a) + Maj(a, b, c);
 654                 h = g;
 655                 g = f;
 656                 f = e;
 657                 e = d + T1;
 658                 d = c;
 659                 c = b;
 660                 b = a;
 661                 a = T1 + T2;
 662         }
 663 #undef Ch
 664 #undef Maj
 665 #undef S0
 666 #undef S1
 667 #undef R0
 668 #undef R1
 669         /* Add the starting values of the context according to FIPS 180-2:6.2.2
 670            step 4.  */
 671         ctx->hash[0] += a;
 672         ctx->hash[1] += b;
 673         ctx->hash[2] += c;
 674         ctx->hash[3] += d;
 675         ctx->hash[4] += e;
 676         ctx->hash[5] += f;
 677         ctx->hash[6] += g;
 678         ctx->hash[7] += h;
 679 }
 680
 681 static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
 682 {
 683         unsigned t;
 684         uint64_t W[80];
 685         /* On i386, having assignments here (not later as sha256 does)
 686          * produces 99 bytes smaller code with gcc 4.3.1
 687          */
 688         uint64_t a = ctx->hash[0];
 689         uint64_t b = ctx->hash[1];
 690         uint64_t c = ctx->hash[2];
 691         uint64_t d = ctx->hash[3];
 692         uint64_t e = ctx->hash[4];
 693         uint64_t f = ctx->hash[5];
 694         uint64_t g = ctx->hash[6];
 695         uint64_t h = ctx->hash[7];
 696         const uint64_t *words = (uint64_t*) ctx->wbuffer;
 697
 698         /* Operators defined in FIPS 180-2:4.1.2.  */
 699 #define Ch(x, y, z) ((x & y) ^ (~x & z))
 700 #define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
 701 #define S0(x) (rotr64(x, 28) ^ rotr64(x, 34) ^ rotr64(x, 39))
 702 #define S1(x) (rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41))
 703 #define R0(x) (rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7))
 704 #define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))
 705
 706         /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2.  */
 707         for (t = 0; t < 16; ++t)
 708                 W[t] = SWAP_BE64(words[t]);
 709         for (/*t = 16*/; t < 80; ++t)
 710                 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
 711
 712         /* The actual computation according to FIPS 180-2:6.3.2 step 3.  */
 713         for (t = 0; t < 80; ++t) {
 714                 uint64_t T1 = h + S1(e) + Ch(e, f, g) + sha_K[t] + W[t];
 715                 uint64_t T2 = S0(a) + Maj(a, b, c);
 716                 h = g;
 717                 g = f;
 718                 f = e;
 719                 e = d + T1;
 720                 d = c;
 721                 c = b;
 722                 b = a;
 723                 a = T1 + T2;
 724         }
 725 #undef Ch
 726 #undef Maj
 727 #undef S0
 728 #undef S1
 729 #undef R0
 730 #undef R1
 731         /* Add the starting values of the context according to FIPS 180-2:6.3.2
 732            step 4.  */
 733         ctx->hash[0] += a;
 734         ctx->hash[1] += b;
 735         ctx->hash[2] += c;
 736         ctx->hash[3] += d;
 737         ctx->hash[4] += e;
 738         ctx->hash[5] += f;
 739         ctx->hash[6] += g;
 740         ctx->hash[7] += h;
 741 }
 742
 743
 744 void FAST_FUNC sha1_begin(sha1_ctx_t *ctx)
 745 {
 746         ctx->hash[0] = 0x67452301;
 747         ctx->hash[1] = 0xefcdab89;
 748         ctx->hash[2] = 0x98badcfe;
 749         ctx->hash[3] = 0x10325476;
 750         ctx->hash[4] = 0xc3d2e1f0;
 751         ctx->total64 = 0;
 752         ctx->process_block = sha1_process_block64;
 753 }
 754
 755 static const uint32_t init256[] = {
 756         0,
 757         0,
 758         0x6a09e667,
 759         0xbb67ae85,
 760         0x3c6ef372,
 761         0xa54ff53a,
 762         0x510e527f,
 763         0x9b05688c,
 764         0x1f83d9ab,
 765         0x5be0cd19,
 766 };
 767 static const uint32_t init512_lo[] = {
 768         0,
 769         0,
 770         0xf3bcc908,
 771         0x84caa73b,
 772         0xfe94f82b,
 773         0x5f1d36f1,
 774         0xade682d1,
 775         0x2b3e6c1f,
 776         0xfb41bd6b,
 777         0x137e2179,
 778 };
 779
 780 /* Initialize structure containing state of computation.
 781    (FIPS 180-2:5.3.2)  */
 782 void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
 783 {
 784         memcpy(&ctx->total64, init256, sizeof(init256));
 785         /*ctx->total64 = 0; - done by prepending two 32-bit zeros to init256 */
 786         ctx->process_block = sha256_process_block64;
 787 }
 788
 789 /* Initialize structure containing state of computation.
 790    (FIPS 180-2:5.3.3)  */
 791 void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
 792 {
 793         int i;
 794         /* Two extra iterations zero out ctx->total64[2] */
 795         uint64_t *tp = ctx->total64;
 796         for (i = 0; i < 2+8; i++)
 797                 tp[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
 798         /*ctx->total64[0] = ctx->total64[1] = 0; - already done */
 799 }
 800
 801 void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
 802 {
 803         unsigned bufpos = ctx->total64[0] & 127;
 804         unsigned remaining;
 805
 806         /* First increment the byte count.  FIPS 180-2 specifies the possible
 807            length of the file up to 2^128 _bits_.
 808            We compute the number of _bytes_ and convert to bits later.  */
 809         ctx->total64[0] += len;
 810         if (ctx->total64[0] < len)
 811                 ctx->total64[1]++;
 812 #if 0
 813         remaining = 128 - bufpos;
 814
 815         /* Hash whole blocks */
 816         while (len >= remaining) {
 817                 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
 818                 buffer = (const char *)buffer + remaining;
 819                 len -= remaining;
 820                 remaining = 128;
 821                 bufpos = 0;
 822                 sha512_process_block128(ctx);
 823         }
 824
 825         /* Save last, partial blosk */
 826         memcpy(ctx->wbuffer + bufpos, buffer, len);
 827 #else
 828         while (1) {
 829                 remaining = 128 - bufpos;
 830                 if (remaining > len)
 831                         remaining = len;
 832                 /* Copy data into aligned buffer */
 833                 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
 834                 len -= remaining;
 835                 buffer = (const char *)buffer + remaining;
 836                 bufpos += remaining;
 837                 /* clever way to do "if (bufpos != 128) break; ... ; bufpos = 0;" */
 838                 bufpos -= 128;
 839                 if (bufpos != 0)
 840                         break;
 841                 /* Buffer is filled up, process it */
 842                 sha512_process_block128(ctx);
 843                 /*bufpos = 0; - already is */
 844         }
 845 #endif
 846 }
 847
 848 /* Used also for sha256 */
 849 void FAST_FUNC sha1_end(sha1_ctx_t *ctx, void *resbuf)
 850 {
 851         unsigned hash_size;
 852
 853         /* SHA stores total in BE, need to swap on LE arches: */
 854         common64_end(ctx, /*swap_needed:*/ BB_LITTLE_ENDIAN);
 855
 856         hash_size = (ctx->process_block == sha1_process_block64) ? 5 : 8;
 857         /* This way we do not impose alignment constraints on resbuf: */
 858         if (BB_LITTLE_ENDIAN) {
 859                 unsigned i;
 860                 for (i = 0; i < hash_size; ++i)
 861                         ctx->hash[i] = SWAP_BE32(ctx->hash[i]);
 862         }
 863         memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * hash_size);
 864 }
 865
 866 void FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
 867 {
 868         unsigned bufpos = ctx->total64[0] & 127;
 869
 870         /* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0... */
 871         ctx->wbuffer[bufpos++] = 0x80;
 872
 873         while (1) {
 874                 unsigned remaining = 128 - bufpos;
 875                 memset(ctx->wbuffer + bufpos, 0, remaining);
 876                 if (remaining >= 16) {
 877                         /* Store the 128-bit counter of bits in the buffer in BE format */
 878                         uint64_t t;
 879                         t = ctx->total64[0] << 3;
 880                         t = SWAP_BE64(t);
 881                         *(uint64_t *) (&ctx->wbuffer[128 - 8]) = t;
 882                         t = (ctx->total64[1] << 3) | (ctx->total64[0] >> 61);
 883                         t = SWAP_BE64(t);
 884                         *(uint64_t *) (&ctx->wbuffer[128 - 16]) = t;
 885                 }
 886                 sha512_process_block128(ctx);
 887                 if (remaining >= 16)
 888                         break;
 889                 bufpos = 0;
 890         }
 891
 892         if (BB_LITTLE_ENDIAN) {
 893                 unsigned i;
 894                 for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i)
 895                         ctx->hash[i] = SWAP_BE64(ctx->hash[i]);
 896         }
 897         memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
 898 }