libvo/md5sum.c

   1 /* ========================================================================== **
   2  *
   3  *                                    MD5.c
   4  *
   5  * Copyright:
   6  *  Copyright (C) 2003, 2004 by Christopher R. Hertel
   7  *
   8  * Email: crh@ubiqx.mn.org
   9  *
  10  * $Id$
  11  *
  12  * -------------------------------------------------------------------------- **
  13  *
  14  * Description:
  15  *  Implements the MD5 hash algorithm, as described in RFC 1321.
  16  *
  17  * -------------------------------------------------------------------------- **
  18  *
  19  * License:
  20  *
  21  *  This library is free software; you can redistribute it and/or
  22  *  modify it under the terms of the GNU Lesser General Public
  23  *  License as published by the Free Software Foundation; either
  24  *  version 2.1 of the License, or (at your option) any later version.
  25  *
  26  *  This library is distributed in the hope that it will be useful,
  27  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  28  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  29  *  Lesser General Public License for more details.
  30  *
  31  *  You should have received a copy of the GNU Lesser General Public
  32  *  License along with this library; if not, write to the Free Software
  33  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  34  *
  35  * -------------------------------------------------------------------------- **
  36  *
  37  * Notes:
  38  *
  39  *  None of this will make any sense unless you're studying RFC 1321 as you
  40  *  read the code.
  41  *
  42  *  MD5 is described in RFC 1321.
  43  *  The MD*4* algorithm is described in RFC 1320 (that's 1321 - 1).
  44  *  MD5 is very similar to MD4, but not quite similar enough to justify
  45  *  putting the two into a single module.  Besides, I wanted to add a few
  46  *  extra functions to this one to expand its usability.
  47  *
  48  *  There are three primary motivations for this particular implementation.
  49  *  1) Programmer's pride.  I wanted to be able to say I'd done it, and I
  50  *     wanted to learn from the experience.
  51  *  2) Portability.  I wanted an implementation that I knew to be portable
  52  *     to a reasonable number of platforms.  In particular, the algorithm is
  53  *     designed with little-endian platforms in mind, but I wanted an
  54  *     endian-agnostic implementation.
  55  *  3) Compactness.  While not an overriding goal, I thought it worth-while
  56  *     to see if I could reduce the overall size of the result.  This is in
  57  *     keeping with my hopes that this library will be suitable for use in
  58  *     some embedded environments.
  59  *  Beyond that, cleanliness and clarity are always worth pursuing.
  60  *
  61  *  As mentioned above, the code really only makes sense if you are familiar
  62  *  with the MD5 algorithm or are using RFC 1321 as a guide.  This code is
  63  *  quirky, however, so you'll want to be reading carefully.
  64  *
  65  *  Yeah...most of the comments are cut-and-paste from my MD4 implementation.
  66  *
  67  * -------------------------------------------------------------------------- **
  68  *
  69  * References:
  70  *  IETF RFC 1321: The MD5 Message-Digest Algorithm
  71  *       Ron Rivest. IETF, April, 1992
  72  *
  73  * ========================================================================== **
  74  */
  75
  76 /* #include "MD5.h"   Line of original code */
  77
  78 #include "md5sum.h"   /* Added this line */
  79
  80 /* -------------------------------------------------------------------------- **
  81  * Static Constants:
  82  *
  83  *  K[][] - In round one, the values of k (which are used to index
  84  *          particular four-byte sequences in the input) are simply
  85  *          sequential.  In later rounds, however, they are a bit more
  86  *          varied.  Rather than calculate the values of k (which may
  87  *          or may not be possible--I haven't thought about it) the
  88  *          values are stored in this array.
  89  *
  90  *  S[][] - In each round there is a left rotate operation performed as
  91  *          part of the 16 permutations.  The number of bits varies in
  92  *          a repeating pattern.  This array keeps track of the patterns
  93  *          used in each round.
  94  *
  95  *  T[][] - There are four rounds of 16 permutations for a total of 64.
  96  *          In each of these 64 permutation operations, a different
  97  *          constant value is added to the mix.  The constants are
  98  *          based on the sine function...read RFC 1321 for more detail.
  99  *          In any case, the correct constants are stored in the T[][]
 100  *          array.  They're divided up into four groups of 16.
 101  */
 102
 103 static const uint8_t K[3][16] =
 104   {
 105     /* Round 1: skipped (since it is simply sequential). */
 106     {  1,  6, 11,  0,  5, 10, 15,  4,  9, 14,  3,  8, 13,  2,  7, 12 }, /* R2 */
 107     {  5,  8, 11, 14,  1,  4,  7, 10, 13,  0,  3,  6,  9, 12, 15,  2 }, /* R3 */
 108     {  0,  7, 14,  5, 12,  3, 10,  1,  8, 15,  6, 13,  4, 11,  2,  9 }  /* R4 */
 109   };
 110
 111 static const uint8_t S[4][4] =
 112   {
 113     { 7, 12, 17, 22 },  /* Round 1 */
 114     { 5,  9, 14, 20 },  /* Round 2 */
 115     { 4, 11, 16, 23 },  /* Round 3 */
 116     { 6, 10, 15, 21 }   /* Round 4 */
 117   };
 118
 119
 120 static const uint32_t T[4][16] =
 121   {
 122     { 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,   /* Round 1 */
 123       0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
 124       0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
 125       0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821 },
 126
 127     { 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,   /* Round 2 */
 128       0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
 129       0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
 130       0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a },
 131
 132     { 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,   /* Round 3 */
 133       0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
 134       0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
 135       0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665 },
 136
 137     { 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,   /* Round 4 */
 138       0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
 139       0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
 140       0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 },
 141   };
 142
 143
 144 /* -------------------------------------------------------------------------- **
 145  * Macros:
 146  *  md5F(), md5G(), md5H(), and md5I() are described in RFC 1321.
 147  *  All of these operations are bitwise, and so not impacted by endian-ness.
 148  *
 149  *  GetLongByte()
 150  *    Extract one byte from a (32-bit) longword.  A value of 0 for <idx>
 151  *    indicates the lowest order byte, while 3 indicates the highest order
 152  *    byte.
 153  *
 154  */
 155
 156 #define md5F( X, Y, Z ) ( ((X) & (Y)) | ((~(X)) & (Z)) )
 157 #define md5G( X, Y, Z ) ( ((X) & (Z)) | ((Y) & (~(Z))) )
 158 #define md5H( X, Y, Z ) ( (X) ^ (Y) ^ (Z) )
 159 #define md5I( X, Y, Z ) ( (Y) ^ ((X) | (~(Z))) )
 160
 161 #define GetLongByte( L, idx ) ((uchar)(( L >> (((idx) & 0x03) << 3) ) & 0xFF))
 162
 163
 164 /* -------------------------------------------------------------------------- **
 165  * Static Functions:
 166  */
 167
 168 static void Permute( uint32_t ABCD[4], const uchar block[64] )
 169   /* ------------------------------------------------------------------------ **
 170    * Permute the ABCD "registers" using the 64-byte <block> as a driver.
 171    *
 172    *  Input:  ABCD  - Pointer to an array of four unsigned longwords.
 173    *          block - An array of bytes, 64 bytes in size.
 174    *
 175    *  Output: none.
 176    *
 177    *  Notes:  The MD5 algorithm operates on a set of four longwords stored
 178    *          (conceptually) in four "registers".  It is easy to imagine a
 179    *          simple MD4/5 chip that would operate this way.  In any case,
 180    *          the mangling of the contents of those registers is driven by
 181    *          the input message.  The message is chopped and finally padded
 182    *          into 64-byte chunks and each chunk is used to manipulate the
 183    *          contents of the registers.
 184    *
 185    *          The MD5 Algorithm calls for padding the input to ensure that
 186    *          it is a multiple of 64 bytes in length.  The last 16 bytes
 187    *          of the padding space are used to store the message length
 188    *          (the length of the original message, before padding, expressed
 189    *          in terms of bits).  If there is not enough room for 16 bytes
 190    *          worth of bitcount (eg., if the original message was 122 bytes
 191    *          long) then the block is padded to the end with zeros and
 192    *          passed to this function.  Then *another* block is filled with
 193    *          zeros except for the last 16 bytes which contain the length.
 194    *
 195    *          Oh... and the algorithm requires that there be at least one
 196    *          padding byte.  The first padding byte has a value of 0x80,
 197    *          and any others are 0x00.
 198    *
 199    * ------------------------------------------------------------------------ **
 200    */
 201   {
 202   int      round;
 203   int      i, j;
 204   uint8_t  s;
 205   uint32_t a, b, c, d;
 206   uint32_t KeepABCD[4];
 207   uint32_t X[16];
 208
 209   /* Store the current ABCD values for later re-use.
 210    */
 211   for( i = 0; i < 4; i++ )
 212     KeepABCD[i] = ABCD[i];
 213
 214   /* Convert the input block into an array of unsigned longs, taking care
 215    * to read the block in Little Endian order (the algorithm assumes this).
 216    * The uint32_t values are then handled in host order.
 217    */
 218   for( i = 0, j = 0; i < 16; i++ )
 219     {
 220     X[i]  =  (uint32_t)block[j++];
 221     X[i] |= ((uint32_t)block[j++] << 8);
 222     X[i] |= ((uint32_t)block[j++] << 16);
 223     X[i] |= ((uint32_t)block[j++] << 24);
 224     }
 225
 226   /* This loop performs the four rounds of permutations.
 227    * The rounds are each very similar.  The differences are in three areas:
 228    *   - The function (F, G, H, or I) used to perform bitwise permutations
 229    *     on the registers,
 230    *   - The order in which values from X[] are chosen.
 231    *   - Changes to the number of bits by which the registers are rotated.
 232    * This implementation uses a switch statement to deal with some of the
 233    * differences between rounds.  Other differences are handled by storing
 234    * values in arrays and using the round number to select the correct set
 235    * of values.
 236    *
 237    * (My implementation appears to be a poor compromise between speed, size,
 238    * and clarity.  Ugh.  [crh])
 239    */
 240   for( round = 0; round < 4; round++ )
 241     {
 242     for( i = 0; i < 16; i++ )
 243       {
 244       j = (4 - (i % 4)) & 0x3;  /* <j> handles the rotation of ABCD.          */
 245       s = S[round][i%4];        /* <s> is the bit shift for this iteration.   */
 246
 247       b = ABCD[(j+1) & 0x3];    /* Copy the b,c,d values per ABCD rotation.   */
 248       c = ABCD[(j+2) & 0x3];    /* This isn't really necessary, it just looks */
 249       d = ABCD[(j+3) & 0x3];    /* clean & will hopefully be optimized away.  */
 250
 251       /* The actual perumation function.
 252        * This is broken out to minimize the code within the switch().
 253        */
 254       switch( round )
 255         {
 256         case 0:
 257           /* round 1 */
 258           a = md5F( b, c, d ) + X[i];
 259           break;
 260         case 1:
 261           /* round 2 */
 262           a = md5G( b, c, d ) + X[ K[0][i] ];
 263           break;
 264         case 2:
 265           /* round 3 */
 266           a = md5H( b, c, d ) + X[ K[1][i] ];
 267           break;
 268         default:
 269           /* round 4 */
 270           a = md5I( b, c, d ) + X[ K[2][i] ];
 271           break;
 272         }
 273       a = 0xFFFFFFFF & ( ABCD[j] + a + T[round][i] );
 274       ABCD[j] = b + (0xFFFFFFFF & (( a << s ) | ( a >> (32 - s) )));
 275       }
 276     }
 277
 278   /* Use the stored original A, B, C, D values to perform
 279    * one last convolution.
 280    */
 281   for( i = 0; i < 4; i++ )
 282     ABCD[i] = 0xFFFFFFFF & ( ABCD[i] + KeepABCD[i] );
 283
 284   } /* Permute */
 285
 286
 287 /* -------------------------------------------------------------------------- **
 288  * Functions:
 289  */
 290
 291 auth_md5Ctx *auth_md5InitCtx( auth_md5Ctx *ctx )
 292   /* ------------------------------------------------------------------------ **
 293    * Initialize an MD5 context.
 294    *
 295    *  Input:  ctx - A pointer to the MD5 context structure to be initialized.
 296    *                Contexts are typically created thusly:
 297    *                  ctx = (auth_md5Ctx *)malloc( sizeof(auth_md5Ctx) );
 298    *
 299    *  Output: A pointer to the initialized context (same as <ctx>).
 300    *
 301    *  Notes:  The purpose of the context is to make it possible to generate
 302    *          an MD5 Message Digest in stages, rather than having to pass a
 303    *          single large block to a single MD5 function.  The context
 304    *          structure keeps track of various bits of state information.
 305    *
 306    *          Once the context is initialized, the blocks of message data
 307    *          are passed to the <auth_md5SumCtx()> function.  Once the
 308    *          final bit of data has been handed to <auth_md5SumCtx()> the
 309    *          context can be closed out by calling <auth_md5CloseCtx()>,
 310    *          which also calculates the final MD5 result.
 311    *
 312    *          Don't forget to free an allocated context structure when
 313    *          you've finished using it.
 314    *
 315    *  See Also:  <auth_md5SumCtx()>, <auth_md5CloseCtx()>
 316    *
 317    * ------------------------------------------------------------------------ **
 318    */
 319   {
 320   ctx->len     = 0;
 321   ctx->b_used  = 0;
 322
 323   ctx->ABCD[0] = 0x67452301;    /* The array ABCD[] contains the four 4-byte  */
 324   ctx->ABCD[1] = 0xefcdab89;    /* "registers" that are manipulated to        */
 325   ctx->ABCD[2] = 0x98badcfe;    /* produce the MD5 digest.  The input acts    */
 326   ctx->ABCD[3] = 0x10325476;    /* upon the registers, not the other way      */
 327                                 /* 'round.  The initial values are those      */
 328                 /* given in RFC 1321 (pg. 4).  Note, however, that RFC 1321   */
 329                 /* provides these values as bytes, not as longwords, and the  */
 330                 /* bytes are arranged in little-endian order as if they were  */
 331                 /* the bytes of (little endian) 32-bit ints.  That's          */
 332                 /* confusing as all getout (to me, anyway). The values given  */
 333                 /* here are provided as 32-bit values in C language format,   */
 334                 /* so they are endian-agnostic.  */
 335   return( ctx );
 336   } /* auth_md5InitCtx */
 337
 338
 339 auth_md5Ctx *auth_md5SumCtx( auth_md5Ctx *ctx,
 340                              const uchar *src,
 341                              const int    len )
 342   /* ------------------------------------------------------------------------ **
 343    * Build an MD5 Message Digest within the given context.
 344    *
 345    *  Input:  ctx - Pointer to the context in which the MD5 sum is being
 346    *                built.
 347    *          src - A chunk of source data.  This will be used to drive
 348    *                the MD5 algorithm.
 349    *          len - The number of bytes in <src>.
 350    *
 351    *  Output: A pointer to the updated context (same as <ctx>).
 352    *
 353    *  See Also:  <auth_md5InitCtx()>, <auth_md5CloseCtx()>, <auth_md5Sum()>
 354    *
 355    * ------------------------------------------------------------------------ **
 356    */
 357   {
 358   int i;
 359
 360   /* Add the new block's length to the total length.
 361    */
 362   ctx->len += (uint32_t)len;
 363
 364   /* Copy the new block's data into the context block.
 365    * Call the Permute() function whenever the context block is full.
 366    */
 367   for( i = 0; i < len; i++ )
 368     {
 369     ctx->block[ ctx->b_used ] = src[i];
 370     (ctx->b_used)++;
 371     if( 64 == ctx->b_used )
 372       {
 373       Permute( ctx->ABCD, ctx->block );
 374       ctx->b_used = 0;
 375       }
 376     }
 377
 378   /* Return the updated context.
 379    */
 380   return( ctx );
 381   } /* auth_md5SumCtx */
 382
 383
 384 auth_md5Ctx *auth_md5CloseCtx( auth_md5Ctx *ctx, uchar *dst )
 385   /* ------------------------------------------------------------------------ **
 386    * Close an MD5 Message Digest context and generate the final MD5 sum.
 387    *
 388    *  Input:  ctx - Pointer to the context in which the MD5 sum is being
 389    *                built.
 390    *          dst - A pointer to at least 16 bytes of memory, which will
 391    *                receive the finished MD5 sum.
 392    *
 393    *  Output: A pointer to the closed context (same as <ctx>).
 394    *          You might use this to free a malloc'd context structure.  :)
 395    *
 396    *  Notes:  The context (<ctx>) is returned in an undefined state.
 397    *          It must be re-initialized before re-use.
 398    *
 399    *  See Also:  <auth_md5InitCtx()>, <auth_md5SumCtx()>
 400    *
 401    * ------------------------------------------------------------------------ **
 402    */
 403   {
 404   int      i;
 405   uint32_t l;
 406
 407   /* Add the required 0x80 padding initiator byte.
 408    * The auth_md5SumCtx() function always permutes and resets the context
 409    * block when it gets full, so we know that there must be at least one
 410    * free byte in the context block.
 411    */
 412   ctx->block[ctx->b_used] = 0x80;
 413   (ctx->b_used)++;
 414
 415   /* Zero out any remaining free bytes in the context block.
 416    */
 417   for( i = ctx->b_used; i < 64; i++ )
 418     ctx->block[i] = 0;
 419
 420   /* We need 8 bytes to store the length field.
 421    * If we don't have 8, call Permute() and reset the context block.
 422    */
 423   if( 56 < ctx->b_used )
 424     {
 425     Permute( ctx->ABCD, ctx->block );
 426     for( i = 0; i < 64; i++ )
 427       ctx->block[i] = 0;
 428     }
 429
 430   /* Add the total length and perform the final perumation.
 431    * Note:  The 60'th byte is read from the *original* <ctx->len> value
 432    *        and shifted to the correct position.  This neatly avoids
 433    *        any MAXINT numeric overflow issues.
 434    */
 435   l = ctx->len << 3;
 436   for( i = 0; i < 4; i++ )
 437     ctx->block[56+i] |= GetLongByte( l, i );
 438   ctx->block[60] = ((GetLongByte( ctx->len, 3 ) & 0xE0) >> 5);  /* See Above! */
 439   Permute( ctx->ABCD, ctx->block );
 440
 441   /* Now copy the result into the output buffer and we're done.
 442    */
 443   for( i = 0; i < 4; i++ )
 444     {
 445     dst[ 0+i] = GetLongByte( ctx->ABCD[0], i );
 446     dst[ 4+i] = GetLongByte( ctx->ABCD[1], i );
 447     dst[ 8+i] = GetLongByte( ctx->ABCD[2], i );
 448     dst[12+i] = GetLongByte( ctx->ABCD[3], i );
 449     }
 450
 451   /* Return the context.
 452    * This is done for compatibility with the other auth_md5*Ctx() functions.
 453    */
 454   return( ctx );
 455   } /* auth_md5CloseCtx */
 456
 457
 458 uchar *auth_md5Sum( uchar *dst, const uchar *src, const int len )
 459   /* ------------------------------------------------------------------------ **
 460    * Compute an MD5 message digest.
 461    *
 462    *  Input:  dst - Destination buffer into which the result will be written.
 463    *                Must be 16 bytes, minimum.
 464    *          src - Source data block to be MD5'd.
 465    *          len - The length, in bytes, of the source block.
 466    *                (Note that the length is given in bytes, not bits.)
 467    *
 468    *  Output: A pointer to <dst>, which will contain the calculated 16-byte
 469    *          MD5 message digest.
 470    *
 471    *  Notes:  This function is a shortcut.  It takes a single input block.
 472    *          For more drawn-out operations, see <auth_md5InitCtx()>.
 473    *
 474    *          This function is interface-compatible with the
 475    *          <auth_md4Sum()> function in the MD4 module.
 476    *
 477    *          The MD5 algorithm is designed to work on data with an
 478    *          arbitrary *bit* length.  Most implementations, this one
 479    *          included, handle the input data in byte-sized chunks.
 480    *
 481    *          The MD5 algorithm does much of its work using four-byte
 482    *          words, and so can be tuned for speed based on the endian-ness
 483    *          of the host.  This implementation is intended to be
 484    *          endian-neutral, which may make it a teeny bit slower than
 485    *          others.  ...maybe.
 486    *
 487    *  See Also:  <auth_md5InitCtx()>
 488    *
 489    * ------------------------------------------------------------------------ **
 490    */
 491   {
 492   auth_md5Ctx ctx[1];
 493
 494   (void)auth_md5InitCtx( ctx );             /* Open a context.      */
 495   (void)auth_md5SumCtx( ctx, src, len );    /* Pass only one block. */
 496   (void)auth_md5CloseCtx( ctx, dst );       /* Close the context.   */
 497
 498   return( dst );                            /* Makes life easy.     */
 499   } /* auth_md5Sum */
 500
 501
 502 /* ========================================================================== */