mfbt/SHA1.cpp

   1 /* This Source Code Form is subject to the terms of the Mozilla Public
   2  * License, v. 2.0. If a copy of the MPL was not distributed with this
   3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   4
   5 #include "mozilla/Assertions.h"
   6 #include "mozilla/Endian.h"
   7 #include "mozilla/SHA1.h"
   8
   9 #include <string.h>
  10
  11 using mozilla::NativeEndian;
  12 using mozilla::SHA1Sum;
  13
  14 static inline uint32_t
  15 SHA_ROTL(uint32_t t, uint32_t n)
  16 {
  17   MOZ_ASSERT(n < 32);
  18   return (t << n) | (t >> (32 - n));
  19 }
  20
/*
 * Forward declaration of the block compression function; the definition is
 * at the end of this file.  |X| points into the combined H/W array (callers
 * in this file pass &H[H2X]) and |datain| points at the 64-byte block to be
 * consumed, viewed as sixteen 32-bit words.
 */
static void
shaCompress(volatile unsigned* X, const uint32_t* datain);
  23
/*
 * Bitwise round functions used by the SHA_RND1..SHA_RND4 macros in
 * shaCompress.  Every argument is fully parenthesised, so expressions may be
 * passed safely.
 */
/* Select: each result bit comes from Y where the X bit is 1, else from Z. */
#define SHA_F1(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
/* Parity: bitwise XOR of the three inputs. */
#define SHA_F2(X, Y, Z) ((X) ^ (Y) ^ (Z))
/* Majority: each result bit is 1 when at least two of the input bits are 1. */
#define SHA_F3(X, Y, Z) (((X) & (Y)) | ((Z) & ((X) | (Y))))
/* Parity again; textually identical to SHA_F2. */
#define SHA_F4(X, Y, Z) ((X) ^ (Y) ^ (Z))

/*
 * Message-schedule step: replace W[n] with the XOR of W[a], W[b], W[c] and
 * the old W[n], rotated left by one bit.  Relies on the XW() accessor that
 * is #defined inside shaCompress, so it can only be expanded there.
 */
#define SHA_MIX(n, a, b, c)    XW(n) = SHA_ROTL(XW(a) ^ XW(b) ^ XW(c) ^XW(n), 1)
  30
  31 SHA1Sum::SHA1Sum()
  32   : size(0), mDone(false)
  33 {
  34   // Initialize H with constants from FIPS180-1.
  35   H[0] = 0x67452301L;
  36   H[1] = 0xefcdab89L;
  37   H[2] = 0x98badcfeL;
  38   H[3] = 0x10325476L;
  39   H[4] = 0xc3d2e1f0L;
  40 }
  41
/*
 * Explanation of H array and index values:
 *
 * The context's H array is actually the concatenation of two arrays
 * defined by SHA1, the H array of state variables (5 elements),
 * and the W array of intermediate values, of which there are 16 elements.
 * The W array starts at H[5], that is W[0] is H[5].
 * Although these values are defined as 32-bit values, we use 64-bit
 * variables to hold them because the AMD64 stores 64-bit values in
 * memory MUCH faster than it stores any smaller values.
 *
 * NOTE(review): in this file the array is handed to shaCompress as
 * |volatile unsigned*|, so its elements are |unsigned| rather than a 64-bit
 * type.  The 64-bit / "8 bytes per element" statements in this comment
 * therefore look inherited from an earlier version of the code and do not
 * appear to describe the current element size -- confirm against SHA1.h.
 * The H2X/W2X index arithmetic below does not depend on the element size.
 *
 * Rather than passing the context structure to shaCompress, we pass
 * this combined array of H and W values.  We do not pass the address
 * of the first element of this array, but rather pass the address of an
 * element in the middle of the array, element X.  Presently X[0] is H[11].
 * So we pass the address of H[11] as the address of array X to shaCompress.
 * Then shaCompress accesses the members of the array using positive AND
 * negative indexes.
 *
 * Pictorially: (each element is 8 bytes)
 * H | H0 H1 H2 H3 H4 W0 W1 W2 W3 W4 W5 W6 W7 W8 W9 Wa Wb Wc Wd We Wf |
 * X |-11-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 |
 *
 * The byte offset from X[0] to any member of H and W is always
 * representable in a signed 8-bit value, which will be encoded
 * as a single byte offset in the X86-64 instruction set.
 * If we didn't pass the address of H[11], and instead passed the
 * address of H[0], the offsets to elements H[16] and above would be
 * greater than 127, not representable in a signed 8-bit value, and the
 * x86-64 instruction set would encode every such offset as a 32-bit
 * signed number in each instruction that accessed element H[16] or
 * higher.  This results in much bigger and slower code.
 */
#define H2X 11 /* X[0] is H[11], and H[0] is X[-11] */
#define W2X  6 /* X[0] is W[6],  and W[0] is X[-6]  */
  77
  78 /*
  79  *  SHA: Add data to context.
  80  */
  81 void
  82 SHA1Sum::update(const void* dataIn, uint32_t len)
  83 {
  84   MOZ_ASSERT(!mDone, "SHA1Sum can only be used to compute a single hash.");
  85
  86   const uint8_t* data = static_cast<const uint8_t*>(dataIn);
  87
  88   if (len == 0)
  89     return;
  90
  91   /* Accumulate the byte count. */
  92   unsigned int lenB = static_cast<unsigned int>(size) & 63U;
  93
  94   size += len;
  95
  96   /* Read the data into W and process blocks as they get full. */
  97   unsigned int togo;
  98   if (lenB > 0) {
  99     togo = 64U - lenB;
 100     if (len < togo)
 101       togo = len;
 102     memcpy(u.b + lenB, data, togo);
 103     len -= togo;
 104     data += togo;
 105     lenB = (lenB + togo) & 63U;
 106     if (!lenB)
 107       shaCompress(&H[H2X], u.w);
 108   }
 109
 110   while (len >= 64U) {
 111     len -= 64U;
 112     shaCompress(&H[H2X], reinterpret_cast<const uint32_t*>(data));
 113     data += 64U;
 114   }
 115
 116   if (len > 0)
 117     memcpy(u.b, data, len);
 118 }
 119
 120
 121 /*
 122  *  SHA: Generate hash value
 123  */
 124 void
 125 SHA1Sum::finish(SHA1Sum::Hash& hashOut)
 126 {
 127   MOZ_ASSERT(!mDone, "SHA1Sum can only be used to compute a single hash.");
 128
 129   uint64_t size2 = size;
 130   uint32_t lenB = uint32_t(size2) & 63;
 131
 132   static const uint8_t bulk_pad[64] =
 133     { 0x80,0,0,0,0,0,0,0,0,0,
 134       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 135       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
 136
 137   /* Pad with a binary 1 (e.g. 0x80), then zeroes, then length in bits. */
 138   update(bulk_pad, (((55 + 64) - lenB) & 63) + 1);
 139   MOZ_ASSERT((uint32_t(size) & 63) == 56);
 140
 141   /* Convert size from bytes to bits. */
 142   size2 <<= 3;
 143   u.w[14] = NativeEndian::swapToBigEndian(uint32_t(size2 >> 32));
 144   u.w[15] = NativeEndian::swapToBigEndian(uint32_t(size2));
 145   shaCompress(&H[H2X], u.w);
 146
 147   /* Output hash. */
 148   u.w[0] = NativeEndian::swapToBigEndian(H[0]);
 149   u.w[1] = NativeEndian::swapToBigEndian(H[1]);
 150   u.w[2] = NativeEndian::swapToBigEndian(H[2]);
 151   u.w[3] = NativeEndian::swapToBigEndian(H[3]);
 152   u.w[4] = NativeEndian::swapToBigEndian(H[4]);
 153   memcpy(hashOut, u.w, 20);
 154   mDone = true;
 155 }
 156
/*
 *  SHA: Compression function, unrolled.
 *
 * Some operations in shaCompress are done as 5 groups of 16 operations.
 * Others are done as 4 groups of 20 operations.
 * The code below shows that structure.
 *
 * The functions that compute the new values of the 5 state variables
 * A-E are done in 4 groups of 20 operations (or you may also think
 * of them as being done in 16 groups of 5 operations).  They are
 * done by the SHA_RNDx macros below, in the right column.
 *
 * The functions that set the 16 values of the W array are done in
 * 5 groups of 16 operations.  The first group is done by the
 * LOAD macros below, the latter 4 groups are done by SHA_MIX below,
 * in the left column.
 *
 * gcc's optimizer observes that each member of the W array is assigned
 * a value 5 times in this code.  It reduces the number of store
 * operations done to the W array in the context (that is, in the X array)
 * by creating a W array on the stack, and storing the W values there for
 * the first 4 groups of operations on W, and storing the values in the
 * context's W array only in the fifth group.  This is undesirable.
 * It is MUCH bigger code than simply using the context's W array, because
 * all the offsets to the W array in the stack are 32-bit signed offsets,
 * and it is no faster than storing the values in the context's W array.
 *
 * The original code for sha_fast.c prevented this creation of a separate
 * W array in the stack by creating a W array of 80 members, each of
 * whose elements is assigned only once. It also separated the computations
 * of the W array values and the computations of the values for the 5
 * state variables into two separate passes, W's, then A-E's so that the
 * second pass could be done all in registers (except for accessing the W
 * array) on machines with fewer registers.  The method is suboptimal
 * for machines with enough registers to do it all in one pass, and it
 * necessitates using many instructions with 32-bit offsets.
 *
 * This code eliminates the separate W array on the stack by a completely
 * different means: by declaring the X array volatile.  This prevents
 * the optimizer from trying to reduce the use of the X array by the
 * creation of a MORE expensive W array on the stack. The result is
 * that all instructions use signed 8-bit offsets and not 32-bit offsets.
 *
 * The combination of this code and the -O3 optimizer flag on GCC 3.4.3
 * results in code that is 3 times faster than the previous NSS sha_fast
 * code on AMD64.
 *
 * Parameters:
 *   X     - pointer into the combined H/W array, positioned so that
 *           X[-H2X] is H[0] and X[-W2X] is W[0] (see XH/XW below).  The
 *           five state words are read on entry and incremented on exit;
 *           the sixteen W slots are overwritten as scratch space.
 *   inbuf - the block to consume, read as sixteen words inbuf[0..15]; each
 *           word goes through NativeEndian::swapToBigEndian as it is loaded.
 */
static void
shaCompress(volatile unsigned *X, const uint32_t *inbuf)
{
  /* Working copies of the five state words for this block. */
  unsigned A, B, C, D, E;

  /* XH(n) addresses state word H[n]; XW(n) addresses schedule slot W[n]. */
#define XH(n) X[n - H2X]
#define XW(n) X[n - W2X]

  /* Additive constants, one per SHA_RNDx macro. */
#define K0 0x5a827999L
#define K1 0x6ed9eba1L
#define K2 0x8f1bbcdcL
#define K3 0xca62c1d6L

  /*
   * One round each.  The first argument is the variable that receives the
   * new value and the third is rotated left by 30.  Successive invocations
   * rotate the argument order (E,A,B,C,D -> D,E,A,B,C -> ...) instead of
   * moving values between the five variables after every round.
   */
#define SHA_RND1(a, b, c, d, e, n) \
  a = SHA_ROTL(b, 5) + SHA_F1(c, d, e) + a + XW(n) + K0; c = SHA_ROTL(c, 30)
#define SHA_RND2(a, b, c, d, e, n) \
  a = SHA_ROTL(b, 5) + SHA_F2(c, d, e) + a + XW(n) + K1; c = SHA_ROTL(c, 30)
#define SHA_RND3(a, b, c, d, e, n) \
  a = SHA_ROTL(b, 5) + SHA_F3(c, d, e) + a + XW(n) + K2; c = SHA_ROTL(c, 30)
#define SHA_RND4(a, b, c, d, e, n) \
  a = SHA_ROTL(b ,5) + SHA_F4(c, d, e) + a + XW(n) + K3; c = SHA_ROTL(c, 30)

  /* Copy input word n into schedule slot W[n], converting byte order. */
#define LOAD(n) XW(n) = NativeEndian::swapToBigEndian(inbuf[n])

  /* Start from the current chaining state. */
  A = XH(0);
  B = XH(1);
  C = XH(2);
  D = XH(3);
  E = XH(4);

  /* Rounds 0-15: W comes straight from the input block; F1/K0. */
  LOAD(0);                 SHA_RND1(E,A,B,C,D, 0);
  LOAD(1);                 SHA_RND1(D,E,A,B,C, 1);
  LOAD(2);                 SHA_RND1(C,D,E,A,B, 2);
  LOAD(3);                 SHA_RND1(B,C,D,E,A, 3);
  LOAD(4);                 SHA_RND1(A,B,C,D,E, 4);
  LOAD(5);                 SHA_RND1(E,A,B,C,D, 5);
  LOAD(6);                 SHA_RND1(D,E,A,B,C, 6);
  LOAD(7);                 SHA_RND1(C,D,E,A,B, 7);
  LOAD(8);                 SHA_RND1(B,C,D,E,A, 8);
  LOAD(9);                 SHA_RND1(A,B,C,D,E, 9);
  LOAD(10);                SHA_RND1(E,A,B,C,D,10);
  LOAD(11);                SHA_RND1(D,E,A,B,C,11);
  LOAD(12);                SHA_RND1(C,D,E,A,B,12);
  LOAD(13);                SHA_RND1(B,C,D,E,A,13);
  LOAD(14);                SHA_RND1(A,B,C,D,E,14);
  LOAD(15);                SHA_RND1(E,A,B,C,D,15);

  /* Rounds 16-19: from here on W is regenerated in place by SHA_MIX; F1/K0. */
  SHA_MIX( 0, 13,  8,  2); SHA_RND1(D,E,A,B,C, 0);
  SHA_MIX( 1, 14,  9,  3); SHA_RND1(C,D,E,A,B, 1);
  SHA_MIX( 2, 15, 10,  4); SHA_RND1(B,C,D,E,A, 2);
  SHA_MIX( 3,  0, 11,  5); SHA_RND1(A,B,C,D,E, 3);

  /* Rounds 20-39: F2/K1. */
  SHA_MIX( 4,  1, 12,  6); SHA_RND2(E,A,B,C,D, 4);
  SHA_MIX( 5,  2, 13,  7); SHA_RND2(D,E,A,B,C, 5);
  SHA_MIX( 6,  3, 14,  8); SHA_RND2(C,D,E,A,B, 6);
  SHA_MIX( 7,  4, 15,  9); SHA_RND2(B,C,D,E,A, 7);
  SHA_MIX( 8,  5,  0, 10); SHA_RND2(A,B,C,D,E, 8);
  SHA_MIX( 9,  6,  1, 11); SHA_RND2(E,A,B,C,D, 9);
  SHA_MIX(10,  7,  2, 12); SHA_RND2(D,E,A,B,C,10);
  SHA_MIX(11,  8,  3, 13); SHA_RND2(C,D,E,A,B,11);
  SHA_MIX(12,  9,  4, 14); SHA_RND2(B,C,D,E,A,12);
  SHA_MIX(13, 10,  5, 15); SHA_RND2(A,B,C,D,E,13);
  SHA_MIX(14, 11,  6,  0); SHA_RND2(E,A,B,C,D,14);
  SHA_MIX(15, 12,  7,  1); SHA_RND2(D,E,A,B,C,15);

  SHA_MIX( 0, 13,  8,  2); SHA_RND2(C,D,E,A,B, 0);
  SHA_MIX( 1, 14,  9,  3); SHA_RND2(B,C,D,E,A, 1);
  SHA_MIX( 2, 15, 10,  4); SHA_RND2(A,B,C,D,E, 2);
  SHA_MIX( 3,  0, 11,  5); SHA_RND2(E,A,B,C,D, 3);
  SHA_MIX( 4,  1, 12,  6); SHA_RND2(D,E,A,B,C, 4);
  SHA_MIX( 5,  2, 13,  7); SHA_RND2(C,D,E,A,B, 5);
  SHA_MIX( 6,  3, 14,  8); SHA_RND2(B,C,D,E,A, 6);
  SHA_MIX( 7,  4, 15,  9); SHA_RND2(A,B,C,D,E, 7);

  /* Rounds 40-59: F3/K2. */
  SHA_MIX( 8,  5,  0, 10); SHA_RND3(E,A,B,C,D, 8);
  SHA_MIX( 9,  6,  1, 11); SHA_RND3(D,E,A,B,C, 9);
  SHA_MIX(10,  7,  2, 12); SHA_RND3(C,D,E,A,B,10);
  SHA_MIX(11,  8,  3, 13); SHA_RND3(B,C,D,E,A,11);
  SHA_MIX(12,  9,  4, 14); SHA_RND3(A,B,C,D,E,12);
  SHA_MIX(13, 10,  5, 15); SHA_RND3(E,A,B,C,D,13);
  SHA_MIX(14, 11,  6,  0); SHA_RND3(D,E,A,B,C,14);
  SHA_MIX(15, 12,  7,  1); SHA_RND3(C,D,E,A,B,15);

  SHA_MIX( 0, 13,  8,  2); SHA_RND3(B,C,D,E,A, 0);
  SHA_MIX( 1, 14,  9,  3); SHA_RND3(A,B,C,D,E, 1);
  SHA_MIX( 2, 15, 10,  4); SHA_RND3(E,A,B,C,D, 2);
  SHA_MIX( 3,  0, 11,  5); SHA_RND3(D,E,A,B,C, 3);
  SHA_MIX( 4,  1, 12,  6); SHA_RND3(C,D,E,A,B, 4);
  SHA_MIX( 5,  2, 13,  7); SHA_RND3(B,C,D,E,A, 5);
  SHA_MIX( 6,  3, 14,  8); SHA_RND3(A,B,C,D,E, 6);
  SHA_MIX( 7,  4, 15,  9); SHA_RND3(E,A,B,C,D, 7);
  SHA_MIX( 8,  5,  0, 10); SHA_RND3(D,E,A,B,C, 8);
  SHA_MIX( 9,  6,  1, 11); SHA_RND3(C,D,E,A,B, 9);
  SHA_MIX(10,  7,  2, 12); SHA_RND3(B,C,D,E,A,10);
  SHA_MIX(11,  8,  3, 13); SHA_RND3(A,B,C,D,E,11);

  /* Rounds 60-79: F4/K3. */
  SHA_MIX(12,  9,  4, 14); SHA_RND4(E,A,B,C,D,12);
  SHA_MIX(13, 10,  5, 15); SHA_RND4(D,E,A,B,C,13);
  SHA_MIX(14, 11,  6,  0); SHA_RND4(C,D,E,A,B,14);
  SHA_MIX(15, 12,  7,  1); SHA_RND4(B,C,D,E,A,15);

  SHA_MIX( 0, 13,  8,  2); SHA_RND4(A,B,C,D,E, 0);
  SHA_MIX( 1, 14,  9,  3); SHA_RND4(E,A,B,C,D, 1);
  SHA_MIX( 2, 15, 10,  4); SHA_RND4(D,E,A,B,C, 2);
  SHA_MIX( 3,  0, 11,  5); SHA_RND4(C,D,E,A,B, 3);
  SHA_MIX( 4,  1, 12,  6); SHA_RND4(B,C,D,E,A, 4);
  SHA_MIX( 5,  2, 13,  7); SHA_RND4(A,B,C,D,E, 5);
  SHA_MIX( 6,  3, 14,  8); SHA_RND4(E,A,B,C,D, 6);
  SHA_MIX( 7,  4, 15,  9); SHA_RND4(D,E,A,B,C, 7);
  SHA_MIX( 8,  5,  0, 10); SHA_RND4(C,D,E,A,B, 8);
  SHA_MIX( 9,  6,  1, 11); SHA_RND4(B,C,D,E,A, 9);
  SHA_MIX(10,  7,  2, 12); SHA_RND4(A,B,C,D,E,10);
  SHA_MIX(11,  8,  3, 13); SHA_RND4(E,A,B,C,D,11);
  SHA_MIX(12,  9,  4, 14); SHA_RND4(D,E,A,B,C,12);
  SHA_MIX(13, 10,  5, 15); SHA_RND4(C,D,E,A,B,13);
  SHA_MIX(14, 11,  6,  0); SHA_RND4(B,C,D,E,A,14);
  SHA_MIX(15, 12,  7,  1); SHA_RND4(A,B,C,D,E,15);

  /* Fold this block's result back into the chaining state. */
  XH(0) += A;
  XH(1) += B;
  XH(2) += C;
  XH(3) += D;
  XH(4) += E;
}