mfbt/SHA1.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include "mozilla/Assertions.h"
   8 #include "mozilla/EndianUtils.h"
   9 #include "mozilla/SHA1.h"
  10
  11 #include <string.h>
  12
  13 using mozilla::NativeEndian;
  14 using mozilla::SHA1Sum;
  15
  16 static inline uint32_t
  17 SHA_ROTL(uint32_t aT, uint32_t aN)
  18 {
  19   MOZ_ASSERT(aN < 32);
  20   return (aT << aN) | (aT >> (32 - aN));
  21 }
  22
  23 static void
  24 shaCompress(volatile unsigned* aX, const uint32_t* aBuf);
  25
  26 #define SHA_F1(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
  27 #define SHA_F2(X, Y, Z) ((X) ^ (Y) ^ (Z))
  28 #define SHA_F3(X, Y, Z) (((X) & (Y)) | ((Z) & ((X) | (Y))))
  29 #define SHA_F4(X, Y, Z) ((X) ^ (Y) ^ (Z))
  30
  31 #define SHA_MIX(n, a, b, c)    XW(n) = SHA_ROTL(XW(a) ^ XW(b) ^ XW(c) ^XW(n), 1)
  32
  33 SHA1Sum::SHA1Sum()
  34   : mSize(0), mDone(false)
  35 {
  36   // Initialize H with constants from FIPS180-1.
  37   mH[0] = 0x67452301L;
  38   mH[1] = 0xefcdab89L;
  39   mH[2] = 0x98badcfeL;
  40   mH[3] = 0x10325476L;
  41   mH[4] = 0xc3d2e1f0L;
  42 }
  43
  44 /*
  45  * Explanation of H array and index values:
  46  *
  47  * The context's H array is actually the concatenation of two arrays
  48  * defined by SHA1, the H array of state variables (5 elements),
  49  * and the W array of intermediate values, of which there are 16 elements.
  50  * The W array starts at H[5], that is W[0] is H[5].
  51  * Although these values are defined as 32-bit values, we use 64-bit
  52  * variables to hold them because the AMD64 stores 64 bit values in
  53  * memory MUCH faster than it stores any smaller values.
  54  *
  55  * Rather than passing the context structure to shaCompress, we pass
  56  * this combined array of H and W values.  We do not pass the address
  57  * of the first element of this array, but rather pass the address of an
  58  * element in the middle of the array, element X.  Presently X[0] is H[11].
  59  * So we pass the address of H[11] as the address of array X to shaCompress.
  60  * Then shaCompress accesses the members of the array using positive AND
  61  * negative indexes.
  62  *
  63  * Pictorially: (each element is 8 bytes)
  64  * H | H0 H1 H2 H3 H4 W0 W1 W2 W3 W4 W5 W6 W7 W8 W9 Wa Wb Wc Wd We Wf |
  65  * X |-11-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 |
  66  *
  67  * The byte offset from X[0] to any member of H and W is always
  68  * representable in a signed 8-bit value, which will be encoded
  69  * as a single byte offset in the X86-64 instruction set.
  70  * If we didn't pass the address of H[11], and instead passed the
  71  * address of H[0], the offsets to elements H[16] and above would be
  72  * greater than 127, not representable in a signed 8-bit value, and the
  73  * x86-64 instruction set would encode every such offset as a 32-bit
  74  * signed number in each instruction that accessed element H[16] or
  75  * higher.  This results in much bigger and slower code.
  76  */
  77 #define H2X 11 /* X[0] is H[11], and H[0] is X[-11] */
  78 #define W2X  6 /* X[0] is W[6],  and W[0] is X[-6]  */
  79
  80 /*
  81  *  SHA: Add data to context.
  82  */
  83 void
  84 SHA1Sum::update(const void* aData, uint32_t aLen)
  85 {
  86   MOZ_ASSERT(!mDone, "SHA1Sum can only be used to compute a single hash.");
  87
  88   const uint8_t* data = static_cast<const uint8_t*>(aData);
  89
  90   if (aLen == 0) {
  91     return;
  92   }
  93
  94   /* Accumulate the byte count. */
  95   unsigned int lenB = static_cast<unsigned int>(mSize) & 63U;
  96
  97   mSize += aLen;
  98
  99   /* Read the data into W and process blocks as they get full. */
 100   unsigned int togo;
 101   if (lenB > 0) {
 102     togo = 64U - lenB;
 103     if (aLen < togo) {
 104       togo = aLen;
 105     }
 106     memcpy(mU.mB + lenB, data, togo);
 107     aLen -= togo;
 108     data += togo;
 109     lenB = (lenB + togo) & 63U;
 110     if (!lenB) {
 111       shaCompress(&mH[H2X], mU.mW);
 112     }
 113   }
 114
 115   while (aLen >= 64U) {
 116     aLen -= 64U;
 117     shaCompress(&mH[H2X], reinterpret_cast<const uint32_t*>(data));
 118     data += 64U;
 119   }
 120
 121   if (aLen > 0) {
 122     memcpy(mU.mB, data, aLen);
 123   }
 124 }
 125
 126
 127 /*
 128  *  SHA: Generate hash value
 129  */
 130 void
 131 SHA1Sum::finish(SHA1Sum::Hash& aHashOut)
 132 {
 133   MOZ_ASSERT(!mDone, "SHA1Sum can only be used to compute a single hash.");
 134
 135   uint64_t size = mSize;
 136   uint32_t lenB = uint32_t(size) & 63;
 137
 138   static const uint8_t bulk_pad[64] =
 139     { 0x80,0,0,0,0,0,0,0,0,0,
 140       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 141       0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
 142
 143   /* Pad with a binary 1 (e.g. 0x80), then zeroes, then length in bits. */
 144   update(bulk_pad, (((55 + 64) - lenB) & 63) + 1);
 145   MOZ_ASSERT((uint32_t(mSize) & 63) == 56);
 146
 147   /* Convert size from bytes to bits. */
 148   size <<= 3;
 149   mU.mW[14] = NativeEndian::swapToBigEndian(uint32_t(size >> 32));
 150   mU.mW[15] = NativeEndian::swapToBigEndian(uint32_t(size));
 151   shaCompress(&mH[H2X], mU.mW);
 152
 153   /* Output hash. */
 154   mU.mW[0] = NativeEndian::swapToBigEndian(mH[0]);
 155   mU.mW[1] = NativeEndian::swapToBigEndian(mH[1]);
 156   mU.mW[2] = NativeEndian::swapToBigEndian(mH[2]);
 157   mU.mW[3] = NativeEndian::swapToBigEndian(mH[3]);
 158   mU.mW[4] = NativeEndian::swapToBigEndian(mH[4]);
 159   memcpy(aHashOut, mU.mW, 20);
 160   mDone = true;
 161 }
 162
 163 /*
 164  *  SHA: Compression function, unrolled.
 165  *
 166  * Some operations in shaCompress are done as 5 groups of 16 operations.
 167  * Others are done as 4 groups of 20 operations.
 168  * The code below shows that structure.
 169  *
 170  * The functions that compute the new values of the 5 state variables
 171  * A-E are done in 4 groups of 20 operations (or you may also think
 172  * of them as being done in 16 groups of 5 operations).  They are
 173  * done by the SHA_RNDx macros below, in the right column.
 174  *
 175  * The functions that set the 16 values of the W array are done in
 176  * 5 groups of 16 operations.  The first group is done by the
 177  * LOAD macros below, the latter 4 groups are done by SHA_MIX below,
 178  * in the left column.
 179  *
 180  * gcc's optimizer observes that each member of the W array is assigned
 181  * a value 5 times in this code.  It reduces the number of store
 182  * operations done to the W array in the context (that is, in the X array)
 183  * by creating a W array on the stack, and storing the W values there for
 184  * the first 4 groups of operations on W, and storing the values in the
 185  * context's W array only in the fifth group.  This is undesirable.
 186  * It is MUCH bigger code than simply using the context's W array, because
 187  * all the offsets to the W array in the stack are 32-bit signed offsets,
 188  * and it is no faster than storing the values in the context's W array.
 189  *
 190  * The original code for sha_fast.c prevented this creation of a separate
 191  * W array in the stack by creating a W array of 80 members, each of
 192  * whose elements is assigned only once. It also separated the computations
 193  * of the W array values and the computations of the values for the 5
 194  * state variables into two separate passes, W's, then A-E's so that the
 195  * second pass could be done all in registers (except for accessing the W
 196  * array) on machines with fewer registers.  The method is suboptimal
 197  * for machines with enough registers to do it all in one pass, and it
 198  * necessitates using many instructions with 32-bit offsets.
 199  *
 200  * This code eliminates the separate W array on the stack by a completely
 201  * different means: by declaring the X array volatile.  This prevents
 202  * the optimizer from trying to reduce the use of the X array by the
 203  * creation of a MORE expensive W array on the stack. The result is
 204  * that all instructions use signed 8-bit offsets and not 32-bit offsets.
 205  *
 206  * The combination of this code and the -O3 optimizer flag on GCC 3.4.3
 207  * results in code that is 3 times faster than the previous NSS sha_fast
 208  * code on AMD64.
 209  */
 210 static void
 211 shaCompress(volatile unsigned* aX, const uint32_t* aBuf)
 212 {
 213   unsigned A, B, C, D, E;
 214
 215 #define XH(n) aX[n - H2X]
 216 #define XW(n) aX[n - W2X]
 217
 218 #define K0 0x5a827999L
 219 #define K1 0x6ed9eba1L
 220 #define K2 0x8f1bbcdcL
 221 #define K3 0xca62c1d6L
 222
 223 #define SHA_RND1(a, b, c, d, e, n) \
 224   a = SHA_ROTL(b, 5) + SHA_F1(c, d, e) + a + XW(n) + K0; c = SHA_ROTL(c, 30)
 225 #define SHA_RND2(a, b, c, d, e, n) \
 226   a = SHA_ROTL(b, 5) + SHA_F2(c, d, e) + a + XW(n) + K1; c = SHA_ROTL(c, 30)
 227 #define SHA_RND3(a, b, c, d, e, n) \
 228   a = SHA_ROTL(b, 5) + SHA_F3(c, d, e) + a + XW(n) + K2; c = SHA_ROTL(c, 30)
 229 #define SHA_RND4(a, b, c, d, e, n) \
 230   a = SHA_ROTL(b ,5) + SHA_F4(c, d, e) + a + XW(n) + K3; c = SHA_ROTL(c, 30)
 231
 232 #define LOAD(n) XW(n) = NativeEndian::swapToBigEndian(aBuf[n])
 233
 234   A = XH(0);
 235   B = XH(1);
 236   C = XH(2);
 237   D = XH(3);
 238   E = XH(4);
 239
 240   LOAD(0);                 SHA_RND1(E,A,B,C,D, 0);
 241   LOAD(1);                 SHA_RND1(D,E,A,B,C, 1);
 242   LOAD(2);                 SHA_RND1(C,D,E,A,B, 2);
 243   LOAD(3);                 SHA_RND1(B,C,D,E,A, 3);
 244   LOAD(4);                 SHA_RND1(A,B,C,D,E, 4);
 245   LOAD(5);                 SHA_RND1(E,A,B,C,D, 5);
 246   LOAD(6);                 SHA_RND1(D,E,A,B,C, 6);
 247   LOAD(7);                 SHA_RND1(C,D,E,A,B, 7);
 248   LOAD(8);                 SHA_RND1(B,C,D,E,A, 8);
 249   LOAD(9);                 SHA_RND1(A,B,C,D,E, 9);
 250   LOAD(10);                SHA_RND1(E,A,B,C,D,10);
 251   LOAD(11);                SHA_RND1(D,E,A,B,C,11);
 252   LOAD(12);                SHA_RND1(C,D,E,A,B,12);
 253   LOAD(13);                SHA_RND1(B,C,D,E,A,13);
 254   LOAD(14);                SHA_RND1(A,B,C,D,E,14);
 255   LOAD(15);                SHA_RND1(E,A,B,C,D,15);
 256
 257   SHA_MIX( 0, 13,  8,  2); SHA_RND1(D,E,A,B,C, 0);
 258   SHA_MIX( 1, 14,  9,  3); SHA_RND1(C,D,E,A,B, 1);
 259   SHA_MIX( 2, 15, 10,  4); SHA_RND1(B,C,D,E,A, 2);
 260   SHA_MIX( 3,  0, 11,  5); SHA_RND1(A,B,C,D,E, 3);
 261
 262   SHA_MIX( 4,  1, 12,  6); SHA_RND2(E,A,B,C,D, 4);
 263   SHA_MIX( 5,  2, 13,  7); SHA_RND2(D,E,A,B,C, 5);
 264   SHA_MIX( 6,  3, 14,  8); SHA_RND2(C,D,E,A,B, 6);
 265   SHA_MIX( 7,  4, 15,  9); SHA_RND2(B,C,D,E,A, 7);
 266   SHA_MIX( 8,  5,  0, 10); SHA_RND2(A,B,C,D,E, 8);
 267   SHA_MIX( 9,  6,  1, 11); SHA_RND2(E,A,B,C,D, 9);
 268   SHA_MIX(10,  7,  2, 12); SHA_RND2(D,E,A,B,C,10);
 269   SHA_MIX(11,  8,  3, 13); SHA_RND2(C,D,E,A,B,11);
 270   SHA_MIX(12,  9,  4, 14); SHA_RND2(B,C,D,E,A,12);
 271   SHA_MIX(13, 10,  5, 15); SHA_RND2(A,B,C,D,E,13);
 272   SHA_MIX(14, 11,  6,  0); SHA_RND2(E,A,B,C,D,14);
 273   SHA_MIX(15, 12,  7,  1); SHA_RND2(D,E,A,B,C,15);
 274
 275   SHA_MIX( 0, 13,  8,  2); SHA_RND2(C,D,E,A,B, 0);
 276   SHA_MIX( 1, 14,  9,  3); SHA_RND2(B,C,D,E,A, 1);
 277   SHA_MIX( 2, 15, 10,  4); SHA_RND2(A,B,C,D,E, 2);
 278   SHA_MIX( 3,  0, 11,  5); SHA_RND2(E,A,B,C,D, 3);
 279   SHA_MIX( 4,  1, 12,  6); SHA_RND2(D,E,A,B,C, 4);
 280   SHA_MIX( 5,  2, 13,  7); SHA_RND2(C,D,E,A,B, 5);
 281   SHA_MIX( 6,  3, 14,  8); SHA_RND2(B,C,D,E,A, 6);
 282   SHA_MIX( 7,  4, 15,  9); SHA_RND2(A,B,C,D,E, 7);
 283
 284   SHA_MIX( 8,  5,  0, 10); SHA_RND3(E,A,B,C,D, 8);
 285   SHA_MIX( 9,  6,  1, 11); SHA_RND3(D,E,A,B,C, 9);
 286   SHA_MIX(10,  7,  2, 12); SHA_RND3(C,D,E,A,B,10);
 287   SHA_MIX(11,  8,  3, 13); SHA_RND3(B,C,D,E,A,11);
 288   SHA_MIX(12,  9,  4, 14); SHA_RND3(A,B,C,D,E,12);
 289   SHA_MIX(13, 10,  5, 15); SHA_RND3(E,A,B,C,D,13);
 290   SHA_MIX(14, 11,  6,  0); SHA_RND3(D,E,A,B,C,14);
 291   SHA_MIX(15, 12,  7,  1); SHA_RND3(C,D,E,A,B,15);
 292
 293   SHA_MIX( 0, 13,  8,  2); SHA_RND3(B,C,D,E,A, 0);
 294   SHA_MIX( 1, 14,  9,  3); SHA_RND3(A,B,C,D,E, 1);
 295   SHA_MIX( 2, 15, 10,  4); SHA_RND3(E,A,B,C,D, 2);
 296   SHA_MIX( 3,  0, 11,  5); SHA_RND3(D,E,A,B,C, 3);
 297   SHA_MIX( 4,  1, 12,  6); SHA_RND3(C,D,E,A,B, 4);
 298   SHA_MIX( 5,  2, 13,  7); SHA_RND3(B,C,D,E,A, 5);
 299   SHA_MIX( 6,  3, 14,  8); SHA_RND3(A,B,C,D,E, 6);
 300   SHA_MIX( 7,  4, 15,  9); SHA_RND3(E,A,B,C,D, 7);
 301   SHA_MIX( 8,  5,  0, 10); SHA_RND3(D,E,A,B,C, 8);
 302   SHA_MIX( 9,  6,  1, 11); SHA_RND3(C,D,E,A,B, 9);
 303   SHA_MIX(10,  7,  2, 12); SHA_RND3(B,C,D,E,A,10);
 304   SHA_MIX(11,  8,  3, 13); SHA_RND3(A,B,C,D,E,11);
 305
 306   SHA_MIX(12,  9,  4, 14); SHA_RND4(E,A,B,C,D,12);
 307   SHA_MIX(13, 10,  5, 15); SHA_RND4(D,E,A,B,C,13);
 308   SHA_MIX(14, 11,  6,  0); SHA_RND4(C,D,E,A,B,14);
 309   SHA_MIX(15, 12,  7,  1); SHA_RND4(B,C,D,E,A,15);
 310
 311   SHA_MIX( 0, 13,  8,  2); SHA_RND4(A,B,C,D,E, 0);
 312   SHA_MIX( 1, 14,  9,  3); SHA_RND4(E,A,B,C,D, 1);
 313   SHA_MIX( 2, 15, 10,  4); SHA_RND4(D,E,A,B,C, 2);
 314   SHA_MIX( 3,  0, 11,  5); SHA_RND4(C,D,E,A,B, 3);
 315   SHA_MIX( 4,  1, 12,  6); SHA_RND4(B,C,D,E,A, 4);
 316   SHA_MIX( 5,  2, 13,  7); SHA_RND4(A,B,C,D,E, 5);
 317   SHA_MIX( 6,  3, 14,  8); SHA_RND4(E,A,B,C,D, 6);
 318   SHA_MIX( 7,  4, 15,  9); SHA_RND4(D,E,A,B,C, 7);
 319   SHA_MIX( 8,  5,  0, 10); SHA_RND4(C,D,E,A,B, 8);
 320   SHA_MIX( 9,  6,  1, 11); SHA_RND4(B,C,D,E,A, 9);
 321   SHA_MIX(10,  7,  2, 12); SHA_RND4(A,B,C,D,E,10);
 322   SHA_MIX(11,  8,  3, 13); SHA_RND4(E,A,B,C,D,11);
 323   SHA_MIX(12,  9,  4, 14); SHA_RND4(D,E,A,B,C,12);
 324   SHA_MIX(13, 10,  5, 15); SHA_RND4(C,D,E,A,B,13);
 325   SHA_MIX(14, 11,  6,  0); SHA_RND4(B,C,D,E,A,14);
 326   SHA_MIX(15, 12,  7,  1); SHA_RND4(A,B,C,D,E,15);
 327
 328   XH(0) += A;
 329   XH(1) += B;
 330   XH(2) += C;
 331   XH(3) += D;
 332   XH(4) += E;
 333 }