mfbt/SHA1.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include "mozilla/Assertions.h"
   8 #include "mozilla/EndianUtils.h"
   9 #include "mozilla/SHA1.h"
  10
  11 #include <string.h>
  12
  13 using mozilla::NativeEndian;
  14 using mozilla::SHA1Sum;
  15
  16 static inline uint32_t SHA_ROTL(uint32_t aT, uint32_t aN) {
  17   MOZ_ASSERT(aN < 32);
  18   return (aT << aN) | (aT >> (32 - aN));
  19 }
  20
  21 static void shaCompress(volatile unsigned* aX, const uint32_t* aBuf);
  22
  23 #define SHA_F1(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
  24 #define SHA_F2(X, Y, Z) ((X) ^ (Y) ^ (Z))
  25 #define SHA_F3(X, Y, Z) (((X) & (Y)) | ((Z) & ((X) | (Y))))
  26 #define SHA_F4(X, Y, Z) ((X) ^ (Y) ^ (Z))
  27
  28 #define SHA_MIX(n, a, b, c) XW(n) = SHA_ROTL(XW(a) ^ XW(b) ^ XW(c) ^ XW(n), 1)
  29
  30 SHA1Sum::SHA1Sum() : mSize(0), mDone(false) {
  31   // Initialize H with constants from FIPS180-1.
  32   mH[0] = 0x67452301L;
  33   mH[1] = 0xefcdab89L;
  34   mH[2] = 0x98badcfeL;
  35   mH[3] = 0x10325476L;
  36   mH[4] = 0xc3d2e1f0L;
  37 }
  38
  39 /*
  40  * Explanation of H array and index values:
  41  *
  42  * The context's H array is actually the concatenation of two arrays
  43  * defined by SHA1, the H array of state variables (5 elements),
  44  * and the W array of intermediate values, of which there are 16 elements.
  45  * The W array starts at H[5], that is W[0] is H[5].
  46  * Although these values are defined as 32-bit values, we use 64-bit
  47  * variables to hold them because the AMD64 stores 64 bit values in
  48  * memory MUCH faster than it stores any smaller values.
  49  *
  50  * Rather than passing the context structure to shaCompress, we pass
  51  * this combined array of H and W values.  We do not pass the address
  52  * of the first element of this array, but rather pass the address of an
  53  * element in the middle of the array, element X.  Presently X[0] is H[11].
  54  * So we pass the address of H[11] as the address of array X to shaCompress.
  55  * Then shaCompress accesses the members of the array using positive AND
  56  * negative indexes.
  57  *
  58  * Pictorially: (each element is 8 bytes)
  59  * H | H0 H1 H2 H3 H4 W0 W1 W2 W3 W4 W5 W6 W7 W8 W9 Wa Wb Wc Wd We Wf |
  60  * X |-11-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 |
  61  *
  62  * The byte offset from X[0] to any member of H and W is always
  63  * representable in a signed 8-bit value, which will be encoded
  64  * as a single byte offset in the X86-64 instruction set.
  65  * If we didn't pass the address of H[11], and instead passed the
  66  * address of H[0], the offsets to elements H[16] and above would be
  67  * greater than 127, not representable in a signed 8-bit value, and the
  68  * x86-64 instruction set would encode every such offset as a 32-bit
  69  * signed number in each instruction that accessed element H[16] or
  70  * higher.  This results in much bigger and slower code.
  71  */
  72 #define H2X 11 /* X[0] is H[11], and H[0] is X[-11] */
  73 #define W2X 6  /* X[0] is W[6],  and W[0] is X[-6]  */
  74
  75 /*
  76  *  SHA: Add data to context.
  77  */
  78 void SHA1Sum::update(const void* aData, uint32_t aLen) {
  79   MOZ_ASSERT(!mDone, "SHA1Sum can only be used to compute a single hash.");
  80
  81   const uint8_t* data = static_cast<const uint8_t*>(aData);
  82
  83   if (aLen == 0) {
  84     return;
  85   }
  86
  87   /* Accumulate the byte count. */
  88   unsigned int lenB = static_cast<unsigned int>(mSize) & 63U;
  89
  90   mSize += aLen;
  91
  92   /* Read the data into W and process blocks as they get full. */
  93   unsigned int togo;
  94   if (lenB > 0) {
  95     togo = 64U - lenB;
  96     if (aLen < togo) {
  97       togo = aLen;
  98     }
  99     memcpy(mU.mB + lenB, data, togo);
 100     aLen -= togo;
 101     data += togo;
 102     lenB = (lenB + togo) & 63U;
 103     if (!lenB) {
 104       shaCompress(&mH[H2X], mU.mW);
 105     }
 106   }
 107
 108   while (aLen >= 64U) {
 109     aLen -= 64U;
 110     shaCompress(&mH[H2X], reinterpret_cast<const uint32_t*>(data));
 111     data += 64U;
 112   }
 113
 114   if (aLen > 0) {
 115     memcpy(mU.mB, data, aLen);
 116   }
 117 }
 118
 119 /*
 120  *  SHA: Generate hash value
 121  */
 122 void SHA1Sum::finish(SHA1Sum::Hash& aHashOut) {
 123   MOZ_ASSERT(!mDone, "SHA1Sum can only be used to compute a single hash.");
 124
 125   uint64_t size = mSize;
 126   uint32_t lenB = uint32_t(size) & 63;
 127
 128   static const uint8_t bulk_pad[64] = {
 129       0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 130       0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 131       0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 132
 133   /* Pad with a binary 1 (e.g. 0x80), then zeroes, then length in bits. */
 134   update(bulk_pad, (((55 + 64) - lenB) & 63) + 1);
 135   MOZ_ASSERT((uint32_t(mSize) & 63) == 56);
 136
 137   /* Convert size from bytes to bits. */
 138   size <<= 3;
 139   mU.mW[14] = NativeEndian::swapToBigEndian(uint32_t(size >> 32));
 140   mU.mW[15] = NativeEndian::swapToBigEndian(uint32_t(size));
 141   shaCompress(&mH[H2X], mU.mW);
 142
 143   /* Output hash. */
 144   mU.mW[0] = NativeEndian::swapToBigEndian(mH[0]);
 145   mU.mW[1] = NativeEndian::swapToBigEndian(mH[1]);
 146   mU.mW[2] = NativeEndian::swapToBigEndian(mH[2]);
 147   mU.mW[3] = NativeEndian::swapToBigEndian(mH[3]);
 148   mU.mW[4] = NativeEndian::swapToBigEndian(mH[4]);
 149   memcpy(aHashOut, mU.mW, 20);
 150   mDone = true;
 151 }
 152
 153 /*
 154  *  SHA: Compression function, unrolled.
 155  *
 156  * Some operations in shaCompress are done as 5 groups of 16 operations.
 157  * Others are done as 4 groups of 20 operations.
 158  * The code below shows that structure.
 159  *
 160  * The functions that compute the new values of the 5 state variables
 161  * A-E are done in 4 groups of 20 operations (or you may also think
 162  * of them as being done in 16 groups of 5 operations).  They are
 163  * done by the SHA_RNDx macros below, in the right column.
 164  *
 165  * The functions that set the 16 values of the W array are done in
 166  * 5 groups of 16 operations.  The first group is done by the
 167  * LOAD macros below, the latter 4 groups are done by SHA_MIX below,
 168  * in the left column.
 169  *
 170  * gcc's optimizer observes that each member of the W array is assigned
 171  * a value 5 times in this code.  It reduces the number of store
 172  * operations done to the W array in the context (that is, in the X array)
 173  * by creating a W array on the stack, and storing the W values there for
 174  * the first 4 groups of operations on W, and storing the values in the
 175  * context's W array only in the fifth group.  This is undesirable.
 176  * It is MUCH bigger code than simply using the context's W array, because
 177  * all the offsets to the W array in the stack are 32-bit signed offsets,
 178  * and it is no faster than storing the values in the context's W array.
 179  *
 180  * The original code for sha_fast.c prevented this creation of a separate
 181  * W array in the stack by creating a W array of 80 members, each of
 182  * whose elements is assigned only once. It also separated the computations
 183  * of the W array values and the computations of the values for the 5
 184  * state variables into two separate passes, W's, then A-E's so that the
 185  * second pass could be done all in registers (except for accessing the W
 186  * array) on machines with fewer registers.  The method is suboptimal
 187  * for machines with enough registers to do it all in one pass, and it
 188  * necessitates using many instructions with 32-bit offsets.
 189  *
 190  * This code eliminates the separate W array on the stack by a completely
 191  * different means: by declaring the X array volatile.  This prevents
 192  * the optimizer from trying to reduce the use of the X array by the
 193  * creation of a MORE expensive W array on the stack. The result is
 194  * that all instructions use signed 8-bit offsets and not 32-bit offsets.
 195  *
 196  * The combination of this code and the -O3 optimizer flag on GCC 3.4.3
 197  * results in code that is 3 times faster than the previous NSS sha_fast
 198  * code on AMD64.
 199  */
 200 static void shaCompress(volatile unsigned* aX, const uint32_t* aBuf) {
 201   unsigned A, B, C, D, E;
 202
 203 #define XH(n) aX[n - H2X]
 204 #define XW(n) aX[n - W2X]
 205
 206 #define K0 0x5a827999L
 207 #define K1 0x6ed9eba1L
 208 #define K2 0x8f1bbcdcL
 209 #define K3 0xca62c1d6L
 210
 211 #define SHA_RND1(a, b, c, d, e, n)                       \
 212   a = SHA_ROTL(b, 5) + SHA_F1(c, d, e) + a + XW(n) + K0; \
 213   c = SHA_ROTL(c, 30)
 214 #define SHA_RND2(a, b, c, d, e, n)                       \
 215   a = SHA_ROTL(b, 5) + SHA_F2(c, d, e) + a + XW(n) + K1; \
 216   c = SHA_ROTL(c, 30)
 217 #define SHA_RND3(a, b, c, d, e, n)                       \
 218   a = SHA_ROTL(b, 5) + SHA_F3(c, d, e) + a + XW(n) + K2; \
 219   c = SHA_ROTL(c, 30)
 220 #define SHA_RND4(a, b, c, d, e, n)                       \
 221   a = SHA_ROTL(b, 5) + SHA_F4(c, d, e) + a + XW(n) + K3; \
 222   c = SHA_ROTL(c, 30)
 223
 224 #define LOAD(n) XW(n) = NativeEndian::swapToBigEndian(aBuf[n])
 225
 226   A = XH(0);
 227   B = XH(1);
 228   C = XH(2);
 229   D = XH(3);
 230   E = XH(4);
 231
 232   LOAD(0);
 233   SHA_RND1(E, A, B, C, D, 0);
 234   LOAD(1);
 235   SHA_RND1(D, E, A, B, C, 1);
 236   LOAD(2);
 237   SHA_RND1(C, D, E, A, B, 2);
 238   LOAD(3);
 239   SHA_RND1(B, C, D, E, A, 3);
 240   LOAD(4);
 241   SHA_RND1(A, B, C, D, E, 4);
 242   LOAD(5);
 243   SHA_RND1(E, A, B, C, D, 5);
 244   LOAD(6);
 245   SHA_RND1(D, E, A, B, C, 6);
 246   LOAD(7);
 247   SHA_RND1(C, D, E, A, B, 7);
 248   LOAD(8);
 249   SHA_RND1(B, C, D, E, A, 8);
 250   LOAD(9);
 251   SHA_RND1(A, B, C, D, E, 9);
 252   LOAD(10);
 253   SHA_RND1(E, A, B, C, D, 10);
 254   LOAD(11);
 255   SHA_RND1(D, E, A, B, C, 11);
 256   LOAD(12);
 257   SHA_RND1(C, D, E, A, B, 12);
 258   LOAD(13);
 259   SHA_RND1(B, C, D, E, A, 13);
 260   LOAD(14);
 261   SHA_RND1(A, B, C, D, E, 14);
 262   LOAD(15);
 263   SHA_RND1(E, A, B, C, D, 15);
 264
 265   SHA_MIX(0, 13, 8, 2);
 266   SHA_RND1(D, E, A, B, C, 0);
 267   SHA_MIX(1, 14, 9, 3);
 268   SHA_RND1(C, D, E, A, B, 1);
 269   SHA_MIX(2, 15, 10, 4);
 270   SHA_RND1(B, C, D, E, A, 2);
 271   SHA_MIX(3, 0, 11, 5);
 272   SHA_RND1(A, B, C, D, E, 3);
 273
 274   SHA_MIX(4, 1, 12, 6);
 275   SHA_RND2(E, A, B, C, D, 4);
 276   SHA_MIX(5, 2, 13, 7);
 277   SHA_RND2(D, E, A, B, C, 5);
 278   SHA_MIX(6, 3, 14, 8);
 279   SHA_RND2(C, D, E, A, B, 6);
 280   SHA_MIX(7, 4, 15, 9);
 281   SHA_RND2(B, C, D, E, A, 7);
 282   SHA_MIX(8, 5, 0, 10);
 283   SHA_RND2(A, B, C, D, E, 8);
 284   SHA_MIX(9, 6, 1, 11);
 285   SHA_RND2(E, A, B, C, D, 9);
 286   SHA_MIX(10, 7, 2, 12);
 287   SHA_RND2(D, E, A, B, C, 10);
 288   SHA_MIX(11, 8, 3, 13);
 289   SHA_RND2(C, D, E, A, B, 11);
 290   SHA_MIX(12, 9, 4, 14);
 291   SHA_RND2(B, C, D, E, A, 12);
 292   SHA_MIX(13, 10, 5, 15);
 293   SHA_RND2(A, B, C, D, E, 13);
 294   SHA_MIX(14, 11, 6, 0);
 295   SHA_RND2(E, A, B, C, D, 14);
 296   SHA_MIX(15, 12, 7, 1);
 297   SHA_RND2(D, E, A, B, C, 15);
 298
 299   SHA_MIX(0, 13, 8, 2);
 300   SHA_RND2(C, D, E, A, B, 0);
 301   SHA_MIX(1, 14, 9, 3);
 302   SHA_RND2(B, C, D, E, A, 1);
 303   SHA_MIX(2, 15, 10, 4);
 304   SHA_RND2(A, B, C, D, E, 2);
 305   SHA_MIX(3, 0, 11, 5);
 306   SHA_RND2(E, A, B, C, D, 3);
 307   SHA_MIX(4, 1, 12, 6);
 308   SHA_RND2(D, E, A, B, C, 4);
 309   SHA_MIX(5, 2, 13, 7);
 310   SHA_RND2(C, D, E, A, B, 5);
 311   SHA_MIX(6, 3, 14, 8);
 312   SHA_RND2(B, C, D, E, A, 6);
 313   SHA_MIX(7, 4, 15, 9);
 314   SHA_RND2(A, B, C, D, E, 7);
 315
 316   SHA_MIX(8, 5, 0, 10);
 317   SHA_RND3(E, A, B, C, D, 8);
 318   SHA_MIX(9, 6, 1, 11);
 319   SHA_RND3(D, E, A, B, C, 9);
 320   SHA_MIX(10, 7, 2, 12);
 321   SHA_RND3(C, D, E, A, B, 10);
 322   SHA_MIX(11, 8, 3, 13);
 323   SHA_RND3(B, C, D, E, A, 11);
 324   SHA_MIX(12, 9, 4, 14);
 325   SHA_RND3(A, B, C, D, E, 12);
 326   SHA_MIX(13, 10, 5, 15);
 327   SHA_RND3(E, A, B, C, D, 13);
 328   SHA_MIX(14, 11, 6, 0);
 329   SHA_RND3(D, E, A, B, C, 14);
 330   SHA_MIX(15, 12, 7, 1);
 331   SHA_RND3(C, D, E, A, B, 15);
 332
 333   SHA_MIX(0, 13, 8, 2);
 334   SHA_RND3(B, C, D, E, A, 0);
 335   SHA_MIX(1, 14, 9, 3);
 336   SHA_RND3(A, B, C, D, E, 1);
 337   SHA_MIX(2, 15, 10, 4);
 338   SHA_RND3(E, A, B, C, D, 2);
 339   SHA_MIX(3, 0, 11, 5);
 340   SHA_RND3(D, E, A, B, C, 3);
 341   SHA_MIX(4, 1, 12, 6);
 342   SHA_RND3(C, D, E, A, B, 4);
 343   SHA_MIX(5, 2, 13, 7);
 344   SHA_RND3(B, C, D, E, A, 5);
 345   SHA_MIX(6, 3, 14, 8);
 346   SHA_RND3(A, B, C, D, E, 6);
 347   SHA_MIX(7, 4, 15, 9);
 348   SHA_RND3(E, A, B, C, D, 7);
 349   SHA_MIX(8, 5, 0, 10);
 350   SHA_RND3(D, E, A, B, C, 8);
 351   SHA_MIX(9, 6, 1, 11);
 352   SHA_RND3(C, D, E, A, B, 9);
 353   SHA_MIX(10, 7, 2, 12);
 354   SHA_RND3(B, C, D, E, A, 10);
 355   SHA_MIX(11, 8, 3, 13);
 356   SHA_RND3(A, B, C, D, E, 11);
 357
 358   SHA_MIX(12, 9, 4, 14);
 359   SHA_RND4(E, A, B, C, D, 12);
 360   SHA_MIX(13, 10, 5, 15);
 361   SHA_RND4(D, E, A, B, C, 13);
 362   SHA_MIX(14, 11, 6, 0);
 363   SHA_RND4(C, D, E, A, B, 14);
 364   SHA_MIX(15, 12, 7, 1);
 365   SHA_RND4(B, C, D, E, A, 15);
 366
 367   SHA_MIX(0, 13, 8, 2);
 368   SHA_RND4(A, B, C, D, E, 0);
 369   SHA_MIX(1, 14, 9, 3);
 370   SHA_RND4(E, A, B, C, D, 1);
 371   SHA_MIX(2, 15, 10, 4);
 372   SHA_RND4(D, E, A, B, C, 2);
 373   SHA_MIX(3, 0, 11, 5);
 374   SHA_RND4(C, D, E, A, B, 3);
 375   SHA_MIX(4, 1, 12, 6);
 376   SHA_RND4(B, C, D, E, A, 4);
 377   SHA_MIX(5, 2, 13, 7);
 378   SHA_RND4(A, B, C, D, E, 5);
 379   SHA_MIX(6, 3, 14, 8);
 380   SHA_RND4(E, A, B, C, D, 6);
 381   SHA_MIX(7, 4, 15, 9);
 382   SHA_RND4(D, E, A, B, C, 7);
 383   SHA_MIX(8, 5, 0, 10);
 384   SHA_RND4(C, D, E, A, B, 8);
 385   SHA_MIX(9, 6, 1, 11);
 386   SHA_RND4(B, C, D, E, A, 9);
 387   SHA_MIX(10, 7, 2, 12);
 388   SHA_RND4(A, B, C, D, E, 10);
 389   SHA_MIX(11, 8, 3, 13);
 390   SHA_RND4(E, A, B, C, D, 11);
 391   SHA_MIX(12, 9, 4, 14);
 392   SHA_RND4(D, E, A, B, C, 12);
 393   SHA_MIX(13, 10, 5, 15);
 394   SHA_RND4(C, D, E, A, B, 13);
 395   SHA_MIX(14, 11, 6, 0);
 396   SHA_RND4(B, C, D, E, A, 14);
 397   SHA_MIX(15, 12, 7, 1);
 398   SHA_RND4(A, B, C, D, E, 15);
 399
 400   XH(0) = XH(0) + A;
 401   XH(1) = XH(1) + B;
 402   XH(2) = XH(2) + C;
 403   XH(3) = XH(3) + D;
 404   XH(4) = XH(4) + E;
 405 }