Bug 1817240 - Cherry-pick ANGLE skylake clearview fix. r=gfx-reviewers,lsalzman
[gecko.git] / mfbt / SHA1.cpp
blob2a315c5c063f1713281687d3a1291c06e399c3bd
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "mozilla/Assertions.h"
8 #include "mozilla/EndianUtils.h"
9 #include "mozilla/SHA1.h"
11 #include <string.h>
13 using mozilla::NativeEndian;
14 using mozilla::SHA1Sum;
16 static inline uint32_t SHA_ROTL(uint32_t aT, uint32_t aN) {
17 MOZ_ASSERT(aN < 32);
18 return (aT << aN) | (aT >> (32 - aN));
21 static void shaCompress(volatile unsigned* aX, const uint32_t* aBuf);
23 #define SHA_F1(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
24 #define SHA_F2(X, Y, Z) ((X) ^ (Y) ^ (Z))
25 #define SHA_F3(X, Y, Z) (((X) & (Y)) | ((Z) & ((X) | (Y))))
26 #define SHA_F4(X, Y, Z) ((X) ^ (Y) ^ (Z))
28 #define SHA_MIX(n, a, b, c) XW(n) = SHA_ROTL(XW(a) ^ XW(b) ^ XW(c) ^ XW(n), 1)
30 SHA1Sum::SHA1Sum() : mSize(0), mDone(false) {
31 // Initialize H with constants from FIPS180-1.
32 mH[0] = 0x67452301L;
33 mH[1] = 0xefcdab89L;
34 mH[2] = 0x98badcfeL;
35 mH[3] = 0x10325476L;
36 mH[4] = 0xc3d2e1f0L;
40 * Explanation of H array and index values:
42 * The context's H array is actually the concatenation of two arrays
43 * defined by SHA1, the H array of state variables (5 elements),
44 * and the W array of intermediate values, of which there are 16 elements.
45 * The W array starts at H[5], that is W[0] is H[5].
46 * Although these values are defined as 32-bit values, we use 64-bit
47 * variables to hold them because the AMD64 stores 64 bit values in
48 * memory MUCH faster than it stores any smaller values.
50 * Rather than passing the context structure to shaCompress, we pass
51 * this combined array of H and W values. We do not pass the address
52 * of the first element of this array, but rather pass the address of an
53 * element in the middle of the array, element X. Presently X[0] is H[11].
54 * So we pass the address of H[11] as the address of array X to shaCompress.
55 * Then shaCompress accesses the members of the array using positive AND
56 * negative indexes.
58 * Pictorially: (each element is 8 bytes)
59 * H | H0 H1 H2 H3 H4 W0 W1 W2 W3 W4 W5 W6 W7 W8 W9 Wa Wb Wc Wd We Wf |
60 * X |-11-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 |
62 * The byte offset from X[0] to any member of H and W is always
63 * representable in a signed 8-bit value, which will be encoded
64 * as a single byte offset in the X86-64 instruction set.
65 * If we didn't pass the address of H[11], and instead passed the
66 * address of H[0], the offsets to elements H[16] and above would be
67 * greater than 127, not representable in a signed 8-bit value, and the
68 * x86-64 instruction set would encode every such offset as a 32-bit
69 * signed number in each instruction that accessed element H[16] or
70 * higher. This results in much bigger and slower code.
72 #define H2X 11 /* X[0] is H[11], and H[0] is X[-11] */
73 #define W2X 6 /* X[0] is W[6], and W[0] is X[-6] */
76 * SHA: Add data to context.
78 void SHA1Sum::update(const void* aData, uint32_t aLen) {
79 MOZ_ASSERT(!mDone, "SHA1Sum can only be used to compute a single hash.");
81 const uint8_t* data = static_cast<const uint8_t*>(aData);
83 if (aLen == 0) {
84 return;
87 /* Accumulate the byte count. */
88 unsigned int lenB = static_cast<unsigned int>(mSize) & 63U;
90 mSize += aLen;
92 /* Read the data into W and process blocks as they get full. */
93 unsigned int togo;
94 if (lenB > 0) {
95 togo = 64U - lenB;
96 if (aLen < togo) {
97 togo = aLen;
99 memcpy(mU.mB + lenB, data, togo);
100 aLen -= togo;
101 data += togo;
102 lenB = (lenB + togo) & 63U;
103 if (!lenB) {
104 shaCompress(&mH[H2X], mU.mW);
108 while (aLen >= 64U) {
109 aLen -= 64U;
110 shaCompress(&mH[H2X], reinterpret_cast<const uint32_t*>(data));
111 data += 64U;
114 if (aLen > 0) {
115 memcpy(mU.mB, data, aLen);
120 * SHA: Generate hash value
122 void SHA1Sum::finish(SHA1Sum::Hash& aHashOut) {
123 MOZ_ASSERT(!mDone, "SHA1Sum can only be used to compute a single hash.");
125 uint64_t size = mSize;
126 uint32_t lenB = uint32_t(size) & 63;
128 static const uint8_t bulk_pad[64] = {
129 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
130 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
131 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
133 /* Pad with a binary 1 (e.g. 0x80), then zeroes, then length in bits. */
134 update(bulk_pad, (((55 + 64) - lenB) & 63) + 1);
135 MOZ_ASSERT((uint32_t(mSize) & 63) == 56);
137 /* Convert size from bytes to bits. */
138 size <<= 3;
139 mU.mW[14] = NativeEndian::swapToBigEndian(uint32_t(size >> 32));
140 mU.mW[15] = NativeEndian::swapToBigEndian(uint32_t(size));
141 shaCompress(&mH[H2X], mU.mW);
143 /* Output hash. */
144 mU.mW[0] = NativeEndian::swapToBigEndian(mH[0]);
145 mU.mW[1] = NativeEndian::swapToBigEndian(mH[1]);
146 mU.mW[2] = NativeEndian::swapToBigEndian(mH[2]);
147 mU.mW[3] = NativeEndian::swapToBigEndian(mH[3]);
148 mU.mW[4] = NativeEndian::swapToBigEndian(mH[4]);
149 memcpy(aHashOut, mU.mW, 20);
150 mDone = true;
154 * SHA: Compression function, unrolled.
156 * Some operations in shaCompress are done as 5 groups of 16 operations.
157 * Others are done as 4 groups of 20 operations.
158 * The code below shows that structure.
160 * The functions that compute the new values of the 5 state variables
161 * A-E are done in 4 groups of 20 operations (or you may also think
162 * of them as being done in 16 groups of 5 operations). They are
163 * done by the SHA_RNDx macros below, in the right column.
165 * The functions that set the 16 values of the W array are done in
166 * 5 groups of 16 operations. The first group is done by the
167 * LOAD macros below, the latter 4 groups are done by SHA_MIX below,
168 * in the left column.
170 * gcc's optimizer observes that each member of the W array is assigned
171 * a value 5 times in this code. It reduces the number of store
172 * operations done to the W array in the context (that is, in the X array)
173 * by creating a W array on the stack, and storing the W values there for
174 * the first 4 groups of operations on W, and storing the values in the
175 * context's W array only in the fifth group. This is undesirable.
176 * It is MUCH bigger code than simply using the context's W array, because
177 * all the offsets to the W array in the stack are 32-bit signed offsets,
178 * and it is no faster than storing the values in the context's W array.
180 * The original code for sha_fast.c prevented this creation of a separate
181 * W array in the stack by creating a W array of 80 members, each of
182 * whose elements is assigned only once. It also separated the computations
183 * of the W array values and the computations of the values for the 5
184 * state variables into two separate passes, W's, then A-E's so that the
185 * second pass could be done all in registers (except for accessing the W
186 * array) on machines with fewer registers. The method is suboptimal
187 * for machines with enough registers to do it all in one pass, and it
188 * necessitates using many instructions with 32-bit offsets.
190 * This code eliminates the separate W array on the stack by a completely
191 * different means: by declaring the X array volatile. This prevents
192 * the optimizer from trying to reduce the use of the X array by the
193 * creation of a MORE expensive W array on the stack. The result is
194 * that all instructions use signed 8-bit offsets and not 32-bit offsets.
196 * The combination of this code and the -O3 optimizer flag on GCC 3.4.3
197 * results in code that is 3 times faster than the previous NSS sha_fast
198 * code on AMD64.
200 static void shaCompress(volatile unsigned* aX, const uint32_t* aBuf) {
201 unsigned A, B, C, D, E;
203 #define XH(n) aX[n - H2X]
204 #define XW(n) aX[n - W2X]
206 #define K0 0x5a827999L
207 #define K1 0x6ed9eba1L
208 #define K2 0x8f1bbcdcL
209 #define K3 0xca62c1d6L
211 #define SHA_RND1(a, b, c, d, e, n) \
212 a = SHA_ROTL(b, 5) + SHA_F1(c, d, e) + a + XW(n) + K0; \
213 c = SHA_ROTL(c, 30)
214 #define SHA_RND2(a, b, c, d, e, n) \
215 a = SHA_ROTL(b, 5) + SHA_F2(c, d, e) + a + XW(n) + K1; \
216 c = SHA_ROTL(c, 30)
217 #define SHA_RND3(a, b, c, d, e, n) \
218 a = SHA_ROTL(b, 5) + SHA_F3(c, d, e) + a + XW(n) + K2; \
219 c = SHA_ROTL(c, 30)
220 #define SHA_RND4(a, b, c, d, e, n) \
221 a = SHA_ROTL(b, 5) + SHA_F4(c, d, e) + a + XW(n) + K3; \
222 c = SHA_ROTL(c, 30)
224 #define LOAD(n) XW(n) = NativeEndian::swapToBigEndian(aBuf[n])
226 A = XH(0);
227 B = XH(1);
228 C = XH(2);
229 D = XH(3);
230 E = XH(4);
232 LOAD(0);
233 SHA_RND1(E, A, B, C, D, 0);
234 LOAD(1);
235 SHA_RND1(D, E, A, B, C, 1);
236 LOAD(2);
237 SHA_RND1(C, D, E, A, B, 2);
238 LOAD(3);
239 SHA_RND1(B, C, D, E, A, 3);
240 LOAD(4);
241 SHA_RND1(A, B, C, D, E, 4);
242 LOAD(5);
243 SHA_RND1(E, A, B, C, D, 5);
244 LOAD(6);
245 SHA_RND1(D, E, A, B, C, 6);
246 LOAD(7);
247 SHA_RND1(C, D, E, A, B, 7);
248 LOAD(8);
249 SHA_RND1(B, C, D, E, A, 8);
250 LOAD(9);
251 SHA_RND1(A, B, C, D, E, 9);
252 LOAD(10);
253 SHA_RND1(E, A, B, C, D, 10);
254 LOAD(11);
255 SHA_RND1(D, E, A, B, C, 11);
256 LOAD(12);
257 SHA_RND1(C, D, E, A, B, 12);
258 LOAD(13);
259 SHA_RND1(B, C, D, E, A, 13);
260 LOAD(14);
261 SHA_RND1(A, B, C, D, E, 14);
262 LOAD(15);
263 SHA_RND1(E, A, B, C, D, 15);
265 SHA_MIX(0, 13, 8, 2);
266 SHA_RND1(D, E, A, B, C, 0);
267 SHA_MIX(1, 14, 9, 3);
268 SHA_RND1(C, D, E, A, B, 1);
269 SHA_MIX(2, 15, 10, 4);
270 SHA_RND1(B, C, D, E, A, 2);
271 SHA_MIX(3, 0, 11, 5);
272 SHA_RND1(A, B, C, D, E, 3);
274 SHA_MIX(4, 1, 12, 6);
275 SHA_RND2(E, A, B, C, D, 4);
276 SHA_MIX(5, 2, 13, 7);
277 SHA_RND2(D, E, A, B, C, 5);
278 SHA_MIX(6, 3, 14, 8);
279 SHA_RND2(C, D, E, A, B, 6);
280 SHA_MIX(7, 4, 15, 9);
281 SHA_RND2(B, C, D, E, A, 7);
282 SHA_MIX(8, 5, 0, 10);
283 SHA_RND2(A, B, C, D, E, 8);
284 SHA_MIX(9, 6, 1, 11);
285 SHA_RND2(E, A, B, C, D, 9);
286 SHA_MIX(10, 7, 2, 12);
287 SHA_RND2(D, E, A, B, C, 10);
288 SHA_MIX(11, 8, 3, 13);
289 SHA_RND2(C, D, E, A, B, 11);
290 SHA_MIX(12, 9, 4, 14);
291 SHA_RND2(B, C, D, E, A, 12);
292 SHA_MIX(13, 10, 5, 15);
293 SHA_RND2(A, B, C, D, E, 13);
294 SHA_MIX(14, 11, 6, 0);
295 SHA_RND2(E, A, B, C, D, 14);
296 SHA_MIX(15, 12, 7, 1);
297 SHA_RND2(D, E, A, B, C, 15);
299 SHA_MIX(0, 13, 8, 2);
300 SHA_RND2(C, D, E, A, B, 0);
301 SHA_MIX(1, 14, 9, 3);
302 SHA_RND2(B, C, D, E, A, 1);
303 SHA_MIX(2, 15, 10, 4);
304 SHA_RND2(A, B, C, D, E, 2);
305 SHA_MIX(3, 0, 11, 5);
306 SHA_RND2(E, A, B, C, D, 3);
307 SHA_MIX(4, 1, 12, 6);
308 SHA_RND2(D, E, A, B, C, 4);
309 SHA_MIX(5, 2, 13, 7);
310 SHA_RND2(C, D, E, A, B, 5);
311 SHA_MIX(6, 3, 14, 8);
312 SHA_RND2(B, C, D, E, A, 6);
313 SHA_MIX(7, 4, 15, 9);
314 SHA_RND2(A, B, C, D, E, 7);
316 SHA_MIX(8, 5, 0, 10);
317 SHA_RND3(E, A, B, C, D, 8);
318 SHA_MIX(9, 6, 1, 11);
319 SHA_RND3(D, E, A, B, C, 9);
320 SHA_MIX(10, 7, 2, 12);
321 SHA_RND3(C, D, E, A, B, 10);
322 SHA_MIX(11, 8, 3, 13);
323 SHA_RND3(B, C, D, E, A, 11);
324 SHA_MIX(12, 9, 4, 14);
325 SHA_RND3(A, B, C, D, E, 12);
326 SHA_MIX(13, 10, 5, 15);
327 SHA_RND3(E, A, B, C, D, 13);
328 SHA_MIX(14, 11, 6, 0);
329 SHA_RND3(D, E, A, B, C, 14);
330 SHA_MIX(15, 12, 7, 1);
331 SHA_RND3(C, D, E, A, B, 15);
333 SHA_MIX(0, 13, 8, 2);
334 SHA_RND3(B, C, D, E, A, 0);
335 SHA_MIX(1, 14, 9, 3);
336 SHA_RND3(A, B, C, D, E, 1);
337 SHA_MIX(2, 15, 10, 4);
338 SHA_RND3(E, A, B, C, D, 2);
339 SHA_MIX(3, 0, 11, 5);
340 SHA_RND3(D, E, A, B, C, 3);
341 SHA_MIX(4, 1, 12, 6);
342 SHA_RND3(C, D, E, A, B, 4);
343 SHA_MIX(5, 2, 13, 7);
344 SHA_RND3(B, C, D, E, A, 5);
345 SHA_MIX(6, 3, 14, 8);
346 SHA_RND3(A, B, C, D, E, 6);
347 SHA_MIX(7, 4, 15, 9);
348 SHA_RND3(E, A, B, C, D, 7);
349 SHA_MIX(8, 5, 0, 10);
350 SHA_RND3(D, E, A, B, C, 8);
351 SHA_MIX(9, 6, 1, 11);
352 SHA_RND3(C, D, E, A, B, 9);
353 SHA_MIX(10, 7, 2, 12);
354 SHA_RND3(B, C, D, E, A, 10);
355 SHA_MIX(11, 8, 3, 13);
356 SHA_RND3(A, B, C, D, E, 11);
358 SHA_MIX(12, 9, 4, 14);
359 SHA_RND4(E, A, B, C, D, 12);
360 SHA_MIX(13, 10, 5, 15);
361 SHA_RND4(D, E, A, B, C, 13);
362 SHA_MIX(14, 11, 6, 0);
363 SHA_RND4(C, D, E, A, B, 14);
364 SHA_MIX(15, 12, 7, 1);
365 SHA_RND4(B, C, D, E, A, 15);
367 SHA_MIX(0, 13, 8, 2);
368 SHA_RND4(A, B, C, D, E, 0);
369 SHA_MIX(1, 14, 9, 3);
370 SHA_RND4(E, A, B, C, D, 1);
371 SHA_MIX(2, 15, 10, 4);
372 SHA_RND4(D, E, A, B, C, 2);
373 SHA_MIX(3, 0, 11, 5);
374 SHA_RND4(C, D, E, A, B, 3);
375 SHA_MIX(4, 1, 12, 6);
376 SHA_RND4(B, C, D, E, A, 4);
377 SHA_MIX(5, 2, 13, 7);
378 SHA_RND4(A, B, C, D, E, 5);
379 SHA_MIX(6, 3, 14, 8);
380 SHA_RND4(E, A, B, C, D, 6);
381 SHA_MIX(7, 4, 15, 9);
382 SHA_RND4(D, E, A, B, C, 7);
383 SHA_MIX(8, 5, 0, 10);
384 SHA_RND4(C, D, E, A, B, 8);
385 SHA_MIX(9, 6, 1, 11);
386 SHA_RND4(B, C, D, E, A, 9);
387 SHA_MIX(10, 7, 2, 12);
388 SHA_RND4(A, B, C, D, E, 10);
389 SHA_MIX(11, 8, 3, 13);
390 SHA_RND4(E, A, B, C, D, 11);
391 SHA_MIX(12, 9, 4, 14);
392 SHA_RND4(D, E, A, B, C, 12);
393 SHA_MIX(13, 10, 5, 15);
394 SHA_RND4(C, D, E, A, B, 13);
395 SHA_MIX(14, 11, 6, 0);
396 SHA_RND4(B, C, D, E, A, 14);
397 SHA_MIX(15, 12, 7, 1);
398 SHA_RND4(A, B, C, D, E, 15);
400 XH(0) = XH(0) + A;
401 XH(1) = XH(1) + B;
402 XH(2) = XH(2) + C;
403 XH(3) = XH(3) + D;
404 XH(4) = XH(4) + E;