1 /* sha1.c - Functions to compute SHA1 message digest of files or
2 memory blocks according to the NIST specification FIPS-180-1.
4 Copyright (C) 2000-2024 Free Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20 /* Written by Scott G. Miller
22 Credits: Robert Klep <robert@ilse.nl> -- Expansion function fix */
32 #ifdef HAVE_X86_SHA1_HW_SUPPORT
33 # include <x86intrin.h>
38 # include "unlocked-io.h"
41 #ifdef WORDS_BIGENDIAN
/* Byte-order reversal of the 32-bit value N: each of its four bytes is
   moved to the mirrored position.
   NOTE(review): the directive that names this expression is not part of
   this listing (the embedded numbering skips from 41 to 45).  The code
   further down invokes a macro called SWAP, presumably this one; confirm
   which branch of the WORDS_BIGENDIAN conditional it belongs to.  */
45 (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
/* Number of bytes sha1_stream requests from the stream per chunk and
   hands to sha1_process_block in one call.  sha1_process_block assumes a
   length that is a multiple of 64, hence the compile-time check.  */
48 #define BLOCKSIZE 4096
49 #if BLOCKSIZE % 64 != 0
50 # error "invalid BLOCKSIZE"
53 /* This array contains the bytes used to pad the buffer to the next
54 64-byte boundary: a single 0x80 byte followed by zero bytes.  The
   citation kept from the original text is RFC 1321, 3.1: Step 1, which
   describes MD5.  NOTE(review): for SHA-1 the matching text is presumably
   RFC 3174, section 4 -- confirm before citing.  sha1_finish_ctx copies a
   prefix of this array directly behind the buffered data. */
55 static const unsigned char fillbuf
[64] = { 0x80, 0 /* , 0, 0, ... */ };
58 /* Take a pointer to a 160 bit block of data (five 32 bit ints) and
59 initialize it to the start constants of the SHA1 algorithm. This
60 must be called before using hash in the call to sha1_hash. */
/* NOTE(review): no function named sha1_hash is visible in this file;
   presumably the sentence above refers to the sha1_process_* functions.
   NOTE(review): the embedded numbering skips from 62 to 70, so only one
   statement of the body is visible in this listing.  */
62 sha1_init_ctx (struct sha1_ctx
*ctx
)
/* Reset both words of the processed-byte counter.  */
70 ctx
->total
[0] = ctx
->total
[1] = 0;
74 /* Put result from CTX in first 20 bytes following RESBUF. The result
75 must be in little endian byte order.
77 IMPORTANT: On some systems it is required that RESBUF is correctly
78 aligned for a 32-bit value. */
/* The five state words A..E are each passed through SWAP and stored as
   consecutive sha1_uint32 values through a cast of RESBUF.  That cast is
   the reason for the alignment requirement above.
   NOTE(review): "little endian" reads like text inherited from md5.c;
   SHA-1 digests are conventionally big-endian.  Confirm against the SWAP
   definition, which is not fully visible in this listing.  */
80 sha1_read_ctx (const struct sha1_ctx
*ctx
, void *resbuf
)
82 ((sha1_uint32
*) resbuf
)[0] = SWAP (ctx
->A
);
83 ((sha1_uint32
*) resbuf
)[1] = SWAP (ctx
->B
);
84 ((sha1_uint32
*) resbuf
)[2] = SWAP (ctx
->C
);
85 ((sha1_uint32
*) resbuf
)[3] = SWAP (ctx
->D
);
86 ((sha1_uint32
*) resbuf
)[4] = SWAP (ctx
->E
);
91 /* Process the remaining bytes in the internal buffer and the usual
92 prolog according to the standard and write the result to RESBUF.
94 IMPORTANT: On some systems it is required that RESBUF is correctly
95 aligned for a 32-bit value. */
/* Returns whatever sha1_read_ctx returns for CTX and RESBUF.  */
97 sha1_finish_ctx (struct sha1_ctx
*ctx
, void *resbuf
)
99 /* Take yet unprocessed bytes into account. */
100 sha1_uint32 bytes
= ctx
->buflen
;
/* SIZE is the number of 32-bit words to process: one 64-byte block when
   fewer than 56 bytes are buffered, otherwise two blocks.  The last two
   words of that span receive the message length below.  */
101 size_t size
= (bytes
< 56) ? 64 / 4 : 64 * 2 / 4;
103 /* Now count remaining bytes. */
104 ctx
->total
[0] += bytes
;
/* An unsigned sum smaller than the addend means the low word wrapped.
   NOTE(review): the statement controlled by this test is not visible in
   this listing (the embedded numbering skips 106).  */
105 if (ctx
->total
[0] < bytes
)
108 /* Put the 64-bit file length in *bits* at the end of the buffer. */
/* Bytes to bits is a shift left by 3 across the two-word counter: the
   top three bits of the low word move into the high word.  */
109 ctx
->buffer
[size
- 2] = SWAP ((ctx
->total
[1] << 3) | (ctx
->total
[0] >> 29));
110 ctx
->buffer
[size
- 1] = SWAP (ctx
->total
[0] << 3);
/* Fill the gap between the buffered data and the length field with the
   padding pattern from fillbuf.  */
112 memcpy (&((char *) ctx
->buffer
)[bytes
], fillbuf
, (size
- 2) * 4 - bytes
);
114 /* Process last bytes. */
115 sha1_process_block (ctx
->buffer
, size
* 4, ctx
);
117 return sha1_read_ctx (ctx
, resbuf
);
120 /* Compute SHA1 message digest for bytes read from STREAM. The
121 resulting message digest number will be written into the 20 bytes
122 beginning at RESBLOCK. */
/* NOTE(review): this listing omits many lines of the function (the
   embedded numbering has gaps), including declarations and loop headers.
   Two of the comments further down also lack their closing delimiter
   here; they are left exactly as found.  */
124 sha1_stream (FILE *stream
, void *resblock
)
127 char buffer
[BLOCKSIZE
+ 72];
130 /* Initialize the computation context. */
131 sha1_init_ctx (&ctx
);
133 /* Iterate over full file contents. */
136 /* We read the file in blocks of BLOCKSIZE bytes. One call of the
137 computation function processes the whole buffer so that with the
138 next round of the loop another block can be read. */
142 /* Read block. Take care for partial reads. */
/* SUM bytes are already held in BUFFER, so the read appends behind them
   and asks for at most the BLOCKSIZE - SUM bytes still missing.  */
145 n
= fread (buffer
+ sum
, 1, BLOCKSIZE
- sum
, stream
);
149 if (sum
== BLOCKSIZE
)
154 /* Check for the error flag IFF N == 0, so that we don't
155 exit the loop after a partial read due to e.g., EAGAIN
159 goto process_partial_block
;
162 /* We've read at least one byte, so ignore errors. But always
163 check for EOF, since feof may be true even though N > 0.
164 Otherwise, we could end up calling fread after EOF. */
166 goto process_partial_block
;
169 /* Process buffer with BLOCKSIZE bytes. Note that
172 sha1_process_block (buffer
, BLOCKSIZE
, &ctx
);
175 process_partial_block
:;
177 /* Process any remaining bytes. */
/* The final chunk may be shorter than a block, so it goes through
   sha1_process_bytes, which buffers a tail, instead of
   sha1_process_block, which assumes a multiple of 64 bytes.  */
179 sha1_process_bytes (buffer
, sum
, &ctx
);
181 /* Construct result in desired memory. */
182 sha1_finish_ctx (&ctx
, resblock
);
186 /* Compute SHA1 message digest for LEN bytes beginning at BUFFER. The
187 result is always in little endian byte order, so that a byte-wise
188 output yields the wanted ASCII representation of the message digest. */
/* One-shot digest of the LEN bytes at BUFFER: initialize a context, feed
   the bytes, finish into RESBLOCK and return what sha1_finish_ctx returns.
   NOTE(review): the declaration of the local ctx is not visible in this
   listing.  */
191 sha1_buffer (const char *buffer
, size_t len
, void *resblock
)
195 /* Initialize the computation context. */
196 sha1_init_ctx (&ctx
);
198 /* Feed all LEN bytes.  sha1_process_bytes keeps a tail shorter than one block in ctx.buffer; sha1_finish_ctx pads and processes it. */
199 sha1_process_bytes (buffer
, len
, &ctx
);
201 /* Put result in desired memory area. */
202 return sha1_finish_ctx (&ctx
, resblock
);
/* Feed LEN bytes starting at BUFFER into CTX.  LEN need not be a multiple
   of 64.  Pending bytes in ctx->buffer are completed first, whole 64-byte
   multiples are passed to sha1_process_block, and the remaining tail is
   stored in ctx->buffer with its length in ctx->buflen.
   NOTE(review): this listing omits lines of the function (braces, loop
   headers and some conditions), so the control flow shown is partial.  */
206 sha1_process_bytes (const void *buffer
, size_t len
, struct sha1_ctx
*ctx
)
208 /* When we already have some bits in our internal buffer concatenate
209 both inputs first. */
210 if (ctx
->buflen
!= 0)
212 size_t left_over
= ctx
->buflen
;
/* Take the smaller of LEN and the room left up to 128 bytes.
   NOTE(review): 128 is presumably the byte size of ctx->buffer --
   confirm against the struct declaration.  */
213 size_t add
= 128 - left_over
> len
? len
: 128 - left_over
;
215 memcpy (&((char *) ctx
->buffer
)[left_over
], buffer
, add
);
218 if (ctx
->buflen
> 64)
/* The mask with ~63 rounds the byte count down to a multiple of 64.  */
220 sha1_process_block (ctx
->buffer
, ctx
->buflen
& ~63, ctx
);
223 /* The regions in the following copy operation cannot overlap. */
225 &((char *) ctx
->buffer
)[(left_over
+ add
) & ~63],
229 buffer
= (const char *) buffer
+ add
;
233 /* Process available complete blocks. */
236 #if !_STRING_ARCH_unaligned
/* alignof (TYPE) is computed as the offset of a TYPE member placed right
   after a single char.  UNALIGNED_P tests the address against the
   alignment of sha1_uint32.  */
237 # define alignof(type) offsetof (struct { char c; type x; }, x)
238 # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
239 if (UNALIGNED_P (buffer
))
/* memcpy returns its destination, so each block is processed from the
   copy placed in ctx->buffer rather than from the caller's pointer.  */
242 sha1_process_block (memcpy (ctx
->buffer
, buffer
, 64), 64, ctx
);
243 buffer
= (const char *) buffer
+ 64;
249 sha1_process_block (buffer
, len
& ~63, ctx
);
250 buffer
= (const char *) buffer
+ (len
& ~63);
255 /* Move remaining bytes in internal buffer. */
258 size_t left_over
= ctx
->buflen
;
260 memcpy (&((char *) ctx
->buffer
)[left_over
], buffer
, len
);
264 sha1_process_block (ctx
->buffer
, 64, ctx
);
/* Element 16 of ctx->buffer is byte offset 64 when its elements are the
   4-byte words that sha1_finish_ctx stores into it.  This slides the
   unprocessed bytes to the front after the first block was consumed.  */
266 memmove (ctx
->buffer
, &ctx
->buffer
[16], left_over
);
268 ctx
->buflen
= left_over
;
272 /* --- Code below is the primary difference between md5.c and sha1.c --- */
274 /* SHA1 round constants */
/* sha1_process_block pairs K1 with F1 in rounds 0-19, K2 with F2 in
   rounds 20-39, K3 with F3 in rounds 40-59 and K4 with F4 in rounds
   60-79.  */
275 #define K1 0x5a827999
276 #define K2 0x6ed9eba1
277 #define K3 0x8f1bbcdc
278 #define K4 0xca62c1d6
280 /* Round functions. Note that F2 is the same as F4. */
/* F1 selects bitwise: where a bit of B is set the result takes the bit of
   C, otherwise the bit of D.  F2 and F4 are the bitwise parity of the
   three inputs.  F3 is the bitwise majority of the three inputs.  */
281 #define F1(B,C,D) ( D ^ ( B & ( C ^ D ) ) )
282 #define F2(B,C,D) (B ^ C ^ D)
283 #define F3(B,C,D) ( ( B & C ) | ( D & ( B | C ) ) )
284 #define F4(B,C,D) (B ^ C ^ D)
286 /* Process LEN bytes of BUFFER, accumulating context into CTX.
287 It is assumed that LEN % 64 == 0.
288 Most of this code comes from GnuPG's cipher/sha1.c. */
/* NOTE(review): this listing omits lines of the function (the embedded
   numbering has gaps), among them the declarations of x, t and tm, the
   rest of the R macro, the outer loop over blocks and the code that
   writes the state back to CTX.  */
291 sha1_process_block (const void *buffer
, size_t len
, struct sha1_ctx
*ctx
)
/* The input is read as 32-bit words; ENDP points one past the last word
   of the LEN bytes.  */
293 const sha1_uint32
*words
= (const sha1_uint32
*) buffer
;
294 size_t nwords
= len
/ sizeof (sha1_uint32
);
295 const sha1_uint32
*endp
= words
+ nwords
;
/* Working copies of the five chaining values.  */
297 sha1_uint32 a
= ctx
->A
;
298 sha1_uint32 b
= ctx
->B
;
299 sha1_uint32 c
= ctx
->C
;
300 sha1_uint32 d
= ctx
->D
;
301 sha1_uint32 e
= ctx
->E
;
303 /* First increment the byte count. RFC 1321 specifies the possible
304 length of the file up to 2^64 bits. Here we only compute the
305 number of bytes. Do a double word increment. */
306 ctx
->total
[0] += len
;
/* Shifting by 31 and then by 1 extracts the bits of LEN above bit 31
   without a single shift by 32, which would be undefined where size_t is
   32 bits wide.  The comparison adds the carry out of the low word.  */
307 ctx
->total
[1] += ((len
>> 31) >> 1) + (ctx
->total
[0] < len
);
/* rol: rotate the 32-bit value X left by N bit positions.  */
309 #define rol(x, n) (((x) << (n)) | ((sha1_uint32) (x) >> (32 - (n))))
/* M (I): next message-schedule word, computed in place on a 16-entry
   circular buffer.  The entries at I, I-14, I-8 and I-3 (indices taken
   modulo 16) are XORed, rotated left by one, stored back at index I
   modulo 16 and yielded as the value of the expression.  */
311 #define M(I) ( tm = x[I&0x0f] ^ x[(I-14)&0x0f] \
312 ^ x[(I-8)&0x0f] ^ x[(I-3)&0x0f] \
313 , (x[I&0x0f] = rol(tm, 1)) )
/* R: one round.  Only the first line of the macro is visible in this
   listing.  */
315 #define R(A,B,C,D,E,F,K,M) do { E += rol( A, 5 ) \
326 for (t
= 0; t
< 16; t
++)
/* Each of the 16 input words of the block passes through SWAP.  */
328 x
[t
] = SWAP (*words
);
/* Rounds 0-19 use F1 and K1.  Rounds 0-15 consume x[0..15] directly and
   later rounds extend the schedule through M.  The roles of a..e rotate
   by one position from each R invocation to the next.  */
332 R( a
, b
, c
, d
, e
, F1
, K1
, x
[ 0] );
333 R( e
, a
, b
, c
, d
, F1
, K1
, x
[ 1] );
334 R( d
, e
, a
, b
, c
, F1
, K1
, x
[ 2] );
335 R( c
, d
, e
, a
, b
, F1
, K1
, x
[ 3] );
336 R( b
, c
, d
, e
, a
, F1
, K1
, x
[ 4] );
337 R( a
, b
, c
, d
, e
, F1
, K1
, x
[ 5] );
338 R( e
, a
, b
, c
, d
, F1
, K1
, x
[ 6] );
339 R( d
, e
, a
, b
, c
, F1
, K1
, x
[ 7] );
340 R( c
, d
, e
, a
, b
, F1
, K1
, x
[ 8] );
341 R( b
, c
, d
, e
, a
, F1
, K1
, x
[ 9] );
342 R( a
, b
, c
, d
, e
, F1
, K1
, x
[10] );
343 R( e
, a
, b
, c
, d
, F1
, K1
, x
[11] );
344 R( d
, e
, a
, b
, c
, F1
, K1
, x
[12] );
345 R( c
, d
, e
, a
, b
, F1
, K1
, x
[13] );
346 R( b
, c
, d
, e
, a
, F1
, K1
, x
[14] );
347 R( a
, b
, c
, d
, e
, F1
, K1
, x
[15] );
348 R( e
, a
, b
, c
, d
, F1
, K1
, M(16) );
349 R( d
, e
, a
, b
, c
, F1
, K1
, M(17) );
350 R( c
, d
, e
, a
, b
, F1
, K1
, M(18) );
351 R( b
, c
, d
, e
, a
, F1
, K1
, M(19) );
/* Rounds 20-39 use F2 and K2.  */
352 R( a
, b
, c
, d
, e
, F2
, K2
, M(20) );
353 R( e
, a
, b
, c
, d
, F2
, K2
, M(21) );
354 R( d
, e
, a
, b
, c
, F2
, K2
, M(22) );
355 R( c
, d
, e
, a
, b
, F2
, K2
, M(23) );
356 R( b
, c
, d
, e
, a
, F2
, K2
, M(24) );
357 R( a
, b
, c
, d
, e
, F2
, K2
, M(25) );
358 R( e
, a
, b
, c
, d
, F2
, K2
, M(26) );
359 R( d
, e
, a
, b
, c
, F2
, K2
, M(27) );
360 R( c
, d
, e
, a
, b
, F2
, K2
, M(28) );
361 R( b
, c
, d
, e
, a
, F2
, K2
, M(29) );
362 R( a
, b
, c
, d
, e
, F2
, K2
, M(30) );
363 R( e
, a
, b
, c
, d
, F2
, K2
, M(31) );
364 R( d
, e
, a
, b
, c
, F2
, K2
, M(32) );
365 R( c
, d
, e
, a
, b
, F2
, K2
, M(33) );
366 R( b
, c
, d
, e
, a
, F2
, K2
, M(34) );
367 R( a
, b
, c
, d
, e
, F2
, K2
, M(35) );
368 R( e
, a
, b
, c
, d
, F2
, K2
, M(36) );
369 R( d
, e
, a
, b
, c
, F2
, K2
, M(37) );
370 R( c
, d
, e
, a
, b
, F2
, K2
, M(38) );
371 R( b
, c
, d
, e
, a
, F2
, K2
, M(39) );
/* Rounds 40-59 use F3 and K3.  */
372 R( a
, b
, c
, d
, e
, F3
, K3
, M(40) );
373 R( e
, a
, b
, c
, d
, F3
, K3
, M(41) );
374 R( d
, e
, a
, b
, c
, F3
, K3
, M(42) );
375 R( c
, d
, e
, a
, b
, F3
, K3
, M(43) );
376 R( b
, c
, d
, e
, a
, F3
, K3
, M(44) );
377 R( a
, b
, c
, d
, e
, F3
, K3
, M(45) );
378 R( e
, a
, b
, c
, d
, F3
, K3
, M(46) );
379 R( d
, e
, a
, b
, c
, F3
, K3
, M(47) );
380 R( c
, d
, e
, a
, b
, F3
, K3
, M(48) );
381 R( b
, c
, d
, e
, a
, F3
, K3
, M(49) );
382 R( a
, b
, c
, d
, e
, F3
, K3
, M(50) );
383 R( e
, a
, b
, c
, d
, F3
, K3
, M(51) );
384 R( d
, e
, a
, b
, c
, F3
, K3
, M(52) );
385 R( c
, d
, e
, a
, b
, F3
, K3
, M(53) );
386 R( b
, c
, d
, e
, a
, F3
, K3
, M(54) );
387 R( a
, b
, c
, d
, e
, F3
, K3
, M(55) );
388 R( e
, a
, b
, c
, d
, F3
, K3
, M(56) );
389 R( d
, e
, a
, b
, c
, F3
, K3
, M(57) );
390 R( c
, d
, e
, a
, b
, F3
, K3
, M(58) );
391 R( b
, c
, d
, e
, a
, F3
, K3
, M(59) );
/* Rounds 60-79 use F4 and K4.  */
392 R( a
, b
, c
, d
, e
, F4
, K4
, M(60) );
393 R( e
, a
, b
, c
, d
, F4
, K4
, M(61) );
394 R( d
, e
, a
, b
, c
, F4
, K4
, M(62) );
395 R( c
, d
, e
, a
, b
, F4
, K4
, M(63) );
396 R( b
, c
, d
, e
, a
, F4
, K4
, M(64) );
397 R( a
, b
, c
, d
, e
, F4
, K4
, M(65) );
398 R( e
, a
, b
, c
, d
, F4
, K4
, M(66) );
399 R( d
, e
, a
, b
, c
, F4
, K4
, M(67) );
400 R( c
, d
, e
, a
, b
, F4
, K4
, M(68) );
401 R( b
, c
, d
, e
, a
, F4
, K4
, M(69) );
402 R( a
, b
, c
, d
, e
, F4
, K4
, M(70) );
403 R( e
, a
, b
, c
, d
, F4
, K4
, M(71) );
404 R( d
, e
, a
, b
, c
, F4
, K4
, M(72) );
405 R( c
, d
, e
, a
, b
, F4
, K4
, M(73) );
406 R( b
, c
, d
, e
, a
, F4
, K4
, M(74) );
407 R( a
, b
, c
, d
, e
, F4
, K4
, M(75) );
408 R( e
, a
, b
, c
, d
, F4
, K4
, M(76) );
409 R( d
, e
, a
, b
, c
, F4
, K4
, M(77) );
410 R( c
, d
, e
, a
, b
, F4
, K4
, M(78) );
411 R( b
, c
, d
, e
, a
, F4
, K4
, M(79) );
421 #if defined(HAVE_X86_SHA1_HW_SUPPORT)
422 /* HW specific version of sha1_process_bytes. */
/* The statements shown follow those of sha1_process_bytes one for one,
   with every block-level call going to sha1_hw_process_block instead of
   sha1_process_block.  The prototype comes first because the definition
   of sha1_hw_process_block appears later in the file.
   NOTE(review): this listing omits lines of the function (braces, loop
   headers and some conditions), so the control flow shown is partial.  */
424 static void sha1_hw_process_block (const void *, size_t, struct sha1_ctx
*);
427 sha1_hw_process_bytes (const void *buffer
, size_t len
, struct sha1_ctx
*ctx
)
429 /* When we already have some bits in our internal buffer concatenate
430 both inputs first. */
431 if (ctx
->buflen
!= 0)
433 size_t left_over
= ctx
->buflen
;
/* Take the smaller of LEN and the room left up to 128 bytes.
   NOTE(review): 128 is presumably the byte size of ctx->buffer --
   confirm against the struct declaration.  */
434 size_t add
= 128 - left_over
> len
? len
: 128 - left_over
;
436 memcpy (&((char *) ctx
->buffer
)[left_over
], buffer
, add
);
439 if (ctx
->buflen
> 64)
/* The mask with ~63 rounds the byte count down to a multiple of 64.  */
441 sha1_hw_process_block (ctx
->buffer
, ctx
->buflen
& ~63, ctx
);
444 /* The regions in the following copy operation cannot overlap. */
446 &((char *) ctx
->buffer
)[(left_over
+ add
) & ~63],
450 buffer
= (const char *) buffer
+ add
;
454 /* Process available complete blocks. */
457 #if !_STRING_ARCH_unaligned
/* alignof (TYPE) is computed as the offset of a TYPE member placed right
   after a single char.  UNALIGNED_P tests the address against the
   alignment of sha1_uint32.  */
458 # define alignof(type) offsetof (struct { char c; type x; }, x)
459 # define UNALIGNED_P(p) (((size_t) p) % alignof (sha1_uint32) != 0)
460 if (UNALIGNED_P (buffer
))
/* memcpy returns its destination, so each block is processed from the
   copy placed in ctx->buffer rather than from the caller's pointer.  */
463 sha1_hw_process_block (memcpy (ctx
->buffer
, buffer
, 64), 64, ctx
);
464 buffer
= (const char *) buffer
+ 64;
470 sha1_hw_process_block (buffer
, len
& ~63, ctx
);
471 buffer
= (const char *) buffer
+ (len
& ~63);
476 /* Move remaining bytes in internal buffer. */
479 size_t left_over
= ctx
->buflen
;
481 memcpy (&((char *) ctx
->buffer
)[left_over
], buffer
, len
);
485 sha1_hw_process_block (ctx
->buffer
, 64, ctx
);
/* Element 16 of ctx->buffer is byte offset 64 when its elements are the
   4-byte words that sha1_finish_ctx stores into it.  This slides the
   unprocessed bytes to the front after the first block was consumed.  */
487 memmove (ctx
->buffer
, &ctx
->buffer
[16], left_over
);
489 ctx
->buflen
= left_over
;
493 /* Process LEN bytes of BUFFER, accumulating context into CTX.
494 Using CPU specific intrinsics. */
/* The target attribute enables the SSE4.1 and SHA instruction sets for
   this one function, so the intrinsics compile without those sets being
   enabled for the whole file.
   NOTE(review): this listing omits lines of the function (the embedded
   numbering has gaps).  Not visible are, among others, the loop over
   blocks, the saving of abcd and e0 into abcd_save and e0_save, and the
   statements that assign e0 or e1 just before each sha1rnds4 call.  */
496 #ifdef HAVE_X86_SHA1_HW_SUPPORT
497 __attribute__((__target__ ("sse4.1,sha")))
500 sha1_hw_process_block (const void *buffer
, size_t len
, struct sha1_ctx
*ctx
)
502 #ifdef HAVE_X86_SHA1_HW_SUPPORT
504 https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html */
/* The input is walked in 16-byte vectors; ENDP is the address LEN bytes
   past BUFFER.  */
505 const __m128i
*words
= (const __m128i
*) buffer
;
506 const __m128i
*endp
= (const __m128i
*) ((const char *) buffer
+ len
);
507 __m128i abcd
, abcd_save
, e0
, e0_save
, e1
, msg0
, msg1
, msg2
, msg3
;
/* Byte-shuffle control that reverses all 16 bytes of a vector (result
   byte i is source byte 15 - i).  It is applied to every message vector
   right after loading.  */
508 const __m128i shuf_mask
509 = _mm_set_epi64x (0x0001020304050607ULL
, 0x08090a0b0c0d0e0fULL
);
/* Array whose size expression tests that A, B, C and D lie at consecutive
   offsets in struct sha1_ctx.  The single 128-bit load and store of
   &ctx->A below depend on that layout.
   NOTE(review): the tail of this declaration is not visible in this
   listing.  */
510 char check
[((offsetof (struct sha1_ctx
, B
)
511 == offsetof (struct sha1_ctx
, A
) + sizeof (ctx
->A
))
512 && (offsetof (struct sha1_ctx
, C
)
513 == offsetof (struct sha1_ctx
, A
) + 2 * sizeof (ctx
->A
))
514 && (offsetof (struct sha1_ctx
, D
)
515 == offsetof (struct sha1_ctx
, A
) + 3 * sizeof (ctx
->A
)))
518 /* First increment the byte count. RFC 1321 specifies the possible
519 length of the file up to 2^64 bits. Here we only compute the
520 number of bytes. Do a double word increment. */
521 ctx
->total
[0] += len
;
/* Shifting by 31 and then by 1 extracts the bits of LEN above bit 31
   without a single shift by 32.  The comparison adds the carry out of
   the low word.  */
522 ctx
->total
[1] += ((len
>> 31) >> 1) + (ctx
->total
[0] < len
);
/* Load A..D with one unaligned 128-bit load, put E into the highest
   32-bit lane of e0, then reverse the order of the four lanes of abcd
   (shuffle control 0x1b).  */
525 abcd
= _mm_loadu_si128 ((const __m128i
*) &ctx
->A
);
526 e0
= _mm_set_epi32 (ctx
->E
, 0, 0, 0);
527 abcd
= _mm_shuffle_epi32 (abcd
, 0x1b); /* 0, 1, 2, 3 */
/* Each _mm_sha1rnds4_epu32 call below performs four rounds.  Its last
   argument selects the round function and constant: 0 for rounds 0-19,
   1 for 20-39, 2 for 40-59 and 3 for 60-79.  */
/* Rounds 0-3.  */
535 msg0
= _mm_loadu_si128 (words
);
536 msg0
= _mm_shuffle_epi8 (msg0
, shuf_mask
);
537 e0
= _mm_add_epi32 (e0
, msg0
);
539 abcd
= _mm_sha1rnds4_epu32 (abcd
, e0
, 0);
/* Rounds 4-7.  */
542 msg1
= _mm_loadu_si128 (words
+ 1);
543 msg1
= _mm_shuffle_epi8 (msg1
, shuf_mask
);
544 e1
= _mm_sha1nexte_epu32 (e1
, msg1
);
546 abcd
= _mm_sha1rnds4_epu32 (abcd
, e1
, 0);
547 msg0
= _mm_sha1msg1_epu32 (msg0
, msg1
);
/* Rounds 8-11.  */
550 msg2
= _mm_loadu_si128 (words
+ 2);
551 msg2
= _mm_shuffle_epi8 (msg2
, shuf_mask
);
552 e0
= _mm_sha1nexte_epu32 (e0
, msg2
);
554 abcd
= _mm_sha1rnds4_epu32 (abcd
, e0
, 0);
555 msg1
= _mm_sha1msg1_epu32 (msg1
, msg2
);
556 msg0
= _mm_xor_si128 (msg0
, msg2
);
/* Rounds 12-15.  */
559 msg3
= _mm_loadu_si128 (words
+ 3);
560 msg3
= _mm_shuffle_epi8 (msg3
, shuf_mask
);
561 e1
= _mm_sha1nexte_epu32 (e1
, msg3
);
563 msg0
= _mm_sha1msg2_epu32 (msg0
, msg3
);
564 abcd
= _mm_sha1rnds4_epu32 (abcd
, e1
, 0);
565 msg2
= _mm_sha1msg1_epu32 (msg2
, msg3
);
566 msg1
= _mm_xor_si128 (msg1
, msg3
);
/* Rounds 16-19.  */
569 e0
= _mm_sha1nexte_epu32 (e0
, msg0
);
571 msg1
= _mm_sha1msg2_epu32 (msg1
, msg0
);
572 abcd
= _mm_sha1rnds4_epu32 (abcd
, e0
, 0);
573 msg3
= _mm_sha1msg1_epu32 (msg3
, msg0
);
574 msg2
= _mm_xor_si128 (msg2
, msg0
);
/* Rounds 20-23.  */
577 e1
= _mm_sha1nexte_epu32 (e1
, msg1
);
579 msg2
= _mm_sha1msg2_epu32 (msg2
, msg1
);
580 abcd
= _mm_sha1rnds4_epu32 (abcd
, e1
, 1);
581 msg0
= _mm_sha1msg1_epu32 (msg0
, msg1
);
582 msg3
= _mm_xor_si128 (msg3
, msg1
);
/* Rounds 24-27.  */
585 e0
= _mm_sha1nexte_epu32 (e0
, msg2
);
587 msg3
= _mm_sha1msg2_epu32 (msg3
, msg2
);
588 abcd
= _mm_sha1rnds4_epu32 (abcd
, e0
, 1);
589 msg1
= _mm_sha1msg1_epu32 (msg1
, msg2
);
590 msg0
= _mm_xor_si128 (msg0
, msg2
);
/* Rounds 28-31.  */
593 e1
= _mm_sha1nexte_epu32 (e1
, msg3
);
595 msg0
= _mm_sha1msg2_epu32 (msg0
, msg3
);
596 abcd
= _mm_sha1rnds4_epu32 (abcd
, e1
, 1);
597 msg2
= _mm_sha1msg1_epu32 (msg2
, msg3
);
598 msg1
= _mm_xor_si128 (msg1
, msg3
);
/* Rounds 32-35.  */
601 e0
= _mm_sha1nexte_epu32 (e0
, msg0
);
603 msg1
= _mm_sha1msg2_epu32 (msg1
, msg0
);
604 abcd
= _mm_sha1rnds4_epu32 (abcd
, e0
, 1);
605 msg3
= _mm_sha1msg1_epu32 (msg3
, msg0
);
606 msg2
= _mm_xor_si128 (msg2
, msg0
);
/* Rounds 36-39.  */
609 e1
= _mm_sha1nexte_epu32 (e1
, msg1
);
611 msg2
= _mm_sha1msg2_epu32 (msg2
, msg1
);
612 abcd
= _mm_sha1rnds4_epu32 (abcd
, e1
, 1);
613 msg0
= _mm_sha1msg1_epu32 (msg0
, msg1
);
614 msg3
= _mm_xor_si128 (msg3
, msg1
);
/* Rounds 40-43.  */
617 e0
= _mm_sha1nexte_epu32 (e0
, msg2
);
619 msg3
= _mm_sha1msg2_epu32 (msg3
, msg2
);
620 abcd
= _mm_sha1rnds4_epu32 (abcd
, e0
, 2);
621 msg1
= _mm_sha1msg1_epu32 (msg1
, msg2
);
622 msg0
= _mm_xor_si128 (msg0
, msg2
);
/* Rounds 44-47.  */
625 e1
= _mm_sha1nexte_epu32 (e1
, msg3
);
627 msg0
= _mm_sha1msg2_epu32 (msg0
, msg3
);
628 abcd
= _mm_sha1rnds4_epu32 (abcd
, e1
, 2);
629 msg2
= _mm_sha1msg1_epu32 (msg2
, msg3
);
630 msg1
= _mm_xor_si128 (msg1
, msg3
);
/* Rounds 48-51.  */
633 e0
= _mm_sha1nexte_epu32 (e0
, msg0
);
635 msg1
= _mm_sha1msg2_epu32 (msg1
, msg0
);
636 abcd
= _mm_sha1rnds4_epu32 (abcd
, e0
, 2);
637 msg3
= _mm_sha1msg1_epu32 (msg3
, msg0
);
638 msg2
= _mm_xor_si128 (msg2
, msg0
);
/* Rounds 52-55.  */
641 e1
= _mm_sha1nexte_epu32 (e1
, msg1
);
643 msg2
= _mm_sha1msg2_epu32 (msg2
, msg1
);
644 abcd
= _mm_sha1rnds4_epu32 (abcd
, e1
, 2);
645 msg0
= _mm_sha1msg1_epu32 (msg0
, msg1
);
646 msg3
= _mm_xor_si128 (msg3
, msg1
);
/* Rounds 56-59.  */
649 e0
= _mm_sha1nexte_epu32 (e0
, msg2
);
651 msg3
= _mm_sha1msg2_epu32 (msg3
, msg2
);
652 abcd
= _mm_sha1rnds4_epu32 (abcd
, e0
, 2);
653 msg1
= _mm_sha1msg1_epu32 (msg1
, msg2
);
654 msg0
= _mm_xor_si128 (msg0
, msg2
);
/* Rounds 60-63.  */
657 e1
= _mm_sha1nexte_epu32 (e1
, msg3
);
659 msg0
= _mm_sha1msg2_epu32 (msg0
, msg3
);
660 abcd
= _mm_sha1rnds4_epu32 (abcd
, e1
, 3);
661 msg2
= _mm_sha1msg1_epu32 (msg2
, msg3
);
662 msg1
= _mm_xor_si128 (msg1
, msg3
);
/* Rounds 64-67.  */
665 e0
= _mm_sha1nexte_epu32 (e0
, msg0
);
667 msg1
= _mm_sha1msg2_epu32 (msg1
, msg0
);
668 abcd
= _mm_sha1rnds4_epu32 (abcd
, e0
, 3);
669 msg3
= _mm_sha1msg1_epu32 (msg3
, msg0
);
670 msg2
= _mm_xor_si128 (msg2
, msg0
);
/* Rounds 68-71.  */
673 e1
= _mm_sha1nexte_epu32 (e1
, msg1
);
675 msg2
= _mm_sha1msg2_epu32 (msg2
, msg1
);
676 abcd
= _mm_sha1rnds4_epu32 (abcd
, e1
, 3);
677 msg3
= _mm_xor_si128 (msg3
, msg1
);
/* Rounds 72-75.  */
680 e0
= _mm_sha1nexte_epu32 (e0
, msg2
);
682 msg3
= _mm_sha1msg2_epu32 (msg3
, msg2
);
683 abcd
= _mm_sha1rnds4_epu32 (abcd
, e0
, 3);
/* Rounds 76-79.  */
686 e1
= _mm_sha1nexte_epu32 (e1
, msg3
);
688 abcd
= _mm_sha1rnds4_epu32 (abcd
, e1
, 3);
/* Fold the values saved before the rounds back into the state.  */
691 e0
= _mm_sha1nexte_epu32 (e0
, e0_save
);
692 abcd
= _mm_add_epi32 (abcd
, abcd_save
);
/* Undo the lane reversal, store A..D with one unaligned 128-bit store
   and read E back from the highest 32-bit lane of e0.  */
697 abcd
= _mm_shuffle_epi32 (abcd
, 0x1b); /* 0, 1, 2, 3 */
698 _mm_storeu_si128 ((__m128i
*) &ctx
->A
, abcd
);
699 ctx
->E
= _mm_extract_epi32 (e0
, 3);
704 /* Return sha1_process_bytes or some hardware optimized version thereof
705 depending on current CPU. */
/* With HAVE_X86_SHA1_HW_SUPPORT defined, two CPUID queries decide the
   result: leaf 7 subleaf 0 must report bit_SHA in EBX and leaf 1 must
   report bit_SSE4_1 in ECX.  These are the two instruction sets named in
   the target attribute of sha1_hw_process_block.  If either query fails
   or either bit is clear, the portable sha1_process_bytes is returned.  */
707 sha1_process_bytes_fn
708 sha1_choose_process_bytes (void)
710 #ifdef HAVE_X86_SHA1_HW_SUPPORT
711 unsigned int eax
, ebx
, ecx
, edx
;
/* The register variables are reused: the second query overwrites the
   values of the first, which is safe because the EBX test comes before
   it in the && chain.  */
712 if (__get_cpuid_count (7, 0, &eax
, &ebx
, &ecx
, &edx
)
713 && (ebx
& bit_SHA
) != 0
714 && __get_cpuid (1, &eax
, &ebx
, &ecx
, &edx
)
715 && (ecx
& bit_SSE4_1
) != 0)
716 return sha1_hw_process_bytes
;
718 return sha1_process_bytes
;