7 #include "arch-detect.hpp"
//Since this isn't used for anything too performance-sensitive, just write a straightforward implementation; there
//is no need to specially optimize it.
//Initial state of SHA256: the eight 32-bit words the hash state is loaded with before any data is processed.
const uint32_t sha256_initial_state[] = {
	0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
};
//The round constants: one 32-bit word per round, indexed by round number (0-63).
const uint32_t k[] = {
	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
//Rotate the 32-bit value num right by p bits (p is a compile-time constant).
//Both shift counts are reduced modulo 32, so p = 0 (or any multiple of 32) yields num unchanged instead of
//performing an undefined full-width shift.
template<unsigned p>
inline uint32_t rotate_r(uint32_t num)
{
	return (num >> (p & 31)) | (num << ((32 - (p & 31)) & 31));
}
37 inline uint32_t sigma0(uint32_t num
)
39 return rotate_r
<2>(num
) ^ rotate_r
<13>(num
) ^ rotate_r
<22>(num
);
42 inline uint32_t sigma1(uint32_t num
)
44 return rotate_r
<6>(num
) ^ rotate_r
<11>(num
) ^ rotate_r
<25>(num
);
47 inline uint32_t esigma0(uint32_t num
)
49 return rotate_r
<7>(num
) ^ rotate_r
<18>(num
) ^ (num
>> 3);
52 inline uint32_t esigma1(uint32_t num
)
54 return rotate_r
<17>(num
) ^ rotate_r
<19>(num
) ^ (num
>> 10);
//Bitwise majority vote: each result bit is set iff at least two of the corresponding bits of a, b and c are set.
inline uint32_t majority(uint32_t a, uint32_t b, uint32_t c)
{
	return ((a & b) ^ (a & c) ^ (b & c));
}
//Bitwise select: where a bit of k is set the result takes the bit from a, otherwise the bit from b.
//Note that the parameter k shadows the round constant table of the same name inside this function.
inline uint32_t choose(uint32_t k, uint32_t a, uint32_t b)
{
	return (k & a) | ((~k) & b);
}
//Debugging aid: expands to a stream-insertion sequence that prints the eight values a..h, each preceded by a tab
//and formatted with hex::to32(). Meant to be placed after a stream object, e.g. std::cerr << SHOW(...).
#define SHOW(a,b,c,d,e,f,g,h) "\t" << hex::to32(a) << "\t" << hex::to32(b) << "\t" << hex::to32(c) << "\t" \
	<< hex::to32(d) << "\t" << hex::to32(e) << "\t" << hex::to32(f) << "\t" << hex::to32(g) << "\t" \
	<< hex::to32(h)
//Message schedule step. datablock is used as a 16-word ring buffer: the word at slot (i + shift) mod 16 is
//replaced by itself plus esigma0 of the next slot, esigma1 of the slot 14 ahead and the word 9 slots ahead.
//Requires uint32_t locals Xsigma0 and Xsigma1 and an array datablock in the expanding scope.
#define WROUND(i, shift) \
	Xsigma0 = esigma0(datablock[((i) + (shift) + 1) & 15]); \
	Xsigma1 = esigma1(datablock[((i) + (shift) + 14) & 15]); \
	datablock[((i) + (shift)) & 15] += Xsigma0 + Xsigma1 + datablock[((i) + (shift) + 9) & 15];
//One SHA-256 round. a..h name the working variables in their current roles, i is the base round number (a
//multiple of 8) and l the offset within that group of eight, so i | l is the round number.
//Instead of shifting all eight variables, only h (which becomes the new 'a') and d (which becomes the new 'e')
//are updated; the caller rotates the argument order for the next round (see ROUND8A).
//Requires a uint32_t local X and an array datablock in the expanding scope.
#define ROUND(a,b,c,d,e,f,g,h, i, l) \
	X = h + k[(i) | (l)] + datablock[((i) & 8) | (l)] + sigma1(e) + choose(e, f, g); \
	h = X + sigma0(a) + majority(a, b, c); \
	d += X;
//Eight consecutive rounds starting at round i, reading the message words straight from datablock (no schedule
//expansion). The argument list is rotated by one position per round, so after all eight rounds a..h are back in
//their original roles.
#define ROUND8A(a, b, c, d, e, f, g, h, i) \
	ROUND(a, b, c, d, e, f, g, h, i, 0); \
	ROUND(h, a, b, c, d, e, f, g, i, 1); \
	ROUND(g, h, a, b, c, d, e, f, i, 2); \
	ROUND(f, g, h, a, b, c, d, e, i, 3); \
	ROUND(e, f, g, h, a, b, c, d, i, 4); \
	ROUND(d, e, f, g, h, a, b, c, i, 5); \
	ROUND(c, d, e, f, g, h, a, b, i, 6); \
	ROUND(b, c, d, e, f, g, h, a, i, 7)
//Eight consecutive rounds starting at round i, like ROUND8A, but each round is preceded by the message schedule
//step that overwrites the ring-buffer slot the round is about to read. Used for rounds 16 and up.
#define ROUND8B(a, b, c, d, e, f, g, h, i) \
	WROUND(i, 0); \
	ROUND(a, b, c, d, e, f, g, h, i, 0); \
	WROUND(i, 1); \
	ROUND(h, a, b, c, d, e, f, g, i, 1); \
	WROUND(i, 2); \
	ROUND(g, h, a, b, c, d, e, f, i, 2); \
	WROUND(i, 3); \
	ROUND(f, g, h, a, b, c, d, e, i, 3); \
	WROUND(i, 4); \
	ROUND(e, f, g, h, a, b, c, d, i, 4); \
	WROUND(i, 5); \
	ROUND(d, e, f, g, h, a, b, c, i, 5); \
	WROUND(i, 6); \
	ROUND(c, d, e, f, g, h, a, b, i, 6); \
	WROUND(i, 7); \
	ROUND(b, c, d, e, f, g, h, a, i, 7)
110 void compress_sha256(uint32_t* state
, uint32_t* datablock
, unsigned& blockbytes
)
112 uint32_t a
= state
[0];
113 uint32_t b
= state
[1];
114 uint32_t c
= state
[2];
115 uint32_t d
= state
[3];
116 uint32_t e
= state
[4];
117 uint32_t f
= state
[5];
118 uint32_t g
= state
[6];
119 uint32_t h
= state
[7];
120 uint32_t X
, Xsigma0
, Xsigma1
;
121 ROUND8A(a
, b
, c
, d
, e
, f
, g
, h
, 0);
122 ROUND8A(a
, b
, c
, d
, e
, f
, g
, h
, 8);
123 ROUND8B(a
, b
, c
, d
, e
, f
, g
, h
, 16);
124 ROUND8B(a
, b
, c
, d
, e
, f
, g
, h
, 24);
125 ROUND8B(a
, b
, c
, d
, e
, f
, g
, h
, 32);
126 ROUND8B(a
, b
, c
, d
, e
, f
, g
, h
, 40);
127 ROUND8B(a
, b
, c
, d
, e
, f
, g
, h
, 48);
128 ROUND8B(a
, b
, c
, d
, e
, f
, g
, h
, 56);
137 memset(datablock
, 0, 64);
142 void sha256::real_init()
144 for(unsigned i
= 0; i
< 8; i
++)
145 state
[i
] = sha256_initial_state
[i
];
146 memset(datablock
, 0, sizeof(datablock
));
151 void sha256::real_destroy()
155 void sha256::real_finish(uint8_t* hash
)
157 datablock
[blockbytes
/ 4] |= (static_cast<uint32_t>(0x80) << (24 - blockbytes
% 4 * 8));
159 //We can't fit the length into this block.
160 compress_sha256(state
, datablock
, blockbytes
);
162 datablock
[14] = totalbytes
>> 29;
163 datablock
[15] = totalbytes
<< 3;
164 compress_sha256(state
, datablock
, blockbytes
);
165 for(unsigned i
= 0; i
< 32; i
++)
166 hash
[i
] = state
[i
/ 4] >> (24 - i
% 4 * 8);
169 void sha256::real_write(const uint8_t* data
, size_t datalen
)
172 //First pad blockbytes to multiple of four.
174 while(blockbytes
& 3 && i
< datalen
) {
175 datablock
[blockbytes
/ 4] |= (static_cast<uint32_t>(data
[i
]) << (24 - blockbytes
% 4 * 8));
178 compress_sha256(state
, datablock
, blockbytes
);
181 size_t blocks
= (datalen
- i
) / 4;
182 unsigned ptr
= blockbytes
/ 4;
183 //Then process four bytes ata time.
184 for(size_t j
= 0; j
< blocks
; j
++) {
185 uint32_t x
= *reinterpret_cast<const uint32_t*>(data
+ i
);
186 asm("bswap %0" : "+r"(x
));
188 ptr
= (ptr
+ 1) & 15;
192 compress_sha256(state
, datablock
, blockbytes
);
194 //And finally process tail.
196 datablock
[blockbytes
/ 4] |= (static_cast<uint32_t>(data
[i
]) << (24 - blockbytes
% 4 * 8));
199 compress_sha256(state
, datablock
, blockbytes
);
203 for(size_t i
= 0; i
< datalen
; i
++) {
204 datablock
[blockbytes
/ 4] |= (static_cast<uint32_t>(data
[i
]) << (24 - blockbytes
% 4 * 8));
207 compress_sha256(state
, datablock
, blockbytes
);
210 totalbytes
+= datalen
;
213 #ifdef SHA256_SELFTEST
215 #define TEST_LOOPS 100000
216 #define TEST_DATASET 4096
217 #include <sys/time.h>
219 int main(int argc
, char** argv
)
222 i
.write(argv
[1], strlen(argv
[1]));
223 i
.write(argv
[2], strlen(argv
[2]));
224 std::cerr
<< i
.read() << std::endl
;
227 char buffer
[TEST_DATASET
] = {0};
228 gettimeofday(&t1
, NULL
);
230 for(unsigned j
= 0; j
< TEST_LOOPS
; j
++)
231 i2
.write(buffer
, TEST_DATASET
);
232 gettimeofday(&t2
, NULL
);
233 uint64_t _t1
= (uint64_t)t1
.tv_sec
* 1000000 + t1
.tv_usec
;
234 uint64_t _t2
= (uint64_t)t2
.tv_sec
* 1000000 + t2
.tv_usec
;
235 std::cerr
<< "Hashing performance: " << static_cast<double>(TEST_LOOPS
* TEST_DATASET
) / (_t2
- _t1
)
236 << "MB/s." << std::endl
;