Upload UI
[lsnes.git] / src / library / sha256.cpp
blob71e413c3e4c9b825b947d51e64d9956abb96e0d8
#include "sha256.hpp"

#include <cstdint>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <sstream>

#include "arch-detect.hpp"
//Since this isn't used for anything too performance-sensitive, this is just a straightforward implementation with
//no special optimization.
namespace
{
	//Initial state of SHA256.
	const uint32_t sha256_initial_state[] = {
		0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
	};

	//The round constants.
	const uint32_t k[] = {
		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
	};

	//Rotate the 32-bit value num right by p bits.
	template<unsigned p>
	inline uint32_t rotate_r(uint32_t num)
	{
		//p == 0 would shift left by 32 bits, which is undefined for a 32-bit operand.
		static_assert(p > 0 && p < 32, "rotate_r: rotation amount must be in range 1-31");
		return (num >> p) | (num << (32 - p));
	}

	//Round function applied to working variable a (rotations by 2, 13 and 22).
	inline uint32_t sigma0(uint32_t num)
	{
		return rotate_r<2>(num) ^ rotate_r<13>(num) ^ rotate_r<22>(num);
	}

	//Round function applied to working variable e (rotations by 6, 11 and 25).
	inline uint32_t sigma1(uint32_t num)
	{
		return rotate_r<6>(num) ^ rotate_r<11>(num) ^ rotate_r<25>(num);
	}

	//Message schedule function (rotations by 7 and 18, shift by 3).
	inline uint32_t esigma0(uint32_t num)
	{
		return rotate_r<7>(num) ^ rotate_r<18>(num) ^ (num >> 3);
	}

	//Message schedule function (rotations by 17 and 19, shift by 10).
	inline uint32_t esigma1(uint32_t num)
	{
		return rotate_r<17>(num) ^ rotate_r<19>(num) ^ (num >> 10);
	}

	//Bitwise majority of a, b and c.
	inline uint32_t majority(uint32_t a, uint32_t b, uint32_t c)
	{
		return ((a & b) ^ (a & c) ^ (b & c));
	}

	//For each bit position, pick the bit from a where k is set and from b where k is clear.
	inline uint32_t choose(uint32_t k, uint32_t a, uint32_t b)
	{
		return (k & a) | ((~k) & b);
	}

	//Format num as exactly 8 lowercase hexadecimal digits (zero-padded).
	std::string format32(uint32_t num)
	{
		std::ostringstream y;
		y << std::hex << std::setw(8) << std::setfill('0') << num;
		return y.str();
	}

//Debugging aid: stream fragment that prints eight words as tab-separated hex.
#define SHOW(a,b,c,d,e,f,g,h) "\t" << format32(a) << "\t" << format32(b) << "\t" << format32(c) << "\t" \
	<< format32(d) << "\t" << format32(e) << "\t" << format32(f) << "\t" << format32(g) << "\t" << format32(h)

//Expand the message schedule in place: compute word (i + shift) into the 16-word circular buffer 'datablock'.
#define WROUND(i, shift) \
	do { \
		const uint32_t Xsigma0 = esigma0(datablock[((i) + (shift) + 1) & 15]); \
		const uint32_t Xsigma1 = esigma1(datablock[((i) + (shift) + 14) & 15]); \
		datablock[((i) + (shift)) & 15] += Xsigma0 + Xsigma1 + datablock[((i) + (shift) + 9) & 15]; \
	} while(0)

//One round. Instead of shifting all eight working variables, the callers rotate the argument order, so only
//h (which becomes the new 'a') and d (which becomes the new 'e') are written.
#define ROUND(a,b,c,d,e,f,g,h, i, l) \
	do { \
		const uint32_t X = (h) + k[(i) | (l)] + datablock[((i) & 8) | (l)] + sigma1(e) + choose(e, f, g); \
		(h) = X + sigma0(a) + majority(a, b, c); \
		(d) += X; \
	} while(0)

//Eight rounds using the schedule words as they are (used for the first 16 rounds).
#define ROUND8A(a, b, c, d, e, f, g, h, i) \
	do { \
		ROUND(a, b, c, d, e, f, g, h, i, 0); \
		ROUND(h, a, b, c, d, e, f, g, i, 1); \
		ROUND(g, h, a, b, c, d, e, f, i, 2); \
		ROUND(f, g, h, a, b, c, d, e, i, 3); \
		ROUND(e, f, g, h, a, b, c, d, i, 4); \
		ROUND(d, e, f, g, h, a, b, c, i, 5); \
		ROUND(c, d, e, f, g, h, a, b, i, 6); \
		ROUND(b, c, d, e, f, g, h, a, i, 7); \
	} while(0)

//Eight rounds, each preceded by the schedule expansion for its word (used for rounds 16-63).
#define ROUND8B(a, b, c, d, e, f, g, h, i) \
	do { \
		WROUND(i, 0); \
		ROUND(a, b, c, d, e, f, g, h, i, 0); \
		WROUND(i, 1); \
		ROUND(h, a, b, c, d, e, f, g, i, 1); \
		WROUND(i, 2); \
		ROUND(g, h, a, b, c, d, e, f, i, 2); \
		WROUND(i, 3); \
		ROUND(f, g, h, a, b, c, d, e, i, 3); \
		WROUND(i, 4); \
		ROUND(e, f, g, h, a, b, c, d, i, 4); \
		WROUND(i, 5); \
		ROUND(d, e, f, g, h, a, b, c, i, 5); \
		WROUND(i, 6); \
		ROUND(c, d, e, f, g, h, a, b, i, 6); \
		WROUND(i, 7); \
		ROUND(b, c, d, e, f, g, h, a, i, 7); \
	} while(0)

	//Run the SHA-256 compression function over one block.
	//
	//Parameters:
	//  state: The 8-word chaining value. Updated in place.
	//  datablock: The 16-word (64-byte) message block, as big-endian words. It is overwritten while the
	//	message schedule is expanded and is all zeroes on return.
	//  blockbytes: Fill counter of the block. Set to 0 on return.
	void compress_sha256(uint32_t* state, uint32_t* datablock, unsigned& blockbytes)
	{
		uint32_t a = state[0];
		uint32_t b = state[1];
		uint32_t c = state[2];
		uint32_t d = state[3];
		uint32_t e = state[4];
		uint32_t f = state[5];
		uint32_t g = state[6];
		uint32_t h = state[7];
		ROUND8A(a, b, c, d, e, f, g, h, 0);
		ROUND8A(a, b, c, d, e, f, g, h, 8);
		ROUND8B(a, b, c, d, e, f, g, h, 16);
		ROUND8B(a, b, c, d, e, f, g, h, 24);
		ROUND8B(a, b, c, d, e, f, g, h, 32);
		ROUND8B(a, b, c, d, e, f, g, h, 40);
		ROUND8B(a, b, c, d, e, f, g, h, 48);
		ROUND8B(a, b, c, d, e, f, g, h, 56);
		state[0] += a;
		state[1] += b;
		state[2] += c;
		state[3] += d;
		state[4] += e;
		state[5] += f;
		state[6] += g;
		state[7] += h;
		//Leave an empty block behind, so the writers can OR new bytes into it.
		std::memset(datablock, 0, 16 * sizeof(uint32_t));
		blockbytes = 0;
	}
}
146 void sha256::real_init()
148 for(unsigned i = 0; i < 8; i++)
149 state[i] = sha256_initial_state[i];
150 memset(datablock, 0, sizeof(datablock));
151 blockbytes = 0;
152 totalbytes = 0;
155 void sha256::real_destroy()
159 void sha256::real_finish(uint8_t* hash)
161 datablock[blockbytes / 4] |= (static_cast<uint32_t>(0x80) << (24 - blockbytes % 4 * 8));
162 if(blockbytes > 55)
163 //We can't fit the length into this block.
164 compress_sha256(state, datablock, blockbytes);
165 //Write the length.
166 datablock[14] = totalbytes >> 29;
167 datablock[15] = totalbytes << 3;
168 compress_sha256(state, datablock, blockbytes);
169 for(unsigned i = 0; i < 32; i++)
170 hash[i] = state[i / 4] >> (24 - i % 4 * 8);
173 void sha256::real_write(const uint8_t* data, size_t datalen)
175 #ifdef ARCH_IS_I386
176 //First pad blockbytes to multiple of four.
177 size_t i = 0;
178 while(blockbytes & 3 && i < datalen) {
179 datablock[blockbytes / 4] |= (static_cast<uint32_t>(data[i]) << (24 - blockbytes % 4 * 8));
180 blockbytes++;
181 if(blockbytes == 64)
182 compress_sha256(state, datablock, blockbytes);
183 i++;
185 size_t blocks = (datalen - i) / 4;
186 unsigned ptr = blockbytes / 4;
187 //Then process four bytes ata time.
188 for(size_t j = 0; j < blocks; j++) {
189 uint32_t x = *reinterpret_cast<const uint32_t*>(data + i);
190 asm("bswap %0" : "+r"(x));
191 datablock[ptr] = x;
192 ptr = (ptr + 1) & 15;
193 i += 4;
194 blockbytes += 4;
195 if(blockbytes == 64)
196 compress_sha256(state, datablock, blockbytes);
198 //And finally process tail.
199 while(i < datalen) {
200 datablock[blockbytes / 4] |= (static_cast<uint32_t>(data[i]) << (24 - blockbytes % 4 * 8));
201 blockbytes++;
202 if(blockbytes == 64)
203 compress_sha256(state, datablock, blockbytes);
204 i++;
206 #else
207 for(size_t i = 0; i < datalen; i++) {
208 datablock[blockbytes / 4] |= (static_cast<uint32_t>(data[i]) << (24 - blockbytes % 4 * 8));
209 blockbytes++;
210 if(blockbytes == 64)
211 compress_sha256(state, datablock, blockbytes);
213 #endif
214 totalbytes += datalen;
#ifdef SHA256_SELFTEST

#define TEST_LOOPS 100000
#define TEST_DATASET 4096
#include <sys/time.h>

//Self-test entry point: hash the concatenation of the first two command line arguments and print the result,
//then time hashing TEST_LOOPS buffers of TEST_DATASET zero bytes and print the throughput.
int main(int argc, char** argv)
{
	//Both strings are required. Without this check argv[1] / argv[2] may be null.
	if(argc < 3) {
		std::cerr << "Syntax: " << (argc > 0 ? argv[0] : "sha256") << " <string1> <string2>" << std::endl;
		return 1;
	}
	sha256 i;
	i.write(argv[1], std::strlen(argv[1]));
	i.write(argv[2], std::strlen(argv[2]));
	std::cerr << i.read() << std::endl;
	struct timeval t1;
	struct timeval t2;
	char buffer[TEST_DATASET] = {0};
	gettimeofday(&t1, nullptr);
	sha256 i2;
	for(unsigned j = 0; j < TEST_LOOPS; j++)
		i2.write(buffer, TEST_DATASET);
	gettimeofday(&t2, nullptr);
	uint64_t _t1 = (uint64_t)t1.tv_sec * 1000000 + t1.tv_usec;
	uint64_t _t2 = (uint64_t)t2.tv_sec * 1000000 + t2.tv_usec;
	//Bytes per microsecond is the same as megabytes per second. The product is computed in double, so
	//raising the test constants can't overflow int.
	std::cerr << "Hashing performance: " << static_cast<double>(TEST_LOOPS) * TEST_DATASET / (_t2 - _t1)
		<< "MB/s." << std::endl;
	return 0;
}

#endif