Lua: Don't lua_error() out of context with pending dtors
[lsnes.git] / src / library / sha256.cpp
blob0575a47559cf9fdbd1ea8fce2d95f45c2002bfac
#include "sha256.hpp"
#include "hex.hpp"
#include <cstdint>
#include <cstring>
#include <sstream>
#include <iostream>
#include <iomanip>
#include "arch-detect.hpp"
9 //Since this isn't used for anything too performance-sensitive, just write an implementation, no need to specially
10 //optimize.
namespace
{
	//Initial state of SHA256.
	const uint32_t sha256_initial_state[] = {
		0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
	};

	//The round constants.
	const uint32_t k[] = {
		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
	};

	//Rotate a 32-bit word right by p bits. p must be in 1..31: for any other value one of the two shifts
	//below would be by 32 or more bits, which is undefined.
	template<unsigned p>
	inline uint32_t rotate_r(uint32_t num)
	{
		static_assert(p > 0 && p < 32, "rotate_r: rotation amount must be in the range 1..31");
		return (num >> p) | (num << (32 - p));
	}

	//Mixing function applied to working variable 'a' in each round (rotations by 2, 13 and 22).
	inline uint32_t sigma0(uint32_t num)
	{
		return rotate_r<2>(num) ^ rotate_r<13>(num) ^ rotate_r<22>(num);
	}

	//Mixing function applied to working variable 'e' in each round (rotations by 6, 11 and 25).
	inline uint32_t sigma1(uint32_t num)
	{
		return rotate_r<6>(num) ^ rotate_r<11>(num) ^ rotate_r<25>(num);
	}

	//Message schedule expansion function (rotations by 7 and 18, shift by 3).
	inline uint32_t esigma0(uint32_t num)
	{
		return rotate_r<7>(num) ^ rotate_r<18>(num) ^ (num >> 3);
	}

	//Message schedule expansion function (rotations by 17 and 19, shift by 10).
	inline uint32_t esigma1(uint32_t num)
	{
		return rotate_r<17>(num) ^ rotate_r<19>(num) ^ (num >> 10);
	}

	//Bitwise majority vote of the three inputs.
	inline uint32_t majority(uint32_t a, uint32_t b, uint32_t c)
	{
		return ((a & b) ^ (a & c) ^ (b & c));
	}

	//Bitwise select: where a bit of k is set take the bit from a, otherwise take it from b.
	inline uint32_t choose(uint32_t k, uint32_t a, uint32_t b)
	{
		return (k & a) | ((~k) & b);
	}

//Debugging aid: stream expression fragment printing eight words tab-separated through hex::to32().
#define SHOW(a,b,c,d,e,f,g,h) "\t" << hex::to32(a) << "\t" << hex::to32(b) << "\t" << hex::to32(c) << "\t" \
	<< hex::to32(d) << "\t" << hex::to32(e) << "\t" << hex::to32(f) << "\t" << hex::to32(g) << "\t" \
	<< hex::to32(h)

//Extend the message schedule in place for round (i + shift). datablock is used as a rolling window of the last
//16 schedule words, so all indices are taken modulo 16.
#define WROUND(i, shift) \
	Xsigma0 = esigma0(datablock[(i + shift + 1) & 15]); \
	Xsigma1 = esigma1(datablock[(i + shift + 14) & 15]); \
	datablock[(i + shift) & 15] += Xsigma0 + Xsigma1 + datablock[(i + shift + 9) & 15];

//One round, number (i | l). Instead of shifting the eight working variables, callers rotate the argument order,
//so only h (which becomes the new 'a') and d (which becomes the new 'e') are written.
//Note: The last line must not end with a line continuation, or the following source line becomes part of the
//macro.
#define ROUND(a,b,c,d,e,f,g,h, i, l) \
	X = h + k[i | l] + datablock[(i & 8) | l] + sigma1(e) + choose(e, f, g); \
	h = X + sigma0(a) + majority(a, b, c); \
	d += X;

//Eight rounds reading the message words directly (used for rounds 0-15). i must be a multiple of 8.
#define ROUND8A(a, b, c, d, e, f, g, h, i) \
	ROUND(a, b, c, d, e, f, g, h, i, 0); \
	ROUND(h, a, b, c, d, e, f, g, i, 1); \
	ROUND(g, h, a, b, c, d, e, f, i, 2); \
	ROUND(f, g, h, a, b, c, d, e, i, 3); \
	ROUND(e, f, g, h, a, b, c, d, i, 4); \
	ROUND(d, e, f, g, h, a, b, c, i, 5); \
	ROUND(c, d, e, f, g, h, a, b, i, 6); \
	ROUND(b, c, d, e, f, g, h, a, i, 7)

//Eight rounds, each preceded by the matching message schedule extension (used for rounds 16-63). i must be a
//multiple of 8.
#define ROUND8B(a, b, c, d, e, f, g, h, i) \
	WROUND(i, 0); \
	ROUND(a, b, c, d, e, f, g, h, i, 0); \
	WROUND(i, 1); \
	ROUND(h, a, b, c, d, e, f, g, i, 1); \
	WROUND(i, 2); \
	ROUND(g, h, a, b, c, d, e, f, i, 2); \
	WROUND(i, 3); \
	ROUND(f, g, h, a, b, c, d, e, i, 3); \
	WROUND(i, 4); \
	ROUND(e, f, g, h, a, b, c, d, i, 4); \
	WROUND(i, 5); \
	ROUND(d, e, f, g, h, a, b, c, i, 5); \
	WROUND(i, 6); \
	ROUND(c, d, e, f, g, h, a, b, i, 6); \
	WROUND(i, 7); \
	ROUND(b, c, d, e, f, g, h, a, i, 7)

	//Run the SHA-256 compression function over one 64-byte block.
	//
	//Parameters:
	//	state: The eight-word chaining state. Updated in place.
	//	datablock: The block as sixteen 32-bit words. Overwritten during processing and zeroed on return.
	//	blockbytes: Fill counter of the block. Reset to zero on return.
	void compress_sha256(uint32_t* state, uint32_t* datablock, unsigned& blockbytes)
	{
		uint32_t a = state[0];
		uint32_t b = state[1];
		uint32_t c = state[2];
		uint32_t d = state[3];
		uint32_t e = state[4];
		uint32_t f = state[5];
		uint32_t g = state[6];
		uint32_t h = state[7];
		uint32_t X, Xsigma0, Xsigma1;
		ROUND8A(a, b, c, d, e, f, g, h, 0);
		ROUND8A(a, b, c, d, e, f, g, h, 8);
		ROUND8B(a, b, c, d, e, f, g, h, 16);
		ROUND8B(a, b, c, d, e, f, g, h, 24);
		ROUND8B(a, b, c, d, e, f, g, h, 32);
		ROUND8B(a, b, c, d, e, f, g, h, 40);
		ROUND8B(a, b, c, d, e, f, g, h, 48);
		ROUND8B(a, b, c, d, e, f, g, h, 56);
		state[0] += a;
		state[1] += b;
		state[2] += c;
		state[3] += d;
		state[4] += e;
		state[5] += f;
		state[6] += g;
		state[7] += h;
		//Leave an all-zero block behind, since the writers OR new bytes into it.
		std::memset(datablock, 0, 16 * sizeof(uint32_t));
		blockbytes = 0;
	}
}
142 void sha256::real_init()
144 for(unsigned i = 0; i < 8; i++)
145 state[i] = sha256_initial_state[i];
146 memset(datablock, 0, sizeof(datablock));
147 blockbytes = 0;
148 totalbytes = 0;
151 void sha256::real_destroy()
155 void sha256::real_finish(uint8_t* hash)
157 datablock[blockbytes / 4] |= (static_cast<uint32_t>(0x80) << (24 - blockbytes % 4 * 8));
158 if(blockbytes > 55)
159 //We can't fit the length into this block.
160 compress_sha256(state, datablock, blockbytes);
161 //Write the length.
162 datablock[14] = totalbytes >> 29;
163 datablock[15] = totalbytes << 3;
164 compress_sha256(state, datablock, blockbytes);
165 for(unsigned i = 0; i < 32; i++)
166 hash[i] = state[i / 4] >> (24 - i % 4 * 8);
169 void sha256::real_write(const uint8_t* data, size_t datalen)
171 #ifdef ARCH_IS_I386
172 //First pad blockbytes to multiple of four.
173 size_t i = 0;
174 while(blockbytes & 3 && i < datalen) {
175 datablock[blockbytes / 4] |= (static_cast<uint32_t>(data[i]) << (24 - blockbytes % 4 * 8));
176 blockbytes++;
177 if(blockbytes == 64)
178 compress_sha256(state, datablock, blockbytes);
179 i++;
181 size_t blocks = (datalen - i) / 4;
182 unsigned ptr = blockbytes / 4;
183 //Then process four bytes ata time.
184 for(size_t j = 0; j < blocks; j++) {
185 uint32_t x = *reinterpret_cast<const uint32_t*>(data + i);
186 asm("bswap %0" : "+r"(x));
187 datablock[ptr] = x;
188 ptr = (ptr + 1) & 15;
189 i += 4;
190 blockbytes += 4;
191 if(blockbytes == 64)
192 compress_sha256(state, datablock, blockbytes);
194 //And finally process tail.
195 while(i < datalen) {
196 datablock[blockbytes / 4] |= (static_cast<uint32_t>(data[i]) << (24 - blockbytes % 4 * 8));
197 blockbytes++;
198 if(blockbytes == 64)
199 compress_sha256(state, datablock, blockbytes);
200 i++;
202 #else
203 for(size_t i = 0; i < datalen; i++) {
204 datablock[blockbytes / 4] |= (static_cast<uint32_t>(data[i]) << (24 - blockbytes % 4 * 8));
205 blockbytes++;
206 if(blockbytes == 64)
207 compress_sha256(state, datablock, blockbytes);
209 #endif
210 totalbytes += datalen;
#ifdef SHA256_SELFTEST

#define TEST_LOOPS 100000
#define TEST_DATASET 4096
#include <sys/time.h>

//Self-test driver, only built when SHA256_SELFTEST is defined.
//
//Hashes the concatenation of the two command line arguments and prints the result of read() to stderr, then
//times hashing TEST_LOOPS zero-filled buffers of TEST_DATASET bytes and prints the throughput.
//
//Returns 0 on success, 1 if fewer than two arguments were given.
int main(int argc, char** argv)
{
	//Both arguments are dereferenced below, so refuse to run without them.
	if(argc < 3) {
		std::cerr << "Usage: " << (argc > 0 ? argv[0] : "sha256") << " <string1> <string2>" << std::endl;
		return 1;
	}
	sha256 i;
	i.write(argv[1], std::strlen(argv[1]));
	i.write(argv[2], std::strlen(argv[2]));
	std::cerr << i.read() << std::endl;
	struct timeval t1;
	struct timeval t2;
	char buffer[TEST_DATASET] = {0};
	gettimeofday(&t1, nullptr);
	sha256 i2;
	for(unsigned j = 0; j < TEST_LOOPS; j++)
		i2.write(buffer, TEST_DATASET);
	gettimeofday(&t2, nullptr);
	uint64_t _t1 = (uint64_t)t1.tv_sec * 1000000 + t1.tv_usec;
	uint64_t _t2 = (uint64_t)t2.tv_sec * 1000000 + t2.tv_usec;
	//Bytes per microsecond equals megabytes per second. Multiply as double so that larger test parameters
	//can not overflow int.
	std::cerr << "Hashing performance: " << static_cast<double>(TEST_LOOPS) * TEST_DATASET / (_t2 - _t1)
		<< "MB/s." << std::endl;
	return 0;
}

#endif