src/library/sha256.cpp

   1 #include "sha256.hpp"
   2 #include "hex.hpp"
   3 #include <cstdint>
   4 #include <sstream>
   5 #include <iostream>
   6 #include <iomanip>
   7 #include "arch-detect.hpp"
   8
//Since this isn't used for anything too performance-sensitive, just write a straightforward implementation;
//there is no need to specially optimize it.
  11
  12 namespace
  13 {
  14         //Initial state of SHA256.
  15         const uint32_t sha256_initial_state[] = {
  16                 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
  17         };
  18
  19         //The round constants.
  20         const uint32_t k[] = {
  21                 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
  22                 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
  23                 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
  24                 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
  25                 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
  26                 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
  27                 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
  28                 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
  29         };
  30
  31         template<unsigned p>
  32         inline uint32_t rotate_r(uint32_t num)
  33         {
  34                 return (num >> p) | (num << (32 - p));
  35         }
  36
  37         inline uint32_t sigma0(uint32_t num)
  38         {
  39                 return rotate_r<2>(num) ^ rotate_r<13>(num) ^ rotate_r<22>(num);
  40         }
  41
  42         inline uint32_t sigma1(uint32_t num)
  43         {
  44                 return rotate_r<6>(num) ^ rotate_r<11>(num) ^ rotate_r<25>(num);
  45         }
  46
  47         inline uint32_t esigma0(uint32_t num)
  48         {
  49                 return rotate_r<7>(num) ^ rotate_r<18>(num) ^ (num >> 3);
  50         }
  51
  52         inline uint32_t esigma1(uint32_t num)
  53         {
  54                 return rotate_r<17>(num) ^ rotate_r<19>(num) ^ (num >> 10);
  55         }
  56
  57         inline uint32_t majority(uint32_t a, uint32_t b, uint32_t c)
  58         {
  59                 return ((a & b) ^ (a & c) ^ (b & c));
  60         }
  61
  62         inline uint32_t choose(uint32_t k, uint32_t a, uint32_t b)
  63         {
  64                 return (k & a) | ((~k) & b);
  65         }
  66
  67 #define SHOW(a,b,c,d,e,f,g,h) "\t" << hex::to32(a) << "\t" << hex::to32(b) << "\t" << hex::to32(c) << "\t" \
  68         << hex::to32(d) << "\t" << hex::to32(e) << "\t" << hex::to32(f) << "\t" << hex::to32(g) << "\t" \
  69         << hex::to32(h)
  70
  71 #define WROUND(i, shift) \
  72         Xsigma0 = esigma0(datablock[(i + shift + 1) & 15]); \
  73         Xsigma1 = esigma1(datablock[(i + shift + 14) & 15]); \
  74         datablock[(i + shift) & 15] += Xsigma0 + Xsigma1 + datablock[(i + shift + 9) & 15];
  75
  76 #define ROUND(a,b,c,d,e,f,g,h, i, l) \
  77         X = h + k[i | l] + datablock[(i & 8) | l] + sigma1(e) + choose(e, f, g); \
  78         h = X + sigma0(a) + majority(a, b, c); \
  79         d += X; \
  80
  81 #define ROUND8A(a, b, c, d, e, f, g, h, i) \
  82         ROUND(a, b, c, d, e, f, g, h, i, 0); \
  83         ROUND(h, a, b, c, d, e, f, g, i, 1); \
  84         ROUND(g, h, a, b, c, d, e, f, i, 2); \
  85         ROUND(f, g, h, a, b, c, d, e, i, 3); \
  86         ROUND(e, f, g, h, a, b, c, d, i, 4); \
  87         ROUND(d, e, f, g, h, a, b, c, i, 5); \
  88         ROUND(c, d, e, f, g, h, a, b, i, 6); \
  89         ROUND(b, c, d, e, f, g, h, a, i, 7)
  90
  91 #define ROUND8B(a, b, c, d, e, f, g, h, i) \
  92         WROUND(i, 0); \
  93         ROUND(a, b, c, d, e, f, g, h, i, 0); \
  94         WROUND(i, 1); \
  95         ROUND(h, a, b, c, d, e, f, g, i, 1); \
  96         WROUND(i, 2); \
  97         ROUND(g, h, a, b, c, d, e, f, i, 2); \
  98         WROUND(i, 3); \
  99         ROUND(f, g, h, a, b, c, d, e, i, 3); \
 100         WROUND(i, 4); \
 101         ROUND(e, f, g, h, a, b, c, d, i, 4); \
 102         WROUND(i, 5); \
 103         ROUND(d, e, f, g, h, a, b, c, i, 5); \
 104         WROUND(i, 6); \
 105         ROUND(c, d, e, f, g, h, a, b, i, 6); \
 106         WROUND(i, 7); \
 107         ROUND(b, c, d, e, f, g, h, a, i, 7)
 108
 109
 110         void compress_sha256(uint32_t* state, uint32_t* datablock, unsigned& blockbytes)
 111         {
 112                 uint32_t a = state[0];
 113                 uint32_t b = state[1];
 114                 uint32_t c = state[2];
 115                 uint32_t d = state[3];
 116                 uint32_t e = state[4];
 117                 uint32_t f = state[5];
 118                 uint32_t g = state[6];
 119                 uint32_t h = state[7];
 120                 uint32_t X, Xsigma0, Xsigma1;
 121                 ROUND8A(a, b, c, d, e, f, g, h, 0);
 122                 ROUND8A(a, b, c, d, e, f, g, h, 8);
 123                 ROUND8B(a, b, c, d, e, f, g, h, 16);
 124                 ROUND8B(a, b, c, d, e, f, g, h, 24);
 125                 ROUND8B(a, b, c, d, e, f, g, h, 32);
 126                 ROUND8B(a, b, c, d, e, f, g, h, 40);
 127                 ROUND8B(a, b, c, d, e, f, g, h, 48);
 128                 ROUND8B(a, b, c, d, e, f, g, h, 56);
 129                 state[0] += a;
 130                 state[1] += b;
 131                 state[2] += c;
 132                 state[3] += d;
 133                 state[4] += e;
 134                 state[5] += f;
 135                 state[6] += g;
 136                 state[7] += h;
 137                 memset(datablock, 0, 64);
 138                 blockbytes = 0;
 139         }
 140 }
 141
 142 void sha256::real_init()
 143 {
 144         for(unsigned i = 0; i < 8; i++)
 145                 state[i] = sha256_initial_state[i];
 146         memset(datablock, 0, sizeof(datablock));
 147         blockbytes = 0;
 148         totalbytes = 0;
 149 }
 150
 151 void sha256::real_destroy()
 152 {
 153 }
 154
 155 void sha256::real_finish(uint8_t* hash)
 156 {
 157         datablock[blockbytes / 4] |= (static_cast<uint32_t>(0x80) << (24 - blockbytes % 4 * 8));
 158         if(blockbytes > 55)
 159                 //We can't fit the length into this block.
 160                 compress_sha256(state, datablock, blockbytes);
 161         //Write the length.
 162         datablock[14] = totalbytes >> 29;
 163         datablock[15] = totalbytes << 3;
 164         compress_sha256(state, datablock, blockbytes);
 165         for(unsigned i = 0; i < 32; i++)
 166                 hash[i] = state[i / 4] >> (24 - i % 4 * 8);
 167 }
 168
 169 void sha256::real_write(const uint8_t* data, size_t datalen)
 170 {
 171 #ifdef ARCH_IS_I386
 172         //First pad blockbytes to multiple of four.
 173         size_t i = 0;
 174         while(blockbytes & 3 && i < datalen) {
 175                 datablock[blockbytes / 4] |= (static_cast<uint32_t>(data[i]) << (24 - blockbytes % 4 * 8));
 176                 blockbytes++;
 177                 if(blockbytes == 64)
 178                         compress_sha256(state, datablock, blockbytes);
 179                 i++;
 180         }
 181         size_t blocks = (datalen - i) / 4;
 182         unsigned ptr = blockbytes / 4;
 183         //Then process four bytes ata time.
 184         for(size_t j = 0; j < blocks; j++) {
 185                 uint32_t x = *reinterpret_cast<const uint32_t*>(data + i);
 186                 asm("bswap %0" : "+r"(x));
 187                 datablock[ptr] = x;
 188                 ptr = (ptr + 1) & 15;
 189                 i += 4;
 190                 blockbytes += 4;
 191                 if(blockbytes == 64)
 192                         compress_sha256(state, datablock, blockbytes);
 193         }
 194         //And finally process tail.
 195         while(i < datalen) {
 196                 datablock[blockbytes / 4] |= (static_cast<uint32_t>(data[i]) << (24 - blockbytes % 4 * 8));
 197                 blockbytes++;
 198                 if(blockbytes == 64)
 199                         compress_sha256(state, datablock, blockbytes);
 200                 i++;
 201         }
 202 #else
 203         for(size_t i = 0; i < datalen; i++) {
 204                 datablock[blockbytes / 4] |= (static_cast<uint32_t>(data[i]) << (24 - blockbytes % 4 * 8));
 205                 blockbytes++;
 206                 if(blockbytes == 64)
 207                         compress_sha256(state, datablock, blockbytes);
 208         }
 209 #endif
 210         totalbytes += datalen;
 211 }
 212
 213 #ifdef SHA256_SELFTEST
 214
 215 #define TEST_LOOPS 100000
 216 #define TEST_DATASET 4096
 217 #include <sys/time.h>
 218
 219 int main(int argc, char** argv)
 220 {
 221         sha256 i;
 222         i.write(argv[1], strlen(argv[1]));
 223         i.write(argv[2], strlen(argv[2]));
 224         std::cerr << i.read() << std::endl;
 225         struct timeval t1;
 226         struct timeval t2;
 227         char buffer[TEST_DATASET] = {0};
 228         gettimeofday(&t1, NULL);
 229         sha256 i2;
 230         for(unsigned j = 0; j < TEST_LOOPS; j++)
 231                 i2.write(buffer, TEST_DATASET);
 232         gettimeofday(&t2, NULL);
 233         uint64_t _t1 = (uint64_t)t1.tv_sec * 1000000 + t1.tv_usec;
 234         uint64_t _t2 = (uint64_t)t2.tv_sec * 1000000 + t2.tv_usec;
 235         std::cerr << "Hashing performance: " << static_cast<double>(TEST_LOOPS * TEST_DATASET) / (_t2 - _t1)
 236                 << "MB/s." << std::endl;
 237 }
 238
 239 #endif