src/TortoisePlink/crypto/aesgcm-sw.c

   1 /*
   2  * Implementation of the GCM polynomial hash in pure software.
   3  *
   4  * I don't know of a faster way to do this in a side-channel safe
   5  * manner than by precomputing a giant table and iterating over the
   6  * whole thing.
   7  *
   8  * The original GCM reference suggests that you precompute the effects
   9  * of multiplying a 128-bit value by the fixed key, in the form of a
  10  * table indexed by some number of bits of the input value, so that
  11  * you end up computing something of the form
  12  *
  13  *   table1[x & 0xFF] ^ table2[(x>>8) & 0xFF] ^ ... ^ table15[(x>>120) & 0xFF]
  14  *
  15  * But that was obviously written before cache and timing leaks were
  16  * known about. What's a time-safe approach?
  17  *
  18  * Well, the above technique isn't fixed to 8 bits of input per table.
  19  * You could trade off the number of tables against the size of each
  20  * table. At one extreme of this tradeoff, you have 128 tables each
  21  * indexed by a single input bit - which is to say, you have 128
  22  * values, each 128 bits wide, and you XOR together the subset of
  23  * those values corresponding to the input bits, which you can do by
  24  * making a bitmask out of each input bit using standard constant-
  25  * time-coding bit twiddling techniques.
  26  *
  27  * That's pretty unpleasant when GCM is supposed to be a fast
  28  * algorithm, but I don't know of a better approach that meets current
  29  * security standards! Suggestions welcome, if they can get through
  30  * testsc.
  31  */
  32
  33 #include "ssh.h"
  34 #include "aesgcm.h"
  35
  36 /*
  37  * Store a 128-bit value in the most convenient form standard C will
  38  * let us, namely two uint64_t giving its most and least significant
  39  * halves.
  40  */
  41 typedef struct {
  42     uint64_t hi, lo;
  43 } value128_t;
  44
  45 typedef struct aesgcm_sw {
  46     AESGCM_COMMON_FIELDS;
  47
  48     /* Accumulator for the current evaluation, and mask that will be
  49      * XORed in at the end. High  */
  50     value128_t acc, mask;
  51
  52     /*
  53      * Table of values to XOR in for each bit, representing the effect
  54      * of multiplying by the fixed key. The key itself doesn't need to
  55      * be stored separately, because it's never used. (However, it is
  56      * also the first entry in the table, so if you _do_ need it,
  57      * there it is.)
  58      *
  59      * Table is indexed from the low bit of the input upwards.
  60      */
  61     value128_t table[128];
  62 } aesgcm_sw;
  63
  64 static bool aesgcm_sw_available(void)
  65 {
  66     return true;  /* pure software implementation, always available */
  67 }
  68
  69 static void aesgcm_sw_setkey_impl(aesgcm_sw *gcm, const unsigned char *var)
  70 {
  71     value128_t v;
  72     v.hi = GET_64BIT_MSB_FIRST(var);
  73     v.lo = GET_64BIT_MSB_FIRST(var + 8);
  74
  75     /*
  76      * Prepare the table. This has to be done in reverse order, so
  77      * that the original value of the variable corresponds to
  78      * table[127], because AES-GCM works in the bit-reversal of its
  79      * logical specification so that's where the logical constant term
  80      * lives. (See more detailed comment in aesgcm-ref-poly.c.)
  81      */
  82     for (size_t i = 0; i < 128; i++) {
  83         gcm->table[127 - i] = v;
  84
  85         /* Multiply v by x, which means shifting right (bit reversal
  86          * again) and then adding 0xE1 at the top if we shifted a 1 out. */
  87         uint64_t lobit = v.lo & 1;
  88         v.lo = (v.lo >> 1) ^ (v.hi << 63);
  89         v.hi = (v.hi >> 1) ^ (0xE100000000000000ULL & -lobit);
  90     }
  91 }
  92
  93 static inline void aesgcm_sw_setup(aesgcm_sw *gcm, const unsigned char *mask)
  94 {
  95     gcm->mask.hi = GET_64BIT_MSB_FIRST(mask);
  96     gcm->mask.lo = GET_64BIT_MSB_FIRST(mask + 8);
  97     gcm->acc.hi = gcm->acc.lo = 0;
  98 }
  99
 100 static inline void aesgcm_sw_coeff(aesgcm_sw *gcm, const unsigned char *coeff)
 101 {
 102     /* XOR in the new coefficient */
 103     gcm->acc.hi ^= GET_64BIT_MSB_FIRST(coeff);
 104     gcm->acc.lo ^= GET_64BIT_MSB_FIRST(coeff + 8);
 105
 106     /* And now just loop over the bits of acc, making up a new value
 107      * by XORing together the entries of 'table' corresponding to set
 108      * bits. */
 109
 110     value128_t out;
 111     out.lo = out.hi = 0;
 112
 113     const value128_t *tableptr = gcm->table;
 114
 115     for (size_t i = 0; i < 64; i++) {
 116         uint64_t bit = 1 & gcm->acc.lo;
 117         gcm->acc.lo >>= 1;
 118         uint64_t mask = -bit;
 119         out.hi ^= mask & tableptr->hi;
 120         out.lo ^= mask & tableptr->lo;
 121         tableptr++;
 122     }
 123     for (size_t i = 0; i < 64; i++) {
 124         uint64_t bit = 1 & gcm->acc.hi;
 125         gcm->acc.hi >>= 1;
 126         uint64_t mask = -bit;
 127         out.hi ^= mask & tableptr->hi;
 128         out.lo ^= mask & tableptr->lo;
 129         tableptr++;
 130     }
 131
 132     gcm->acc = out;
 133 }
 134
 135 static inline void aesgcm_sw_output(aesgcm_sw *gcm, unsigned char *output)
 136 {
 137     PUT_64BIT_MSB_FIRST(output, gcm->acc.hi ^ gcm->mask.hi);
 138     PUT_64BIT_MSB_FIRST(output + 8, gcm->acc.lo ^ gcm->mask.lo);
 139     smemclr(&gcm->acc, 16);
 140     smemclr(&gcm->mask, 16);
 141 }
 142
 143 #define AESGCM_FLAVOUR sw
 144 #define AESGCM_NAME "unaccelerated"
 145 #include "aesgcm-footer.h"