2 * Implementation of the GCM polynomial hash in pure software.
4 * I don't know of a faster way to do this in a side-channel safe
5 * manner than by precomputing a giant table and iterating over the
8 * The original GCM reference suggests that you precompute the effects
9 * of multiplying a 128-bit value by the fixed key, in the form of a
10 * table indexed by some number of bits of the input value, so that
11 * you end up computing something of the form
13 * table1[x & 0xFF] ^ table2[(x>>8) & 0xFF] ^ ... ^ table15[(x>>120) & 0xFF]
15 * But that was obviously written before cache and timing leaks were
16 * known about. What's a time-safe approach?
18 * Well, the above technique isn't fixed to 8 bits of input per table.
19 * You could trade off the number of tables against the size of each
20 * table. At one extreme of this tradeoff, you have 128 tables each
21 * indexed by a single input bit - which is to say, you have 128
22 * values, each 128 bits wide, and you XOR together the subset of
23 * those values corresponding to the input bits, which you can do by
24 * making a bitmask out of each input bit using standard constant-
25 * time-coding bit twiddling techniques.
27 * That's pretty unpleasant when GCM is supposed to be a fast
28 * algorithm, but I don't know of a better approach that meets current
29 * security standards! Suggestions welcome, if they can get through
37 * Store a 128-bit value in the most convenient form standard C will
38 * let us, namely two uint64_t giving its most and least significant
45 typedef struct aesgcm_sw
{
48 /* Accumulator for the current evaluation, and mask that will be
49 * XORed in at the end. High */
53 * Table of values to XOR in for each bit, representing the effect
54 * of multiplying by the fixed key. The key itself doesn't need to
55 * be stored separately, because it's never used. (However, it is
56 * also the first entry in the table, so if you _do_ need it,
59 * Table is indexed from the low bit of the input upwards.
61 value128_t table
[128];
64 static bool aesgcm_sw_available(void)
66 return true; /* pure software implementation, always available */
69 static void aesgcm_sw_setkey_impl(aesgcm_sw
*gcm
, const unsigned char *var
)
72 v
.hi
= GET_64BIT_MSB_FIRST(var
);
73 v
.lo
= GET_64BIT_MSB_FIRST(var
+ 8);
76 * Prepare the table. This has to be done in reverse order, so
77 * that the original value of the variable corresponds to
78 * table[127], because AES-GCM works in the bit-reversal of its
79 * logical specification so that's where the logical constant term
80 * lives. (See more detailed comment in aesgcm-ref-poly.c.)
82 for (size_t i
= 0; i
< 128; i
++) {
83 gcm
->table
[127 - i
] = v
;
85 /* Multiply v by x, which means shifting right (bit reversal
86 * again) and then adding 0xE1 at the top if we shifted a 1 out. */
87 uint64_t lobit
= v
.lo
& 1;
88 v
.lo
= (v
.lo
>> 1) ^ (v
.hi
<< 63);
89 v
.hi
= (v
.hi
>> 1) ^ (0xE100000000000000ULL
& -lobit
);
93 static inline void aesgcm_sw_setup(aesgcm_sw
*gcm
, const unsigned char *mask
)
95 gcm
->mask
.hi
= GET_64BIT_MSB_FIRST(mask
);
96 gcm
->mask
.lo
= GET_64BIT_MSB_FIRST(mask
+ 8);
97 gcm
->acc
.hi
= gcm
->acc
.lo
= 0;
100 static inline void aesgcm_sw_coeff(aesgcm_sw
*gcm
, const unsigned char *coeff
)
102 /* XOR in the new coefficient */
103 gcm
->acc
.hi
^= GET_64BIT_MSB_FIRST(coeff
);
104 gcm
->acc
.lo
^= GET_64BIT_MSB_FIRST(coeff
+ 8);
106 /* And now just loop over the bits of acc, making up a new value
107 * by XORing together the entries of 'table' corresponding to set
113 const value128_t
*tableptr
= gcm
->table
;
115 for (size_t i
= 0; i
< 64; i
++) {
116 uint64_t bit
= 1 & gcm
->acc
.lo
;
118 uint64_t mask
= -bit
;
119 out
.hi
^= mask
& tableptr
->hi
;
120 out
.lo
^= mask
& tableptr
->lo
;
123 for (size_t i
= 0; i
< 64; i
++) {
124 uint64_t bit
= 1 & gcm
->acc
.hi
;
126 uint64_t mask
= -bit
;
127 out
.hi
^= mask
& tableptr
->hi
;
128 out
.lo
^= mask
& tableptr
->lo
;
135 static inline void aesgcm_sw_output(aesgcm_sw
*gcm
, unsigned char *output
)
137 PUT_64BIT_MSB_FIRST(output
, gcm
->acc
.hi
^ gcm
->mask
.hi
);
138 PUT_64BIT_MSB_FIRST(output
+ 8, gcm
->acc
.lo
^ gcm
->mask
.lo
);
139 smemclr(&gcm
->acc
, 16);
140 smemclr(&gcm
->mask
, 16);
143 #define AESGCM_FLAVOUR sw
144 #define AESGCM_NAME "unaccelerated"
145 #include "aesgcm-footer.h"