main/aescrypt.c

   1 /*
   2  ---------------------------------------------------------------------------
   3  Copyright (c) 2003, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK.
   4  All rights reserved.
   5
   6  LICENSE TERMS
   7
   8  The free distribution and use of this software in both source and binary
   9  form is allowed (with or without changes) provided that:
  10
  11    1. distributions of this source code include the above copyright
  12       notice, this list of conditions and the following disclaimer;
  13
  14    2. distributions in binary form include the above copyright
  15       notice, this list of conditions and the following disclaimer
  16       in the documentation and/or other associated materials;
  17
  18    3. the copyright holder's name is not used to endorse products
  19       built using this software without specific written permission.
  20
  21  ALTERNATIVELY, provided that this notice is retained in full, this product
  22  may be distributed under the terms of the GNU General Public License (GPL),
  23  in which case the provisions of the GPL apply INSTEAD OF those given above.
  24
  25  DISCLAIMER
  26
  27  This software is provided 'as is' with no explicit or implied warranties
  28  in respect of its properties, including, but not limited to, correctness
  29  and/or fitness for purpose.
  30  ---------------------------------------------------------------------------
  31  Issue Date: 26/08/2003
  32
  33 */
  34
  35 /*! \file
  36  *
  37  * \brief  This file contains the code for implementing encryption and decryption
  38  * for AES (Rijndael) for block and key sizes of 16, 24 and 32 bytes. It
  39  * can optionally be replaced by code written in assembler using NASM. For
  40  * further details see the file aesopt.h
  41  *
  42  * \author Dr Brian Gladman <brg@gladman.me.uk>
  43  */
  44
  45 #include "aesopt.h"
  46
  47 #if defined(__cplusplus)
  48 extern "C"
  49 {
  50 #endif
  51
     /* Helper macros for handling the four state words used by the cipher
        routines in this file.  s(), word_in() and word_out() are not defined
        here (presumably they come from aesopt.h -- confirm).

        si(y,x,k,c)  set state word c of y to word_in(x, c) XOR key word (k)[c]
        so(y,x,c)    hand state word c of x to word_out() for position c of y
     */
  52 #define si(y,x,k,c) (s(y,c) = word_in(x, c) ^ (k)[c])
  53 #define so(y,x,c)   word_out(y, c, s(x,c))
  54
     /* locals(y,x) expands to the declarator list for two state variables:
        two 4-element arrays when ARRAYS is defined, otherwise eight scalars
        named x0..x3 and y0..y3 (built by token pasting).  The caller supplies
        the type, e.g. "aes_32t locals(b0, b1);".
     */
  55 #if defined(ARRAYS)
  56 #define locals(y,x)     x[4],y[4]
  57 #else
  58 #define locals(y,x)     x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
  59 #endif
  60
     /* The macros below expand to several statements separated by ';' and
        are not wrapped in do { } while(0), so they must only be used as a
        complete statement inside a braced block.

        l_copy(y,x)        copy state words 0..3 from x to y (the expansion
                           already ends with ';')
        state_in(y,x,k)    apply si() to columns 0..3
        state_out(y,x)     apply so() to columns 0..3
        round(rm,y,x,k)    apply the per-column round macro rm to columns 0..3
     */
  61 #define l_copy(y, x)    s(y,0) = s(x,0); s(y,1) = s(x,1); \
  62                         s(y,2) = s(x,2); s(y,3) = s(x,3);
  63 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3)
  64 #define state_out(y,x)  so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
  65 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
  66
  67 #if defined(ENCRYPTION) && !defined(AES_ASM)
  68
  69 /* Visual C++ .Net v7.1 provides the fastest encryption code when using
  70    Pentium optimisation with small code but this is poor for decryption
  71    so we need to control this with the following VC++ pragmas
  72 */
  73
  74 #if defined(_MSC_VER)
  75 #pragma optimize( "s", on )
  76 #endif
  77
  78 /* Given the column (c) of the output state variable, the following
  79    macros give the input state variables which are needed in its
  80    computation for each row (r) of the state. All the alternative
  81    macros give the same end values but expand into different ways
  82    of calculating these values.  In particular the complex macro
  83    used for dynamically variable block sizes is designed to expand
  84    to a compile time constant whenever possible but will expand to
  85    conditional clauses on some branches (I am grateful to Frank
  86    Yellin for this construction)
  87 */
  88
     /* fwd_var(x,r,c) selects state word s(x, (c + r) mod 4) for r and c in
        0..3; any other value of r or c is treated like 3 by the final
        branch of each conditional chain.
     */
  89 #define fwd_var(x,r,c)\
  90  ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
  91  : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
  92  : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
  93  :          ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
  94
     /* fwd_rnd(y,x,k,c): one column of a non-final encryption round.  It
        sets state word c of y to key word (k)[c] XOR a value computed from
        the words of x chosen by fwd_var.  The form depends on the tables
        compiled in:
          FT4_SET    four_tables() with the t_use(f,n) tables
          FT1_SET    one_table() with the t_use(f,n) table and upr
          otherwise  no_table() with t_use(s,box), passed through fwd_mcol()
        The two table forms do not call fwd_mcol(), so they #undef dec_fmvars;
        aes_encrypt() then skips the declaration guarded by that macro.
     */
  95 #if defined(FT4_SET)
  96 #undef  dec_fmvars
  97 #define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
  98 #elif defined(FT1_SET)
  99 #undef  dec_fmvars
 100 #define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(f,n),fwd_var,rf1,c))
 101 #else
 102 #define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_use(s,box),fwd_var,rf1,c)))
 103 #endif
 104
     /* fwd_lrnd(y,x,k,c): one column of the final encryption round.  Same
        shape as fwd_rnd but it uses the t_use(f,l) tables (with ups instead
        of upr in the one-table form), and the table-free form does not
        apply fwd_mcol().
     */
 105 #if defined(FL4_SET)
 106 #define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c))
 107 #elif defined(FL1_SET)
 108 #define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(f,l),fwd_var,rf1,c))
 109 #else
 110 #define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(s,box),fwd_var,rf1,c))
 111 #endif
 112
     /*! \brief Encrypt one block using the key schedule held in \a cx.
      *
      * The four input words are read with word_in() and XORed with the first
      * four key-schedule words.  The state then passes through fwd_rnd
      * rounds and one closing fwd_lrnd round, each using the next N_COLS
      * key-schedule words, and the result is written with word_out().
      *
      * \param in_blk   block to encrypt (read only, through word_in())
      * \param out_blk  receives the encrypted block (through word_out())
      * \param cx       encryption context; only cx->ks is read here
      * \return aes_error or aes_good when AES_ERR_CHK is defined.  Without
      *         AES_ERR_CHK no return statement is compiled, so aes_rval is
      *         presumably void in that configuration -- confirm in the header.
      */
 113 aes_rval aes_encrypt(const void *in_blk, void *out_blk, const aes_encrypt_ctx cx[1])
 114 {   aes_32t         locals(b0, b1);
 115     const aes_32t   *kp = cx->ks;
 116 #ifdef dec_fmvars
 117     dec_fmvars; /* declare variables for fwd_mcol() if needed */
 118 #endif
 119
         /* Round count: ks[52] when ks[45] ^ ks[52] ^ ks[53] is non-zero,
            otherwise 14.  NOTE(review): presumably the key set-up code
            stores the round count in ks[52] for the shorter key schedules
            -- confirm against the key-schedule source. */
 120     aes_32t nr = (kp[45] ^ kp[52] ^ kp[53] ? kp[52] : 14);
 121
         /* Proceed only when nr is 10, 12 or 14 and the key-schedule words
            listed for that nr are not all zero; otherwise report an error. */
 122 #ifdef AES_ERR_CHK
 123     if(   (nr != 10 || !(kp[0] | kp[3] | kp[4]))
 124        && (nr != 12 || !(kp[0] | kp[5] | kp[6]))
 125        && (nr != 14 || !(kp[0] | kp[7] | kp[8])) )
 126         return aes_error;
 127 #endif
 128
         /* b0 = input words XOR key-schedule words 0..3 */
 129     state_in(b0, in_blk, kp);
 130
 131 #if (ENC_UNROLL == FULL)
 132
         /* Fully unrolled.  The cases fall through on purpose (14 -> 12 -> 10):
            each of the first two runs two rounds and advances kp by two round
            keys, then case 10 runs the remaining nine rounds and the last
            round.  There is no default, so any other nr runs no rounds. */
 133     switch(nr)
 134     {
 135     case 14:
 136         round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
 137         round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
 138         kp += 2 * N_COLS;   /* fall through */
 139     case 12:
 140         round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
 141         round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
 142         kp += 2 * N_COLS;   /* fall through */
 143     case 10:
 144         round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
 145         round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
 146         round(fwd_rnd,  b1, b0, kp + 3 * N_COLS);
 147         round(fwd_rnd,  b0, b1, kp + 4 * N_COLS);
 148         round(fwd_rnd,  b1, b0, kp + 5 * N_COLS);
 149         round(fwd_rnd,  b0, b1, kp + 6 * N_COLS);
 150         round(fwd_rnd,  b1, b0, kp + 7 * N_COLS);
 151         round(fwd_rnd,  b0, b1, kp + 8 * N_COLS);
 152         round(fwd_rnd,  b1, b0, kp + 9 * N_COLS);
 153         round(fwd_lrnd, b0, b1, kp +10 * N_COLS);
 154     }
 155
 156 #else
 157
 158 #if (ENC_UNROLL == PARTIAL)
         /* Two rounds per iteration, alternating b0 -> b1 -> b0, so no copy
            is needed; (nr >> 1) - 1 iterations plus the extra round after
            the loop give nr - 1 fwd_rnd rounds, leaving the state in b1. */
 159     {   aes_32t    rnd;
 160         for(rnd = 0; rnd < (nr >> 1) - 1; ++rnd)
 161         {
 162             kp += N_COLS;
 163             round(fwd_rnd, b1, b0, kp);
 164             kp += N_COLS;
 165             round(fwd_rnd, b0, b1, kp);
 166         }
 167         kp += N_COLS;
 168         round(fwd_rnd,  b1, b0, kp);
 169 #else
         /* One round per iteration for nr - 1 rounds; l_copy() moves the
            result back into b0 for the next pass (b1 still holds it too). */
 170     {   aes_32t    rnd;
 171         for(rnd = 0; rnd < nr - 1; ++rnd)
 172         {
 173             kp += N_COLS;
 174             round(fwd_rnd, b1, b0, kp);
 175             l_copy(b0, b1);
 176         }
 177 #endif
             /* last round, shared by both loop forms: b1 -> b0 */
 178         kp += N_COLS;
 179         round(fwd_lrnd, b0, b1, kp);
 180     }
 181 #endif
 182
         /* write the final state held in b0 to the output block */
 183     state_out(out_blk, b0);
 184 #ifdef AES_ERR_CHK
 185     return aes_good;
 186 #endif
 187 }
 188
 189 #endif
 190
 191 #if defined(DECRYPTION) && !defined(AES_ASM)
 192
 193 /* Visual C++ .Net v7.1 provides the fastest encryption code when using
 194    Pentium optimisation with small code but this is poor for decryption
 195    so we need to control this with the following VC++ pragmas
 196 */
 197
 198 #if defined(_MSC_VER)
 199 #pragma optimize( "t", on )
 200 #endif
 201
 202 /* Given the column (c) of the output state variable, the following
 203    macros give the input state variables which are needed in its
 204    computation for each row (r) of the state. All the alternative
 205    macros give the same end values but expand into different ways
 206    of calculating these values.  In particular the complex macro
 207    used for dynamically variable block sizes is designed to expand
 208    to a compile time constant whenever possible but will expand to
 209    conditional clauses on some branches (I am grateful to Frank
 210    Yellin for this construction)
 211 */
 212
     /* inv_var(x,r,c) selects state word s(x, (c - r) mod 4) for r and c in
        0..3; any other value of r or c is treated like 3 by the final
        branch of each conditional chain.
     */
 213 #define inv_var(x,r,c)\
 214  ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
 215  : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\
 216  : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
 217  :          ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0)))
 218
     /* inv_rnd(y,x,k,c): one column of a non-final decryption round, written
        to state word c of y.  The form depends on the tables compiled in:
          IT4_SET    (k)[c] XOR four_tables() with the t_use(i,n) tables
          IT1_SET    (k)[c] XOR one_table() with the t_use(i,n) table and upr
          otherwise  inv_mcol() applied to (k)[c] XOR no_table() with
                     t_use(i,box); here the key word is XORed in before
                     inv_mcol() rather than after it
        The two table forms do not call inv_mcol(), so they #undef dec_imvars;
        aes_decrypt() then skips the declaration guarded by that macro.
     */
 219 #if defined(IT4_SET)
 220 #undef  dec_imvars
 221 #define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c))
 222 #elif defined(IT1_SET)
 223 #undef  dec_imvars
 224 #define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(i,n),inv_var,rf1,c))
 225 #else
 226 #define inv_rnd(y,x,k,c)    (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c)))
 227 #endif
 228
     /* inv_lrnd(y,x,k,c): one column of the final decryption round.  It uses
        the t_use(i,l) tables (with ups instead of upr in the one-table
        form), and the table-free form does not apply inv_mcol().
     */
 229 #if defined(IL4_SET)
 230 #define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c))
 231 #elif defined(IL1_SET)
 232 #define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(i,l),inv_var,rf1,c))
 233 #else
 234 #define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c))
 235 #endif
 236
     /*! \brief Decrypt one block using the key schedule held in \a cx.
      *
      * The key schedule is consumed from its end towards its start.  The
      * four input words are read with word_in() and XORed with the four
      * words at cx->ks + nr * N_COLS.  The state then passes through inv_rnd
      * rounds and one closing inv_lrnd round, each using the preceding
      * N_COLS key-schedule words, and the result is written with word_out().
      *
      * \param in_blk   block to decrypt (read only, through word_in())
      * \param out_blk  receives the decrypted block (through word_out())
      * \param cx       decryption context; only cx->ks is read here
      * \return aes_error or aes_good when AES_ERR_CHK is defined.  Without
      *         AES_ERR_CHK no return statement is compiled, so aes_rval is
      *         presumably void in that configuration -- confirm in the header.
      */
 237 aes_rval aes_decrypt(const void *in_blk, void *out_blk, const aes_decrypt_ctx cx[1])
 238 {   aes_32t        locals(b0, b1);
 239 #ifdef dec_imvars
 240     dec_imvars; /* declare variables for inv_mcol() if needed */
 241 #endif
 242
         /* Round count: ks[52] when ks[45] ^ ks[52] ^ ks[53] is non-zero,
            otherwise 14.  NOTE(review): presumably the key set-up code
            stores the round count in ks[52] for the shorter key schedules
            -- confirm against the key-schedule source.
            kp starts nr round keys into the schedule.  It is computed from
            nr before nr is validated below, but is not dereferenced until
            after that check. */
 243     aes_32t nr = (cx->ks[45] ^ cx->ks[52] ^ cx->ks[53] ? cx->ks[52] : 14);
 244     const aes_32t *kp = cx->ks + nr * N_COLS;
 245
         /* Proceed only when nr is 10, 12 or 14 and the key-schedule words
            listed for that nr are not all zero; otherwise report an error. */
 246 #ifdef AES_ERR_CHK
 247     if(   (nr != 10 || !(cx->ks[0] | cx->ks[3] | cx->ks[4]))
 248        && (nr != 12 || !(cx->ks[0] | cx->ks[5] | cx->ks[6]))
 249        && (nr != 14 || !(cx->ks[0] | cx->ks[7] | cx->ks[8])) )
 250         return aes_error;
 251 #endif
 252
         /* b0 = input words XOR the four key words at kp */
 253     state_in(b0, in_blk, kp);
 254
 255 #if (DEC_UNROLL == FULL)
 256
         /* Fully unrolled.  The cases fall through on purpose (14 -> 12 -> 10):
            each of the first two runs two rounds and moves kp back by two
            round keys, then case 10 runs the remaining nine rounds and the
            last round.  There is no default, so any other nr runs no rounds. */
 257     switch(nr)
 258     {
 259     case 14:
 260         round(inv_rnd,  b1, b0, kp -  1 * N_COLS);
 261         round(inv_rnd,  b0, b1, kp -  2 * N_COLS);
 262         kp -= 2 * N_COLS;   /* fall through */
 263     case 12:
 264         round(inv_rnd,  b1, b0, kp -  1 * N_COLS);
 265         round(inv_rnd,  b0, b1, kp -  2 * N_COLS);
 266         kp -= 2 * N_COLS;   /* fall through */
 267     case 10:
 268         round(inv_rnd,  b1, b0, kp -  1 * N_COLS);
 269         round(inv_rnd,  b0, b1, kp -  2 * N_COLS);
 270         round(inv_rnd,  b1, b0, kp -  3 * N_COLS);
 271         round(inv_rnd,  b0, b1, kp -  4 * N_COLS);
 272         round(inv_rnd,  b1, b0, kp -  5 * N_COLS);
 273         round(inv_rnd,  b0, b1, kp -  6 * N_COLS);
 274         round(inv_rnd,  b1, b0, kp -  7 * N_COLS);
 275         round(inv_rnd,  b0, b1, kp -  8 * N_COLS);
 276         round(inv_rnd,  b1, b0, kp -  9 * N_COLS);
 277         round(inv_lrnd, b0, b1, kp - 10 * N_COLS);
 278     }
 279
 280 #else
 281
 282 #if (DEC_UNROLL == PARTIAL)
         /* Two rounds per iteration, alternating b0 -> b1 -> b0, so no copy
            is needed; (nr >> 1) - 1 iterations plus the extra round after
            the loop give nr - 1 inv_rnd rounds, leaving the state in b1. */
 283     {   aes_32t    rnd;
 284         for(rnd = 0; rnd < (nr >> 1) - 1; ++rnd)
 285         {
 286             kp -= N_COLS;
 287             round(inv_rnd, b1, b0, kp);
 288             kp -= N_COLS;
 289             round(inv_rnd, b0, b1, kp);
 290         }
 291         kp -= N_COLS;
 292         round(inv_rnd, b1, b0, kp);
 293 #else
         /* One round per iteration for nr - 1 rounds; l_copy() moves the
            result back into b0 for the next pass (b1 still holds it too). */
 294     {   aes_32t    rnd;
 295         for(rnd = 0; rnd < nr - 1; ++rnd)
 296         {
 297             kp -= N_COLS;
 298             round(inv_rnd, b1, b0, kp);
 299             l_copy(b0, b1);
 300         }
 301 #endif
             /* last round, shared by both loop forms: b1 -> b0 */
 302         kp -= N_COLS;
 303         round(inv_lrnd, b0, b1, kp);
 304     }
 305 #endif
 306
         /* write the final state held in b0 to the output block */
 307     state_out(out_blk, b0);
 308 #ifdef AES_ERR_CHK
 309     return aes_good;
 310 #endif
 311 }
 312
 313 #endif
 314
 315 #if defined(__cplusplus)
 316 }
 317 #endif