src/crypto/ctaes/ctaes.c

   1  /*********************************************************************
   2  * Copyright (c) 2016 Pieter Wuille                                   *
   3  * Distributed under the MIT software license, see the accompanying   *
   4  * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
   5  **********************************************************************/
   6
   7 /* Constant time, unoptimized, concise, plain C, AES implementation
   8  * Based On:
   9  *   Emilia Kasper and Peter Schwabe, Faster and Timing-Attack Resistant AES-GCM
  10  *   http://www.iacr.org/archive/ches2009/57470001/57470001.pdf
  11  * But using 8 16-bit integers representing a single AES state rather than 8 128-bit
  12  * integers representing 8 AES states.
  13  */
  14
  15 #include "ctaes.h"
  16
  17 /* Slice variable slice_i contains the i'th bit of the 16 state variables in this order:
  18  *  0  1  2  3
  19  *  4  5  6  7
  20  *  8  9 10 11
  21  * 12 13 14 15
  22  */
  23
  24 /** Convert a byte to sliced form, storing it corresponding to given row and column in s */
  25 static void LoadByte(AES_state* s, unsigned char byte, int r, int c) {
  26     int i;
  27     for (i = 0; i < 8; i++) {
  28         s->slice[i] |= (byte & 1) << (r * 4 + c);
  29         byte >>= 1;
  30     }
  31 }
  32
  33 /** Load 16 bytes of data into 8 sliced integers */
  34 static void LoadBytes(AES_state *s, const unsigned char* data16) {
  35     int c;
  36     for (c = 0; c < 4; c++) {
  37         int r;
  38         for (r = 0; r < 4; r++) {
  39             LoadByte(s, *(data16++), r, c);
  40         }
  41     }
  42 }
  43
  44 /** Convert 8 sliced integers into 16 bytes of data */
  45 static void SaveBytes(unsigned char* data16, const AES_state *s) {
  46     int c;
  47     for (c = 0; c < 4; c++) {
  48         int r;
  49         for (r = 0; r < 4; r++) {
  50             int b;
  51             uint8_t v = 0;
  52             for (b = 0; b < 8; b++) {
  53                 v |= ((s->slice[b] >> (r * 4 + c)) & 1) << b;
  54             }
  55             *(data16++) = v;
  56         }
  57     }
  58 }
  59
  60 /* S-box implementation based on the gate logic from:
  61  *   Joan Boyar and Rene Peralta, A depth-16 circuit for the AES S-box.
  62  *   https://eprint.iacr.org/2011/332.pdf
  63 */
  64 static void SubBytes(AES_state *s, int inv) {
  65     /* Load the bit slices */
  66     uint16_t U0 = s->slice[7], U1 = s->slice[6], U2 = s->slice[5], U3 = s->slice[4];
  67     uint16_t U4 = s->slice[3], U5 = s->slice[2], U6 = s->slice[1], U7 = s->slice[0];
  68
  69     uint16_t T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16;
  70     uint16_t T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, D;
  71     uint16_t M1, M6, M11, M13, M15, M20, M21, M22, M23, M25, M37, M38, M39, M40;
  72     uint16_t M41, M42, M43, M44, M45, M46, M47, M48, M49, M50, M51, M52, M53, M54;
  73     uint16_t M55, M56, M57, M58, M59, M60, M61, M62, M63;
  74
  75     if (inv) {
  76         uint16_t R5, R13, R17, R18, R19;
  77         /* Undo linear postprocessing */
  78         T23 = U0 ^ U3;
  79         T22 = ~(U1 ^ U3);
  80         T2 = ~(U0 ^ U1);
  81         T1 = U3 ^ U4;
  82         T24 = ~(U4 ^ U7);
  83         R5 = U6 ^ U7;
  84         T8 = ~(U1 ^ T23);
  85         T19 = T22 ^ R5;
  86         T9 = ~(U7 ^ T1);
  87         T10 = T2 ^ T24;
  88         T13 = T2 ^ R5;
  89         T3 = T1 ^ R5;
  90         T25 = ~(U2 ^ T1);
  91         R13 = U1 ^ U6;
  92         T17 = ~(U2 ^ T19);
  93         T20 = T24 ^ R13;
  94         T4 = U4 ^ T8;
  95         R17 = ~(U2 ^ U5);
  96         R18 = ~(U5 ^ U6);
  97         R19 = ~(U2 ^ U4);
  98         D = U0 ^ R17;
  99         T6 = T22 ^ R17;
 100         T16 = R13 ^ R19;
 101         T27 = T1 ^ R18;
 102         T15 = T10 ^ T27;
 103         T14 = T10 ^ R18;
 104         T26 = T3 ^ T16;
 105     } else {
 106         /* Linear preprocessing. */
 107         T1 = U0 ^ U3;
 108         T2 = U0 ^ U5;
 109         T3 = U0 ^ U6;
 110         T4 = U3 ^ U5;
 111         T5 = U4 ^ U6;
 112         T6 = T1 ^ T5;
 113         T7 = U1 ^ U2;
 114         T8 = U7 ^ T6;
 115         T9 = U7 ^ T7;
 116         T10 = T6 ^ T7;
 117         T11 = U1 ^ U5;
 118         T12 = U2 ^ U5;
 119         T13 = T3 ^ T4;
 120         T14 = T6 ^ T11;
 121         T15 = T5 ^ T11;
 122         T16 = T5 ^ T12;
 123         T17 = T9 ^ T16;
 124         T18 = U3 ^ U7;
 125         T19 = T7 ^ T18;
 126         T20 = T1 ^ T19;
 127         T21 = U6 ^ U7;
 128         T22 = T7 ^ T21;
 129         T23 = T2 ^ T22;
 130         T24 = T2 ^ T10;
 131         T25 = T20 ^ T17;
 132         T26 = T3 ^ T16;
 133         T27 = T1 ^ T12;
 134         D = U7;
 135     }
 136
 137     /* Non-linear transformation (shared between the forward and backward case) */
 138     M1 = T13 & T6;
 139     M6 = T3 & T16;
 140     M11 = T1 & T15;
 141     M13 = (T4 & T27) ^ M11;
 142     M15 = (T2 & T10) ^ M11;
 143     M20 = T14 ^ M1 ^ (T23 & T8) ^ M13;
 144     M21 = (T19 & D) ^ M1 ^ T24 ^ M15;
 145     M22 = T26 ^ M6 ^ (T22 & T9) ^ M13;
 146     M23 = (T20 & T17) ^ M6 ^ M15 ^ T25;
 147     M25 = M22 & M20;
 148     M37 = M21 ^ ((M20 ^ M21) & (M23 ^ M25));
 149     M38 = M20 ^ M25 ^ (M21 | (M20 & M23));
 150     M39 = M23 ^ ((M22 ^ M23) & (M21 ^ M25));
 151     M40 = M22 ^ M25 ^ (M23 | (M21 & M22));
 152     M41 = M38 ^ M40;
 153     M42 = M37 ^ M39;
 154     M43 = M37 ^ M38;
 155     M44 = M39 ^ M40;
 156     M45 = M42 ^ M41;
 157     M46 = M44 & T6;
 158     M47 = M40 & T8;
 159     M48 = M39 & D;
 160     M49 = M43 & T16;
 161     M50 = M38 & T9;
 162     M51 = M37 & T17;
 163     M52 = M42 & T15;
 164     M53 = M45 & T27;
 165     M54 = M41 & T10;
 166     M55 = M44 & T13;
 167     M56 = M40 & T23;
 168     M57 = M39 & T19;
 169     M58 = M43 & T3;
 170     M59 = M38 & T22;
 171     M60 = M37 & T20;
 172     M61 = M42 & T1;
 173     M62 = M45 & T4;
 174     M63 = M41 & T2;
 175
 176     if (inv){
 177         /* Undo linear preprocessing */
 178         uint16_t P0 = M52 ^ M61;
 179         uint16_t P1 = M58 ^ M59;
 180         uint16_t P2 = M54 ^ M62;
 181         uint16_t P3 = M47 ^ M50;
 182         uint16_t P4 = M48 ^ M56;
 183         uint16_t P5 = M46 ^ M51;
 184         uint16_t P6 = M49 ^ M60;
 185         uint16_t P7 = P0 ^ P1;
 186         uint16_t P8 = M50 ^ M53;
 187         uint16_t P9 = M55 ^ M63;
 188         uint16_t P10 = M57 ^ P4;
 189         uint16_t P11 = P0 ^ P3;
 190         uint16_t P12 = M46 ^ M48;
 191         uint16_t P13 = M49 ^ M51;
 192         uint16_t P14 = M49 ^ M62;
 193         uint16_t P15 = M54 ^ M59;
 194         uint16_t P16 = M57 ^ M61;
 195         uint16_t P17 = M58 ^ P2;
 196         uint16_t P18 = M63 ^ P5;
 197         uint16_t P19 = P2 ^ P3;
 198         uint16_t P20 = P4 ^ P6;
 199         uint16_t P22 = P2 ^ P7;
 200         uint16_t P23 = P7 ^ P8;
 201         uint16_t P24 = P5 ^ P7;
 202         uint16_t P25 = P6 ^ P10;
 203         uint16_t P26 = P9 ^ P11;
 204         uint16_t P27 = P10 ^ P18;
 205         uint16_t P28 = P11 ^ P25;
 206         uint16_t P29 = P15 ^ P20;
 207         s->slice[7] = P13 ^ P22;
 208         s->slice[6] = P26 ^ P29;
 209         s->slice[5] = P17 ^ P28;
 210         s->slice[4] = P12 ^ P22;
 211         s->slice[3] = P23 ^ P27;
 212         s->slice[2] = P19 ^ P24;
 213         s->slice[1] = P14 ^ P23;
 214         s->slice[0] = P9 ^ P16;
 215     } else {
 216         /* Linear postprocessing */
 217         uint16_t L0 = M61 ^ M62;
 218         uint16_t L1 = M50 ^ M56;
 219         uint16_t L2 = M46 ^ M48;
 220         uint16_t L3 = M47 ^ M55;
 221         uint16_t L4 = M54 ^ M58;
 222         uint16_t L5 = M49 ^ M61;
 223         uint16_t L6 = M62 ^ L5;
 224         uint16_t L7 = M46 ^ L3;
 225         uint16_t L8 = M51 ^ M59;
 226         uint16_t L9 = M52 ^ M53;
 227         uint16_t L10 = M53 ^ L4;
 228         uint16_t L11 = M60 ^ L2;
 229         uint16_t L12 = M48 ^ M51;
 230         uint16_t L13 = M50 ^ L0;
 231         uint16_t L14 = M52 ^ M61;
 232         uint16_t L15 = M55 ^ L1;
 233         uint16_t L16 = M56 ^ L0;
 234         uint16_t L17 = M57 ^ L1;
 235         uint16_t L18 = M58 ^ L8;
 236         uint16_t L19 = M63 ^ L4;
 237         uint16_t L20 = L0 ^ L1;
 238         uint16_t L21 = L1 ^ L7;
 239         uint16_t L22 = L3 ^ L12;
 240         uint16_t L23 = L18 ^ L2;
 241         uint16_t L24 = L15 ^ L9;
 242         uint16_t L25 = L6 ^ L10;
 243         uint16_t L26 = L7 ^ L9;
 244         uint16_t L27 = L8 ^ L10;
 245         uint16_t L28 = L11 ^ L14;
 246         uint16_t L29 = L11 ^ L17;
 247         s->slice[7] = L6 ^ L24;
 248         s->slice[6] = ~(L16 ^ L26);
 249         s->slice[5] = ~(L19 ^ L28);
 250         s->slice[4] = L6 ^ L21;
 251         s->slice[3] = L20 ^ L22;
 252         s->slice[2] = L25 ^ L29;
 253         s->slice[1] = ~(L13 ^ L27);
 254         s->slice[0] = ~(L6 ^ L23);
 255     }
 256 }
 257
 258 #define BIT_RANGE(from,to) (((1 << ((to) - (from))) - 1) << (from))
 259
 260 #define BIT_RANGE_LEFT(x,from,to,shift) (((x) & BIT_RANGE((from), (to))) << (shift))
 261 #define BIT_RANGE_RIGHT(x,from,to,shift) (((x) & BIT_RANGE((from), (to))) >> (shift))
 262
 263 static void ShiftRows(AES_state* s) {
 264     int i;
 265     for (i = 0; i < 8; i++) {
 266         uint16_t v = s->slice[i];
 267         s->slice[i] =
 268             (v & BIT_RANGE(0, 4)) |
 269             BIT_RANGE_LEFT(v, 4, 5, 3) | BIT_RANGE_RIGHT(v, 5, 8, 1) |
 270             BIT_RANGE_LEFT(v, 8, 10, 2) | BIT_RANGE_RIGHT(v, 10, 12, 2) |
 271             BIT_RANGE_LEFT(v, 12, 15, 1) | BIT_RANGE_RIGHT(v, 15, 16, 3);
 272     }
 273 }
 274
 275 static void InvShiftRows(AES_state* s) {
 276     int i;
 277     for (i = 0; i < 8; i++) {
 278         uint16_t v = s->slice[i];
 279         s->slice[i] =
 280             (v & BIT_RANGE(0, 4)) |
 281             BIT_RANGE_LEFT(v, 4, 7, 1) | BIT_RANGE_RIGHT(v, 7, 8, 3) |
 282             BIT_RANGE_LEFT(v, 8, 10, 2) | BIT_RANGE_RIGHT(v, 10, 12, 2) |
 283             BIT_RANGE_LEFT(v, 12, 13, 3) | BIT_RANGE_RIGHT(v, 13, 16, 1);
 284     }
 285 }
 286
 287 #define ROT(x,b) (((x) >> ((b) * 4)) | ((x) << ((4-(b)) * 4)))
 288
 289 static void MixColumns(AES_state* s, int inv) {
 290     /* The MixColumns transform treats the bytes of the columns of the state as
 291      * coefficients of a 3rd degree polynomial over GF(2^8) and multiplies them
 292      * by the fixed polynomial a(x) = {03}x^3 + {01}x^2 + {01}x + {02}, modulo
 293      * x^4 + {01}.
 294      *
 295      * In the inverse transform, we multiply by the inverse of a(x),
 296      * a^-1(x) = {0b}x^3 + {0d}x^2 + {09}x + {0e}. This is equal to
 297      * a(x) * ({04}x^2 + {05}), so we can reuse the forward transform's code
 298      * (found in OpenSSL's bsaes-x86_64.pl, attributed to Jussi Kivilinna)
 299      *
 300      * In the bitsliced representation, a multiplication of every column by x
 301      * mod x^4 + 1 is simply a right rotation.
 302      */
 303
 304     /* Shared for both directions is a multiplication by a(x), which can be
 305      * rewritten as (x^3 + x^2 + x) + {02}*(x^3 + {01}).
 306      *
 307      * First compute s into the s? variables, (x^3 + {01}) * s into the s?_01
 308      * variables and (x^3 + x^2 + x)*s into the s?_123 variables.
 309      */
 310     uint16_t s0 = s->slice[0], s1 = s->slice[1], s2 = s->slice[2], s3 = s->slice[3];
 311     uint16_t s4 = s->slice[4], s5 = s->slice[5], s6 = s->slice[6], s7 = s->slice[7];
 312     uint16_t s0_01 = s0 ^ ROT(s0, 1), s0_123 = ROT(s0_01, 1) ^ ROT(s0, 3);
 313     uint16_t s1_01 = s1 ^ ROT(s1, 1), s1_123 = ROT(s1_01, 1) ^ ROT(s1, 3);
 314     uint16_t s2_01 = s2 ^ ROT(s2, 1), s2_123 = ROT(s2_01, 1) ^ ROT(s2, 3);
 315     uint16_t s3_01 = s3 ^ ROT(s3, 1), s3_123 = ROT(s3_01, 1) ^ ROT(s3, 3);
 316     uint16_t s4_01 = s4 ^ ROT(s4, 1), s4_123 = ROT(s4_01, 1) ^ ROT(s4, 3);
 317     uint16_t s5_01 = s5 ^ ROT(s5, 1), s5_123 = ROT(s5_01, 1) ^ ROT(s5, 3);
 318     uint16_t s6_01 = s6 ^ ROT(s6, 1), s6_123 = ROT(s6_01, 1) ^ ROT(s6, 3);
 319     uint16_t s7_01 = s7 ^ ROT(s7, 1), s7_123 = ROT(s7_01, 1) ^ ROT(s7, 3);
 320     /* Now compute s = s?_123 + {02} * s?_01. */
 321     s->slice[0] = s7_01 ^ s0_123;
 322     s->slice[1] = s7_01 ^ s0_01 ^ s1_123;
 323     s->slice[2] = s1_01 ^ s2_123;
 324     s->slice[3] = s7_01 ^ s2_01 ^ s3_123;
 325     s->slice[4] = s7_01 ^ s3_01 ^ s4_123;
 326     s->slice[5] = s4_01 ^ s5_123;
 327     s->slice[6] = s5_01 ^ s6_123;
 328     s->slice[7] = s6_01 ^ s7_123;
 329     if (inv) {
 330         /* In the reverse direction, we further need to multiply by
 331          * {04}x^2 + {05}, which can be written as {04} * (x^2 + {01}) + {01}.
 332          *
 333          * First compute (x^2 + {01}) * s into the t?_02 variables: */
 334         uint16_t t0_02 = s->slice[0] ^ ROT(s->slice[0], 2);
 335         uint16_t t1_02 = s->slice[1] ^ ROT(s->slice[1], 2);
 336         uint16_t t2_02 = s->slice[2] ^ ROT(s->slice[2], 2);
 337         uint16_t t3_02 = s->slice[3] ^ ROT(s->slice[3], 2);
 338         uint16_t t4_02 = s->slice[4] ^ ROT(s->slice[4], 2);
 339         uint16_t t5_02 = s->slice[5] ^ ROT(s->slice[5], 2);
 340         uint16_t t6_02 = s->slice[6] ^ ROT(s->slice[6], 2);
 341         uint16_t t7_02 = s->slice[7] ^ ROT(s->slice[7], 2);
 342         /* And then update s += {04} * t?_02 */
 343         s->slice[0] ^= t6_02;
 344         s->slice[1] ^= t6_02 ^ t7_02;
 345         s->slice[2] ^= t0_02 ^ t7_02;
 346         s->slice[3] ^= t1_02 ^ t6_02;
 347         s->slice[4] ^= t2_02 ^ t6_02 ^ t7_02;
 348         s->slice[5] ^= t3_02 ^ t7_02;
 349         s->slice[6] ^= t4_02;
 350         s->slice[7] ^= t5_02;
 351     }
 352 }
 353
 354 static void AddRoundKey(AES_state* s, const AES_state* round) {
 355     int b;
 356     for (b = 0; b < 8; b++) {
 357         s->slice[b] ^= round->slice[b];
 358     }
 359 }
 360
 361 /** column_0(s) = column_c(a) */
 362 static void GetOneColumn(AES_state* s, const AES_state* a, int c) {
 363     int b;
 364     for (b = 0; b < 8; b++) {
 365         s->slice[b] = (a->slice[b] >> c) & 0x1111;
 366     }
 367 }
 368
 369 /** column_c1(r) |= (column_0(s) ^= column_c2(a)) */
 370 static void KeySetupColumnMix(AES_state* s, AES_state* r, const AES_state* a, int c1, int c2) {
 371     int b;
 372     for (b = 0; b < 8; b++) {
 373         r->slice[b] |= ((s->slice[b] ^= ((a->slice[b] >> c2) & 0x1111)) & 0x1111) << c1;
 374     }
 375 }
 376
 377 /** Rotate the rows in s one position upwards, and xor in r */
 378 static void KeySetupTransform(AES_state* s, const AES_state* r) {
 379     int b;
 380     for (b = 0; b < 8; b++) {
 381         s->slice[b] = ((s->slice[b] >> 4) | (s->slice[b] << 12)) ^ r->slice[b];
 382     }
 383 }
 384
 385 /* Multiply the cells in s by x, as polynomials over GF(2) mod x^8 + x^4 + x^3 + x + 1 */
 386 static void MultX(AES_state* s) {
 387     uint16_t top = s->slice[7];
 388     s->slice[7] = s->slice[6];
 389     s->slice[6] = s->slice[5];
 390     s->slice[5] = s->slice[4];
 391     s->slice[4] = s->slice[3] ^ top;
 392     s->slice[3] = s->slice[2] ^ top;
 393     s->slice[2] = s->slice[1];
 394     s->slice[1] = s->slice[0] ^ top;
 395     s->slice[0] = top;
 396 }
 397
 398 /** Expand the cipher key into the key schedule.
 399  *
 400  *  state must be a pointer to an array of size nrounds + 1.
 401  *  key must be a pointer to 4 * nkeywords bytes.
 402  *
 403  *  AES128 uses nkeywords = 4, nrounds = 10
 404  *  AES192 uses nkeywords = 6, nrounds = 12
 405  *  AES256 uses nkeywords = 8, nrounds = 14
 406  */
 407 static void AES_setup(AES_state* rounds, const uint8_t* key, int nkeywords, int nrounds)
 408 {
 409     int i;
 410
 411     /* The one-byte round constant */
 412     AES_state rcon = {{1,0,0,0,0,0,0,0}};
 413     /* The number of the word being generated, modulo nkeywords */
 414     int pos = 0;
 415     /* The column representing the word currently being processed */
 416     AES_state column;
 417
 418     for (i = 0; i < nrounds + 1; i++) {
 419         int b;
 420         for (b = 0; b < 8; b++) {
 421             rounds[i].slice[b] = 0;
 422         }
 423     }
 424
 425     /* The first nkeywords round columns are just taken from the key directly. */
 426     for (i = 0; i < nkeywords; i++) {
 427         int r;
 428         for (r = 0; r < 4; r++) {
 429             LoadByte(&rounds[i >> 2], *(key++), r, i & 3);
 430         }
 431     }
 432
 433     GetOneColumn(&column, &rounds[(nkeywords - 1) >> 2], (nkeywords - 1) & 3);
 434
 435     for (i = nkeywords; i < 4 * (nrounds + 1); i++) {
 436         /* Transform column */
 437         if (pos == 0) {
 438             SubBytes(&column, 0);
 439             KeySetupTransform(&column, &rcon);
 440             MultX(&rcon);
 441         } else if (nkeywords > 6 && pos == 4) {
 442             SubBytes(&column, 0);
 443         }
 444         if (++pos == nkeywords) pos = 0;
 445         KeySetupColumnMix(&column, &rounds[i >> 2], &rounds[(i - nkeywords) >> 2], i & 3, (i - nkeywords) & 3);
 446     }
 447 }
 448
 449 static void AES_encrypt(const AES_state* rounds, int nrounds, unsigned char* cipher16, const unsigned char* plain16) {
 450     AES_state s = {{0}};
 451     int round;
 452
 453     LoadBytes(&s, plain16);
 454     AddRoundKey(&s, rounds++);
 455
 456     for (round = 1; round < nrounds; round++) {
 457         SubBytes(&s, 0);
 458         ShiftRows(&s);
 459         MixColumns(&s, 0);
 460         AddRoundKey(&s, rounds++);
 461     }
 462
 463     SubBytes(&s, 0);
 464     ShiftRows(&s);
 465     AddRoundKey(&s, rounds);
 466
 467     SaveBytes(cipher16, &s);
 468 }
 469
 470 static void AES_decrypt(const AES_state* rounds, int nrounds, unsigned char* plain16, const unsigned char* cipher16) {
 471     /* Most AES decryption implementations use the alternate scheme
 472      * (the Equivalent Inverse Cipher), which allows for more code reuse between
 473      * the encryption and decryption code, but requires separate setup for both.
 474      */
 475     AES_state s = {{0}};
 476     int round;
 477
 478     rounds += nrounds;
 479
 480     LoadBytes(&s, cipher16);
 481     AddRoundKey(&s, rounds--);
 482
 483     for (round = 1; round < nrounds; round++) {
 484         InvShiftRows(&s);
 485         SubBytes(&s, 1);
 486         AddRoundKey(&s, rounds--);
 487         MixColumns(&s, 1);
 488     }
 489
 490     InvShiftRows(&s);
 491     SubBytes(&s, 1);
 492     AddRoundKey(&s, rounds);
 493
 494     SaveBytes(plain16, &s);
 495 }
 496
 497 void AES128_init(AES128_ctx* ctx, const unsigned char* key16) {
 498     AES_setup(ctx->rk, key16, 4, 10);
 499 }
 500
 501 void AES128_encrypt(const AES128_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16) {
 502     while (blocks--) {
 503         AES_encrypt(ctx->rk, 10, cipher16, plain16);
 504         cipher16 += 16;
 505         plain16 += 16;
 506     }
 507 }
 508
 509 void AES128_decrypt(const AES128_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16) {
 510     while (blocks--) {
 511         AES_decrypt(ctx->rk, 10, plain16, cipher16);
 512         cipher16 += 16;
 513         plain16 += 16;
 514     }
 515 }
 516
 517 void AES192_init(AES192_ctx* ctx, const unsigned char* key24) {
 518     AES_setup(ctx->rk, key24, 6, 12);
 519 }
 520
 521 void AES192_encrypt(const AES192_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16) {
 522     while (blocks--) {
 523         AES_encrypt(ctx->rk, 12, cipher16, plain16);
 524         cipher16 += 16;
 525         plain16 += 16;
 526     }
 527
 528 }
 529
 530 void AES192_decrypt(const AES192_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16) {
 531     while (blocks--) {
 532         AES_decrypt(ctx->rk, 12, plain16, cipher16);
 533         cipher16 += 16;
 534         plain16 += 16;
 535     }
 536 }
 537
 538 void AES256_init(AES256_ctx* ctx, const unsigned char* key32) {
 539     AES_setup(ctx->rk, key32, 8, 14);
 540 }
 541
 542 void AES256_encrypt(const AES256_ctx* ctx, size_t blocks, unsigned char* cipher16, const unsigned char* plain16) {
 543     while (blocks--) {
 544         AES_encrypt(ctx->rk, 14, cipher16, plain16);
 545         cipher16 += 16;
 546         plain16 += 16;
 547     }
 548 }
 549
 550 void AES256_decrypt(const AES256_ctx* ctx, size_t blocks, unsigned char* plain16, const unsigned char* cipher16) {
 551     while (blocks--) {
 552         AES_decrypt(ctx->rk, 14, plain16, cipher16);
 553         cipher16 += 16;
 554         plain16 += 16;
 555     }
 556 }