2 /* Hongjun Wu, January 2007*/
5 #include "ecrypt-sync.h"
/*
 * first_round(ctx, x0, y0)
 *
 * Statement macro for the first round of the block computation.
 * In the lines visible here it:
 *   - XORs the input word x0 with ctx->round_key[0][0] into z0, and
 *   - stores tem0 XOR ctx->first_round_output_x0 into y0.
 *
 * ctx is used with '->', so it must be a pointer to the context.
 * y0 is assigned to, so it must be an lvalue.
 *
 * NOTE(review): the declarations of z0/tem0 and the computation of tem0
 * are not visible in this excerpt -- confirm against the full file.
 * ctx->first_round_output_x0 is presumably the value cached by
 * partial_precompute_tworounds(); verify.
 */
10 #define first_round(ctx,x0,y0) { \
12 z0 = (x0) ^ ctx->round_key[0][0]; \
15 (y0) = tem0 ^ ctx->first_round_output_x0; \
/*
 * second_round(ctx, x0, y0, y1, y2, y3)
 *
 * Statement macro for the second round. It takes a single input word x0
 * and produces four output words y0..y3 (all assigned to, so they must be
 * lvalues).
 *
 * Visible behaviour:
 *   - t7, t10 and t13 are the bytes of x0 obtained by shifting right by
 *     24, 16 and 8 bits respectively and truncating to u8;
 *   - y0..y3 are tem0, tem7, tem10, tem13 XORed with
 *     ctx->second_round_output[0..3] respectively.
 *
 * NOTE(review): the lines that compute tem0/tem7/tem10/tem13 (and t0) are
 * not visible in this excerpt; presumably they are table lookups indexed
 * by the extracted bytes -- confirm against the full file.
 */
18 #define second_round(ctx,x0,y0,y1,y2,y3) { \
20 u32 tem0,tem7,tem10,tem13; \
23 (y0) = tem0 ^ ctx->second_round_output[0]; \
24 t7 = (u8)((x0)>>24); \
26 (y1) = tem7 ^ ctx->second_round_output[1]; \
27 t10 = (u8)((x0)>>16); \
29 (y2) = tem10 ^ ctx->second_round_output[2]; \
30 t13 = (u8)((x0)>>8); \
32 (y3) = tem13 ^ ctx->second_round_output[3]; \
/*
 * round(ctx, x0, x1, x2, x3, y0, y1, y2, y3, r)
 *
 * Statement macro for one full table-driven round: input words x0..x3,
 * output words y0..y3 (assigned to, so they must be lvalues), and r the
 * first index into ctx->round_key.
 *
 * Each output word is built as a chain of XORs of table entries. The
 * lookups into T1, T2 and T3 are visible below; the start of each chain
 * (tem0, tem4, tem8, tem12) is on lines not shown in this excerpt.
 * Table indices are single bytes of the input words, taken by shifting
 * right and truncating to u8. The end of each chain is XORed with
 * ctx->round_key[r][n] to give yn.
 *
 * Byte selection visible in this excerpt:
 *   y0: x2>>16 -> T2, x3>>24 -> T3
 *   y1: x3>>16 -> T2, x0>>24 -> T3
 *   y2: x0>>16 -> T2, x1>>24 -> T3
 *   y3: x0>>8  -> T1, x1>>16 -> T2, x2>>24 -> T3
 *
 * NOTE(review): the declarations of the t* index variables and the
 * remaining byte extractions are not visible here -- confirm against the
 * full file.
 * NOTE(review): the macro name `round` would clash with round() from
 * <math.h> if that header is ever included in this translation unit --
 * confirm it is not.
 */
35 #define round(ctx,x0,x1,x2,x3,y0,y1,y2,y3,r) { \
40 u32 tem0,tem1,tem2,tem3; \
41 u32 tem4,tem5,tem6,tem7; \
42 u32 tem8,tem9,tem10,tem11; \
43 u32 tem12,tem13,tem14,tem15;\
48 tem1 = tem0 ^ T1[t1]; \
49 t2 = (u8)((x2)>>16); \
50 tem2 = tem1 ^ T2[t2]; \
51 t3 = (u8)((x3)>>24); \
52 tem3 = tem2 ^ T3[t3]; \
53 (y0) = tem3 ^ ctx->round_key[r][0]; \
58 tem5 = tem4 ^ T1[t5]; \
59 t6 = (u8)((x3)>>16); \
60 tem6 = tem5 ^ T2[t6]; \
61 t7 = (u8)((x0)>>24); \
62 tem7 = tem6 ^ T3[t7]; \
63 (y1) = tem7 ^ ctx->round_key[r][1]; \
68 tem9 = tem8 ^ T1[t9]; \
69 t10 = (u8)((x0)>>16); \
70 tem10 = tem9 ^ T2[t10]; \
71 t11 = (u8)((x1)>>24); \
72 tem11 = tem10 ^ T3[t11];\
73 (y2) = tem11 ^ ctx->round_key[r][2]; \
77 t13 = (u8)((x0)>>8); \
78 tem13 = tem12 ^ T1[t13];\
79 t14 = (u8)((x1)>>16); \
80 tem14 = tem13 ^ T2[t14];\
81 t15 = (u8)((x2)>>24); \
82 tem15 = tem14 ^ T3[t15];\
83 (y3) = tem15 ^ ctx->round_key[r][3]; \
86 /* 22.14 cycles/byte*/
/*
 * last_round(ctx, x0, x1, x2, x3, output, r)
 *
 * Statement macro for the final round. It fills the 16 bytes
 * output[0..15] with Sbox lookups indexed by individual bytes of the
 * input words x0..x3, then XORs output, viewed as four u32 words, with
 * ctx->round_key[r][0..3].
 *
 * Source of each output byte, as far as visible in this excerpt:
 *   x0: >>24 -> output[7],  >>16 -> output[10], >>8 -> output[13]
 *   x1: >>8  -> output[1],  >>24 -> output[11], >>16 -> output[14]
 *   x2: >>16 -> output[2],  >>8  -> output[5],  >>24 -> output[15]
 *   x3: >>24 -> output[3],  >>16 -> output[6],  >>8  -> output[9]
 * The indices t0, t4, t8 and t12 (for output[0], [4], [8], [12]) are
 * computed on lines not shown here.
 *
 * NOTE(review): the (u32*)output casts assume output is suitably aligned
 * for u32 access and that accessing it through a u32 pointer is
 * acceptable to the compiler in use -- confirm at the call sites.
 * NOTE(review): `output` is used unparenthesized in output[n], so callers
 * should pass a parenthesized or simple expression (aes256_enc_block
 * passes (output)).
 */
87 #define last_round(ctx,x0,x1,x2,x3,output,r) { \
94 output[0] = Sbox[t0]; \
95 t7 = (u8)((x0)>>24); \
96 output[7] = Sbox[t7]; \
97 t10 = (u8)((x0)>>16); \
98 output[10] = Sbox[t10]; \
99 t13 = (u8)((x0)>>8); \
100 output[13] = Sbox[t13]; \
102 t1 = (u8)((x1)>>8); \
103 output[1] = Sbox[t1]; \
105 output[4] = Sbox[t4]; \
106 t11 = (u8)((x1)>>24); \
107 output[11] = Sbox[t11]; \
108 t14 = (u8)((x1)>>16); \
109 output[14] = Sbox[t14]; \
111 t2 = (u8)((x2)>>16); \
112 output[2] = Sbox[t2]; \
113 t5 = (u8)((x2)>>8); \
114 output[5] = Sbox[t5]; \
116 output[8] = Sbox[t8]; \
117 t15 = (u8)((x2)>>24); \
118 output[15] = Sbox[t15]; \
120 t3 = (u8)((x3)>>24); \
121 output[3] = Sbox[t3]; \
122 t6 = (u8)((x3)>>16); \
123 output[6] = Sbox[t6]; \
124 t9 = (u8)((x3)>>8); \
125 output[9] = Sbox[t9]; \
127 output[12] = Sbox[t12]; \
131 ((u32*)output)[0] ^= ctx->round_key[r][0]; \
132 ((u32*)output)[1] ^= ctx->round_key[r][1]; \
133 ((u32*)output)[2] ^= ctx->round_key[r][2]; \
134 ((u32*)output)[3] ^= ctx->round_key[r][3]; \
/*
 * aes256_enc_block(x, output, ctx)
 *
 * Statement macro that processes one block by chaining the round macros:
 *   first_round  on x[0]                       -> y0
 *   second_round on y0                         -> z0..z3
 *   round() eleven times, key indices 3..13    -> a*..k*
 *   last_round   with key index 14             -> output
 *
 * In the lines visible here only x[0] is read from the input; the first
 * two rounds otherwise rely on values stored in ctx
 * (first_round_output_x0 / second_round_output, see the first_round and
 * second_round macros).
 *
 * NOTE(review): the declarations of the intermediate words (y0, z0..z3,
 * a0..k3) are on lines not shown in this excerpt -- confirm against the
 * full file. The name suggests AES-256 encryption; verify.
 */
138 #define aes256_enc_block(x,output,ctx) {\
152 first_round(ctx,x[0],y0);\
153 second_round(ctx,y0,z0,z1,z2,z3);\
154 round(ctx,z0,z1,z2,z3,a0,a1,a2,a3,3);\
155 round(ctx,a0,a1,a2,a3,b0,b1,b2,b3,4);\
156 round(ctx,b0,b1,b2,b3,c0,c1,c2,c3,5);\
157 round(ctx,c0,c1,c2,c3,d0,d1,d2,d3,6);\
158 round(ctx,d0,d1,d2,d3,e0,e1,e2,e3,7);\
159 round(ctx,e0,e1,e2,e3,f0,f1,f2,f3,8);\
160 round(ctx,f0,f1,f2,f3,g0,g1,g2,g3,9);\
161 round(ctx,g0,g1,g2,g3,h0,h1,h2,h3,10);\
162 round(ctx,h0,h1,h2,h3,i0,i1,i2,i3,11);\
163 round(ctx,i0,i1,i2,i3,j0,j1,j2,j3,12);\
164 round(ctx,j0,j1,j2,j3,k0,k1,k2,k3,13);\
165 last_round(ctx,k0,k1,k2,k3,(output),14);\
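168 /* Precompute the intermediate values of the first two rounds from ctx->counter and the round keys, and cache them in ctx->first_round_output_x0 and ctx->second_round_output[0..3]. */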
169 void partial_precompute_tworounds(ECRYPT_ctx
* ctx
)
171 u32 x0
,x1
,x2
,x3
,y0
,y1
,y2
,y3
;
173 x0
= ctx
->counter
[0] ^ ctx
->round_key
[0][0];
174 x1
= ctx
->counter
[1] ^ ctx
->round_key
[0][1];
175 x2
= ctx
->counter
[2] ^ ctx
->round_key
[0][2];
176 x3
= ctx
->counter
[3] ^ ctx
->round_key
[0][3];
178 round(ctx
,x0
,x1
,x2
,x3
,y0
,y1
,y2
,y3
,1);
179 ctx
->first_round_output_x0
= y0
^ T0
[0];
181 round(ctx
,y0
,y1
,y2
,y3
,x0
,x1
,x2
,x3
,2);
182 ctx
->second_round_output
[0] = x0
^ T0
[0];
183 ctx
->second_round_output
[1] = x1
^ T3
[0];
184 ctx
->second_round_output
[2] = x2
^ T2
[0];
185 ctx
->second_round_output
[3] = x3
^ T1
[0];