staging: crypto: skein: remove trailing whitespace
[linux-2.6/btrfs-unstable.git] / drivers / staging / skein / skein_block.c
blob d315f547feaecec180f3f62314d2961b886c0e46
1 /***********************************************************************
2 **
3 ** Implementation of the Skein block functions.
4 **
5 ** Source code author: Doug Whiting, 2008.
6 **
7 ** This algorithm and source code is released to the public domain.
8 **
9 ** Compile-time switches:
11 ** SKEIN_USE_ASM -- set bits (256/512/1024) to select which
12 ** versions use ASM code for block processing
13 ** [default: use C for all block sizes]
15 ************************************************************************/
17 #include <linux/string.h>
18 #include <skein.h>
/*
 * SKEIN_USE_ASM is tested below as a bitmask (& 256, & 512, & 1024): a set
 * bit compiles out the C block function for that state size.
 */
20 #ifndef SKEIN_USE_ASM
21 #define SKEIN_USE_ASM (0) /* default is all C code (no ASM) */
22 #endif
/*
 * SKEIN_LOOP is read one decimal digit per variant: hundreds digit for 256,
 * tens digit for 512, ones digit for 1024.  A digit of 0 selects the fully
 * unrolled code; a non-zero digit N selects the looping code with N groups
 * of 8 rounds per loop iteration.
 * NOTE(review): a literal written with a leading zero is an octal constant
 * in C; 001 is still 1, but other values written this way may not mean what
 * their decimal digits suggest -- confirm before changing the default.
 */
24 #ifndef SKEIN_LOOP
25 #define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 */
26 #endif
/*
 * Each block function keeps its tweak and key schedule in one local array
 * kw[]: ts[] is a view starting at kw[0], ks[] a view starting at kw[3].
 * WCNT is a per-function enum constant, so BLK_BITS is only meaningful
 * inside those functions.
 */
28 #define BLK_BITS (WCNT*64) /* some useful definitions for code here */
29 #define KW_TWK_BASE (0)
30 #define KW_KEY_BASE (3)
31 #define ks (kw + KW_KEY_BASE)
32 #define ts (kw + KW_TWK_BASE)
/*
 * With SKEIN_DEBUG defined, DebugSaveTweak copies ts[0] and ts[1] back into
 * ctx->h.T; otherwise it expands to nothing.
 */
34 #ifdef SKEIN_DEBUG
35 #define DebugSaveTweak(ctx) { ctx->h.T[0] = ts[0]; ctx->h.T[1] = ts[1]; }
36 #else
37 #define DebugSaveTweak(ctx)
38 #endif
40 /***************************** Skein_256 ******************************/
41 #if !(SKEIN_USE_ASM & 256)
/*
 * Skein_256_Process_Block - run the Skein-256 block function over blkCnt
 * consecutive input blocks and fold the result into the context.
 *
 * ctx:        ctx->X is read as the key for each block and overwritten with
 *             the new chaining value; ctx->h.T[0..1] is read on entry and
 *             written back on exit.
 * blkPtr:     input data; advanced by SKEIN_256_BLOCK_BYTES per block.
 * blkCnt:     number of blocks to process.  Must be non-zero: the loop is a
 *             do/while, so the body runs once before blkCnt is tested.
 * byteCntAdd: amount added to the first tweak word before each block.
 */
42 void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, size_t blkCnt, size_t byteCntAdd)
43 { /* do it in C */
44 enum {
45 WCNT = SKEIN_256_STATE_WORDS
/* RCNT = number of 8-round groups needed for the full round count */
47 #undef RCNT
48 #define RCNT (SKEIN_256_ROUNDS_TOTAL/8)
/* hundreds digit of SKEIN_LOOP selects the 256-bit unroll count */
50 #ifdef SKEIN_LOOP /* configure how much to unroll the loop */
51 #define SKEIN_UNROLL_256 (((SKEIN_LOOP)/100)%10)
52 #else
53 #define SKEIN_UNROLL_256 (0)
54 #endif
/*
 * Looping build: kw[] gets two extra words per 8-round group so the key
 * schedule can be extended in place instead of being indexed modulo.
 */
56 #if SKEIN_UNROLL_256
57 #if (RCNT % SKEIN_UNROLL_256)
58 #error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */
59 #endif
60 size_t r;
61 u64 kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/
62 #else
63 u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
64 #endif
65 u64 X0, X1, X2, X3; /* local copy of context vars, for speed */
66 u64 w[WCNT]; /* local copy of input block */
67 #ifdef SKEIN_DEBUG
68 const u64 *Xptr[4]; /* use for debugging (help compiler put Xn in registers) */
69 Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3;
70 #endif
71 Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
/* work on a local copy of the tweak; it is stored back after the last block */
72 ts[0] = ctx->h.T[0];
73 ts[1] = ctx->h.T[1];
74 do {
75 /* this implementation only supports 2**64 input bytes (no carry out here) */
76 ts[0] += byteCntAdd; /* update processed length */
78 /* precompute the key schedule for this block */
79 ks[0] = ctx->X[0];
80 ks[1] = ctx->X[1];
81 ks[2] = ctx->X[2];
82 ks[3] = ctx->X[3];
/* extra key word: XOR of all chaining words and SKEIN_KS_PARITY */
83 ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY;
/* third tweak word: XOR of the first two */
85 ts[2] = ts[0] ^ ts[1];
87 Skein_Get64_LSB_First(w, blkPtr, WCNT); /* get input block in little-endian format */
88 DebugSaveTweak(ctx);
89 Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
91 X0 = w[0] + ks[0]; /* do the first full key injection */
92 X1 = w[1] + ks[1] + ts[0];
93 X2 = w[2] + ks[2] + ts[1];
94 X3 = w[3] + ks[3];
96 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, Xptr); /* show starting state values */
/* w[] already holds this block, so the input pointer can move on now */
98 blkPtr += SKEIN_256_BLOCK_BYTES;
100 /* run the rounds */
/*
 * One round: add / rotate-left / xor on the word pairs (p0,p1) and (p2,p3).
 * ROT is pasted with _0 and _1 to name the two rotation constants; rNum is
 * not used by this macro.
 */
102 #define Round256(p0, p1, p2, p3, ROT, rNum) \
103 X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0; \
104 X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2; \
/*
 * Fully unrolled build: R is a compile-time constant, so the schedule
 * indices are reduced modulo 5 (key words) and 3 (tweak words).
 */
106 #if SKEIN_UNROLL_256 == 0
107 #define R256(p0, p1, p2, p3, ROT, rNum) /* fully unrolled */ \
108 Round256(p0, p1, p2, p3, ROT, rNum) \
109 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr);
111 #define I256(R) \
112 X0 += ks[((R)+1) % 5]; /* inject the key schedule value */ \
113 X1 += ks[((R)+2) % 5] + ts[((R)+1) % 3]; \
114 X2 += ks[((R)+3) % 5] + ts[((R)+2) % 3]; \
115 X3 += ks[((R)+4) % 5] + (R)+1; \
116 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
117 #else /* looping version */
118 #define R256(p0, p1, p2, p3, ROT, rNum) \
119 Round256(p0, p1, p2, p3, ROT, rNum) \
120 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr);
/*
 * Looping build: instead of modulo indexing, each injection appends the
 * word that just dropped out of the window to the end of the schedule.
 * ts[] and ks[] are views of the same kw[] array three words apart, so
 * ts[r + (R) + 2] is the very slot ks[r + (R) - 1] occupies; that key word
 * is copied forward on the preceding line before the tweak store reuses it.
 */
122 #define I256(R) \
123 X0 += ks[r+(R)+0]; /* inject the key schedule value */ \
124 X1 += ks[r+(R)+1] + ts[r+(R)+0]; \
125 X2 += ks[r+(R)+2] + ts[r+(R)+1]; \
126 X3 += ks[r+(R)+3] + r+(R); \
127 ks[r + (R) + 4] = ks[r + (R) - 1]; /* rotate key schedule */\
128 ts[r + (R) + 2] = ts[r + (R) - 1]; \
129 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
/* r starts at 1 and advances by two injections per unrolled 8-round group */
131 for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_256) /* loop thru it */
132 #endif
/* eight rounds, with a key injection after each group of four */
134 #define R256_8_rounds(R) \
135 R256(0, 1, 2, 3, R_256_0, 8 * (R) + 1); \
136 R256(0, 3, 2, 1, R_256_1, 8 * (R) + 2); \
137 R256(0, 1, 2, 3, R_256_2, 8 * (R) + 3); \
138 R256(0, 3, 2, 1, R_256_3, 8 * (R) + 4); \
139 I256(2 * (R)); \
140 R256(0, 1, 2, 3, R_256_4, 8 * (R) + 5); \
141 R256(0, 3, 2, 1, R_256_5, 8 * (R) + 6); \
142 R256(0, 1, 2, 3, R_256_6, 8 * (R) + 7); \
143 R256(0, 3, 2, 1, R_256_7, 8 * (R) + 8); \
144 I256(2 * (R) + 1);
146 R256_8_rounds(0);
/*
 * Group NN is emitted when either the build is fully unrolled and the round
 * count needs more than NN groups, or the loop body unrolls more than NN.
 */
148 #define R256_Unroll_R(NN) ((SKEIN_UNROLL_256 == 0 && SKEIN_256_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_256 > (NN)))
150 #if R256_Unroll_R(1)
151 R256_8_rounds(1);
152 #endif
153 #if R256_Unroll_R(2)
154 R256_8_rounds(2);
155 #endif
156 #if R256_Unroll_R(3)
157 R256_8_rounds(3);
158 #endif
159 #if R256_Unroll_R(4)
160 R256_8_rounds(4);
161 #endif
162 #if R256_Unroll_R(5)
163 R256_8_rounds(5);
164 #endif
165 #if R256_Unroll_R(6)
166 R256_8_rounds(6);
167 #endif
168 #if R256_Unroll_R(7)
169 R256_8_rounds(7);
170 #endif
171 #if R256_Unroll_R(8)
172 R256_8_rounds(8);
173 #endif
174 #if R256_Unroll_R(9)
175 R256_8_rounds(9);
176 #endif
177 #if R256_Unroll_R(10)
178 R256_8_rounds(10);
179 #endif
180 #if R256_Unroll_R(11)
181 R256_8_rounds(11);
182 #endif
183 #if R256_Unroll_R(12)
184 R256_8_rounds(12);
185 #endif
186 #if R256_Unroll_R(13)
187 R256_8_rounds(13);
188 #endif
189 #if R256_Unroll_R(14)
190 R256_8_rounds(14);
191 #endif
/* only groups 0..14 are spelled out above */
192 #if (SKEIN_UNROLL_256 > 14)
193 #error "need more unrolling in Skein_256_Process_Block"
194 #endif
196 /* do the final "feedforward" xor, update context chaining vars */
197 ctx->X[0] = X0 ^ w[0];
198 ctx->X[1] = X1 ^ w[1];
199 ctx->X[2] = X2 ^ w[2];
200 ctx->X[3] = X3 ^ w[3];
202 Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
/* clear SKEIN_T1_FLAG_FIRST so later blocks and the saved tweak no longer carry it */
204 ts[1] &= ~SKEIN_T1_FLAG_FIRST;
206 while (--blkCnt);
/* publish the updated tweak back to the context */
207 ctx->h.T[0] = ts[0];
208 ctx->h.T[1] = ts[1];
211 #if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
/*
 * Returns the byte distance from the address of Skein_256_Process_Block to
 * the address of this function.
 * NOTE(review): presumably meant as the code size of the block function,
 * which only holds if this function is placed directly after it -- C does
 * not guarantee function layout; confirm against the build.
 */
212 size_t Skein_256_Process_Block_CodeSize(void)
214 return ((u8 *) Skein_256_Process_Block_CodeSize) -
215 ((u8 *) Skein_256_Process_Block);
/*
 * Returns the compile-time unroll setting SKEIN_UNROLL_256
 * (0 means the fully unrolled code was built).
 */
217 unsigned int Skein_256_Unroll_Cnt(void)
219 return SKEIN_UNROLL_256;
221 #endif
222 #endif
224 /***************************** Skein_512 ******************************/
225 #if !(SKEIN_USE_ASM & 512)
/*
 * Skein_512_Process_Block - run the Skein-512 block function over blkCnt
 * consecutive input blocks and fold the result into the context.
 *
 * ctx:        ctx->X is read as the key for each block and overwritten with
 *             the new chaining value; ctx->h.T[0..1] is read on entry and
 *             written back on exit.
 * blkPtr:     input data; advanced by SKEIN_512_BLOCK_BYTES per block.
 * blkCnt:     number of blocks to process.  Must be non-zero: the loop is a
 *             do/while, so the body runs once before blkCnt is tested.
 * byteCntAdd: amount added to the first tweak word before each block.
 */
226 void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t blkCnt, size_t byteCntAdd)
227 { /* do it in C */
228 enum {
229 WCNT = SKEIN_512_STATE_WORDS
/* RCNT = number of 8-round groups needed for the full round count */
231 #undef RCNT
232 #define RCNT (SKEIN_512_ROUNDS_TOTAL/8)
/* tens digit of SKEIN_LOOP selects the 512-bit unroll count */
234 #ifdef SKEIN_LOOP /* configure how much to unroll the loop */
235 #define SKEIN_UNROLL_512 (((SKEIN_LOOP)/10)%10)
236 #else
237 #define SKEIN_UNROLL_512 (0)
238 #endif
/*
 * Looping build: kw[] gets two extra words per 8-round group so the key
 * schedule can be extended in place instead of being indexed modulo.
 */
240 #if SKEIN_UNROLL_512
241 #if (RCNT % SKEIN_UNROLL_512)
242 #error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */
243 #endif
244 size_t r;
245 u64 kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/
246 #else
247 u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
248 #endif
249 u64 X0, X1, X2, X3, X4, X5, X6, X7; /* local copy of vars, for speed */
250 u64 w[WCNT]; /* local copy of input block */
251 #ifdef SKEIN_DEBUG
252 const u64 *Xptr[8]; /* use for debugging (help compiler put Xn in registers) */
253 Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3;
254 Xptr[4] = &X4; Xptr[5] = &X5; Xptr[6] = &X6; Xptr[7] = &X7;
255 #endif
257 Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
/* work on a local copy of the tweak; it is stored back after the last block */
258 ts[0] = ctx->h.T[0];
259 ts[1] = ctx->h.T[1];
260 do {
261 /* this implementation only supports 2**64 input bytes (no carry out here) */
262 ts[0] += byteCntAdd; /* update processed length */
264 /* precompute the key schedule for this block */
265 ks[0] = ctx->X[0];
266 ks[1] = ctx->X[1];
267 ks[2] = ctx->X[2];
268 ks[3] = ctx->X[3];
269 ks[4] = ctx->X[4];
270 ks[5] = ctx->X[5];
271 ks[6] = ctx->X[6];
272 ks[7] = ctx->X[7];
/* extra key word: XOR of all chaining words and SKEIN_KS_PARITY */
273 ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
274 ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;
/* third tweak word: XOR of the first two */
276 ts[2] = ts[0] ^ ts[1];
278 Skein_Get64_LSB_First(w, blkPtr, WCNT); /* get input block in little-endian format */
279 DebugSaveTweak(ctx);
280 Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
/* tweak words are added to the third- and second-to-last state words only */
282 X0 = w[0] + ks[0]; /* do the first full key injection */
283 X1 = w[1] + ks[1];
284 X2 = w[2] + ks[2];
285 X3 = w[3] + ks[3];
286 X4 = w[4] + ks[4];
287 X5 = w[5] + ks[5] + ts[0];
288 X6 = w[6] + ks[6] + ts[1];
289 X7 = w[7] + ks[7];
/* w[] already holds this block, so the input pointer can move on now */
291 blkPtr += SKEIN_512_BLOCK_BYTES;
293 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, Xptr);
294 /* run the rounds */
/*
 * One round: add / rotate-left / xor on four word pairs.  ROT is pasted
 * with _0.._3 to name the rotation constants; rNum is not used by this
 * macro.
 */
295 #define Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \
296 X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0; \
297 X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2; \
298 X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4; \
299 X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6; \
/*
 * Fully unrolled build: R is a compile-time constant, so the schedule
 * indices are reduced modulo 9 (key words) and 3 (tweak words).
 */
301 #if SKEIN_UNROLL_512 == 0
302 #define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) /* unrolled */ \
303 Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \
304 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr);
306 #define I512(R) \
307 X0 += ks[((R) + 1) % 9]; /* inject the key schedule value */ \
308 X1 += ks[((R) + 2) % 9]; \
309 X2 += ks[((R) + 3) % 9]; \
310 X3 += ks[((R) + 4) % 9]; \
311 X4 += ks[((R) + 5) % 9]; \
312 X5 += ks[((R) + 6) % 9] + ts[((R) + 1) % 3]; \
313 X6 += ks[((R) + 7) % 9] + ts[((R) + 2) % 3]; \
314 X7 += ks[((R) + 8) % 9] + (R) + 1; \
315 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
316 #else /* looping version */
317 #define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \
318 Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \
319 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr);
/*
 * Looping build: instead of modulo indexing, each injection appends the
 * word that just dropped out of the window to the end of the schedule.
 * ts[] and ks[] are views of the same kw[] array three words apart, so
 * ts[r + (R) + 2] is the very slot ks[r + (R) - 1] occupies; that key word
 * is copied forward on the preceding line before the tweak store reuses it.
 */
321 #define I512(R) \
322 X0 += ks[r + (R) + 0]; /* inject the key schedule value */ \
323 X1 += ks[r + (R) + 1]; \
324 X2 += ks[r + (R) + 2]; \
325 X3 += ks[r + (R) + 3]; \
326 X4 += ks[r + (R) + 4]; \
327 X5 += ks[r + (R) + 5] + ts[r + (R) + 0]; \
328 X6 += ks[r + (R) + 6] + ts[r + (R) + 1]; \
329 X7 += ks[r + (R) + 7] + r + (R); \
330 ks[r + (R) + 8] = ks[r + (R) - 1]; /* rotate key schedule */ \
331 ts[r + (R) + 2] = ts[r + (R) - 1]; \
332 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
/* r starts at 1 and advances by two injections per unrolled 8-round group */
334 for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_512) /* loop thru it */
335 #endif /* end of looped code definitions */
/* eight rounds, with a key injection after each group of four */
337 #define R512_8_rounds(R) /* do 8 full rounds */ \
338 R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_0, 8 * (R) + 1); \
339 R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_1, 8 * (R) + 2); \
340 R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_2, 8 * (R) + 3); \
341 R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_3, 8 * (R) + 4); \
342 I512(2 * (R)); \
343 R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_4, 8 * (R) + 5); \
344 R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_5, 8 * (R) + 6); \
345 R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_6, 8 * (R) + 7); \
346 R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_7, 8 * (R) + 8); \
347 I512(2 * (R) + 1); /* and key injection */
349 R512_8_rounds(0);
/*
 * Group NN is emitted when either the build is fully unrolled and the round
 * count needs more than NN groups, or the loop body unrolls more than NN.
 */
351 #define R512_Unroll_R(NN) ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_512 > (NN)))
353 #if R512_Unroll_R(1)
354 R512_8_rounds(1);
355 #endif
356 #if R512_Unroll_R(2)
357 R512_8_rounds(2);
358 #endif
359 #if R512_Unroll_R(3)
360 R512_8_rounds(3);
361 #endif
362 #if R512_Unroll_R(4)
363 R512_8_rounds(4);
364 #endif
365 #if R512_Unroll_R(5)
366 R512_8_rounds(5);
367 #endif
368 #if R512_Unroll_R(6)
369 R512_8_rounds(6);
370 #endif
371 #if R512_Unroll_R(7)
372 R512_8_rounds(7);
373 #endif
374 #if R512_Unroll_R(8)
375 R512_8_rounds(8);
376 #endif
377 #if R512_Unroll_R(9)
378 R512_8_rounds(9);
379 #endif
380 #if R512_Unroll_R(10)
381 R512_8_rounds(10);
382 #endif
383 #if R512_Unroll_R(11)
384 R512_8_rounds(11);
385 #endif
386 #if R512_Unroll_R(12)
387 R512_8_rounds(12);
388 #endif
389 #if R512_Unroll_R(13)
390 R512_8_rounds(13);
391 #endif
392 #if R512_Unroll_R(14)
393 R512_8_rounds(14);
394 #endif
/* only groups 0..14 are spelled out above */
395 #if (SKEIN_UNROLL_512 > 14)
396 #error "need more unrolling in Skein_512_Process_Block"
397 #endif
400 /* do the final "feedforward" xor, update context chaining vars */
401 ctx->X[0] = X0 ^ w[0];
402 ctx->X[1] = X1 ^ w[1];
403 ctx->X[2] = X2 ^ w[2];
404 ctx->X[3] = X3 ^ w[3];
405 ctx->X[4] = X4 ^ w[4];
406 ctx->X[5] = X5 ^ w[5];
407 ctx->X[6] = X6 ^ w[6];
408 ctx->X[7] = X7 ^ w[7];
409 Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
/* clear SKEIN_T1_FLAG_FIRST so later blocks and the saved tweak no longer carry it */
411 ts[1] &= ~SKEIN_T1_FLAG_FIRST;
413 while (--blkCnt);
/* publish the updated tweak back to the context */
414 ctx->h.T[0] = ts[0];
415 ctx->h.T[1] = ts[1];
418 #if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
/*
 * Returns the byte distance from the address of Skein_512_Process_Block to
 * the address of this function.
 * NOTE(review): presumably meant as the code size of the block function,
 * which only holds if this function is placed directly after it -- C does
 * not guarantee function layout; confirm against the build.
 */
419 size_t Skein_512_Process_Block_CodeSize(void)
421 return ((u8 *) Skein_512_Process_Block_CodeSize) -
422 ((u8 *) Skein_512_Process_Block);
/*
 * Returns the compile-time unroll setting SKEIN_UNROLL_512
 * (0 means the fully unrolled code was built).
 */
424 unsigned int Skein_512_Unroll_Cnt(void)
426 return SKEIN_UNROLL_512;
428 #endif
429 #endif
431 /***************************** Skein1024 ******************************/
432 #if !(SKEIN_USE_ASM & 1024)
/*
 * Skein1024_Process_Block - run the Skein-1024 block function over blkCnt
 * consecutive input blocks and fold the result into the context.
 *
 * ctx:        ctx->X is read as the key for each block and overwritten with
 *             the new chaining value; ctx->h.T[0..1] is read on entry and
 *             written back on exit.
 * blkPtr:     input data; advanced by SKEIN1024_BLOCK_BYTES per block.
 * blkCnt:     number of blocks to process.  Must be non-zero: the loop is a
 *             do/while, so the body runs once before blkCnt is tested.
 * byteCntAdd: amount added to the first tweak word before each block.
 */
433 void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t blkCnt, size_t byteCntAdd)
434 { /* do it in C, always looping (unrolled is bigger AND slower!) */
435 enum {
436 WCNT = SKEIN1024_STATE_WORDS
/* RCNT = number of 8-round groups needed for the full round count */
438 #undef RCNT
439 #define RCNT (SKEIN1024_ROUNDS_TOTAL/8)
/* ones digit of SKEIN_LOOP selects the 1024-bit unroll count */
441 #ifdef SKEIN_LOOP /* configure how much to unroll the loop */
442 #define SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10)
443 #else
444 #define SKEIN_UNROLL_1024 (0)
445 #endif
/*
 * Looping build: kw[] gets two extra words per 8-round group so the key
 * schedule can be extended in place instead of being indexed modulo.
 */
447 #if (SKEIN_UNROLL_1024 != 0)
448 #if (RCNT % SKEIN_UNROLL_1024)
449 #error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */
450 #endif
451 size_t r;
452 u64 kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/
453 #else
454 u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
455 #endif
457 u64 X00, X01, X02, X03, X04, X05, X06, X07, /* local copy of vars, for speed */
458 X08, X09, X10, X11, X12, X13, X14, X15;
459 u64 w[WCNT]; /* local copy of input block */
460 #ifdef SKEIN_DEBUG
461 const u64 *Xptr[16]; /* use for debugging (help compiler put Xn in registers) */
462 Xptr[0] = &X00; Xptr[1] = &X01; Xptr[2] = &X02; Xptr[3] = &X03;
463 Xptr[4] = &X04; Xptr[5] = &X05; Xptr[6] = &X06; Xptr[7] = &X07;
464 Xptr[8] = &X08; Xptr[9] = &X09; Xptr[10] = &X10; Xptr[11] = &X11;
465 Xptr[12] = &X12; Xptr[13] = &X13; Xptr[14] = &X14; Xptr[15] = &X15;
466 #endif
468 Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
/* work on a local copy of the tweak; it is stored back after the last block */
469 ts[0] = ctx->h.T[0];
470 ts[1] = ctx->h.T[1];
471 do {
472 /* this implementation only supports 2**64 input bytes (no carry out here) */
473 ts[0] += byteCntAdd; /* update processed length */
475 /* precompute the key schedule for this block */
476 ks[0] = ctx->X[0];
477 ks[1] = ctx->X[1];
478 ks[2] = ctx->X[2];
479 ks[3] = ctx->X[3];
480 ks[4] = ctx->X[4];
481 ks[5] = ctx->X[5];
482 ks[6] = ctx->X[6];
483 ks[7] = ctx->X[7];
484 ks[8] = ctx->X[8];
485 ks[9] = ctx->X[9];
486 ks[10] = ctx->X[10];
487 ks[11] = ctx->X[11];
488 ks[12] = ctx->X[12];
489 ks[13] = ctx->X[13];
490 ks[14] = ctx->X[14];
491 ks[15] = ctx->X[15];
/* extra key word: XOR of all chaining words and SKEIN_KS_PARITY */
492 ks[16] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
493 ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^
494 ks[8] ^ ks[9] ^ ks[10] ^ ks[11] ^
495 ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY;
/* third tweak word: XOR of the first two */
497 ts[2] = ts[0] ^ ts[1];
499 Skein_Get64_LSB_First(w, blkPtr, WCNT); /* get input block in little-endian format */
500 DebugSaveTweak(ctx);
501 Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
/* tweak words are added to the third- and second-to-last state words only */
503 X00 = w[0] + ks[0]; /* do the first full key injection */
504 X01 = w[1] + ks[1];
505 X02 = w[2] + ks[2];
506 X03 = w[3] + ks[3];
507 X04 = w[4] + ks[4];
508 X05 = w[5] + ks[5];
509 X06 = w[6] + ks[6];
510 X07 = w[7] + ks[7];
511 X08 = w[8] + ks[8];
512 X09 = w[9] + ks[9];
513 X10 = w[10] + ks[10];
514 X11 = w[11] + ks[11];
515 X12 = w[12] + ks[12];
516 X13 = w[13] + ks[13] + ts[0];
517 X14 = w[14] + ks[14] + ts[1];
518 X15 = w[15] + ks[15];
520 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, Xptr);
/*
 * One round: add / rotate-left / xor on eight word pairs.  ROT is pasted
 * with _0.._7 to name the rotation constants; rNum is not used by this
 * macro.
 */
522 #define Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, ROT, rNum) \
523 X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0; \
524 X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2; \
525 X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4; \
526 X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6; \
527 X##p8 += X##p9; X##p9 = RotL_64(X##p9, ROT##_4); X##p9 ^= X##p8; \
528 X##pA += X##pB; X##pB = RotL_64(X##pB, ROT##_5); X##pB ^= X##pA; \
529 X##pC += X##pD; X##pD = RotL_64(X##pD, ROT##_6); X##pD ^= X##pC; \
530 X##pE += X##pF; X##pF = RotL_64(X##pF, ROT##_7); X##pF ^= X##pE; \
/*
 * Fully unrolled build: R is a compile-time constant, so the schedule
 * indices are reduced modulo 17 (key words) and 3 (tweak words).
 */
532 #if SKEIN_UNROLL_1024 == 0
533 #define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, ROT, rn) \
534 Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, ROT, rn) \
535 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rn, Xptr);
537 #define I1024(R) \
538 X00 += ks[((R) + 1) % 17]; /* inject the key schedule value */ \
539 X01 += ks[((R) + 2) % 17]; \
540 X02 += ks[((R) + 3) % 17]; \
541 X03 += ks[((R) + 4) % 17]; \
542 X04 += ks[((R) + 5) % 17]; \
543 X05 += ks[((R) + 6) % 17]; \
544 X06 += ks[((R) + 7) % 17]; \
545 X07 += ks[((R) + 8) % 17]; \
546 X08 += ks[((R) + 9) % 17]; \
547 X09 += ks[((R) + 10) % 17]; \
548 X10 += ks[((R) + 11) % 17]; \
549 X11 += ks[((R) + 12) % 17]; \
550 X12 += ks[((R) + 13) % 17]; \
551 X13 += ks[((R) + 14) % 17] + ts[((R) + 1) % 3]; \
552 X14 += ks[((R) + 15) % 17] + ts[((R) + 2) % 3]; \
553 X15 += ks[((R) + 16) % 17] + (R) + 1; \
554 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
555 #else /* looping version */
556 #define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, ROT, rn) \
557 Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, ROT, rn) \
558 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rn, Xptr);
/*
 * Looping build: instead of modulo indexing, each injection appends the
 * word that just dropped out of the window to the end of the schedule.
 * ts[] and ks[] are views of the same kw[] array three words apart, so
 * ts[r + (R) + 2] is the very slot ks[r + (R) - 1] occupies; that key word
 * is copied forward on the preceding line before the tweak store reuses it.
 * The trace call passes BLK_BITS like every other trace call in this file
 * (it previously named the undefined identifier BLK_BITSi).
 */
560 #define I1024(R) \
561 X00 += ks[r + (R) + 0]; /* inject the key schedule value */ \
562 X01 += ks[r + (R) + 1]; \
563 X02 += ks[r + (R) + 2]; \
564 X03 += ks[r + (R) + 3]; \
565 X04 += ks[r + (R) + 4]; \
566 X05 += ks[r + (R) + 5]; \
567 X06 += ks[r + (R) + 6]; \
568 X07 += ks[r + (R) + 7]; \
569 X08 += ks[r + (R) + 8]; \
570 X09 += ks[r + (R) + 9]; \
571 X10 += ks[r + (R) + 10]; \
572 X11 += ks[r + (R) + 11]; \
573 X12 += ks[r + (R) + 12]; \
574 X13 += ks[r + (R) + 13] + ts[r + (R) + 0]; \
575 X14 += ks[r + (R) + 14] + ts[r + (R) + 1]; \
576 X15 += ks[r + (R) + 15] + r + (R); \
577 ks[r + (R) + 16] = ks[r + (R) - 1]; /* rotate key schedule */\
578 ts[r + (R) + 2] = ts[r + (R) - 1]; \
579 Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
/*
 * r starts at 1 and advances by an even step, so it is always odd and can
 * never equal 2 * RCNT; '<' therefore runs the same iterations as '<=' and
 * matches the 256- and 512-bit loops.
 */
581 for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_1024) /* loop thru it */
582 #endif
/* eight rounds, with a key injection after each group of four */
584 #define R1024_8_rounds(R) /* do 8 full rounds */ \
585 R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, R1024_0, 8*(R) + 1); \
586 R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, 08, 01, R1024_1, 8*(R) + 2); \
587 R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, 10, 09, R1024_2, 8*(R) + 3); \
588 R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, 12, 07, R1024_3, 8*(R) + 4); \
589 I1024(2*(R)); \
590 R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, R1024_4, 8*(R) + 5); \
591 R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, 08, 01, R1024_5, 8*(R) + 6); \
592 R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, 10, 09, R1024_6, 8*(R) + 7); \
593 R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, 12, 07, R1024_7, 8*(R) + 8); \
594 I1024(2*(R)+1);
596 R1024_8_rounds(0);
/*
 * Group NN is emitted when either the build is fully unrolled and the round
 * count needs more than NN groups, or the loop body unrolls more than NN.
 */
598 #define R1024_Unroll_R(NN) ((SKEIN_UNROLL_1024 == 0 && SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_1024 > (NN)))
600 #if R1024_Unroll_R(1)
601 R1024_8_rounds(1);
602 #endif
603 #if R1024_Unroll_R(2)
604 R1024_8_rounds(2);
605 #endif
606 #if R1024_Unroll_R(3)
607 R1024_8_rounds(3);
608 #endif
609 #if R1024_Unroll_R(4)
610 R1024_8_rounds(4);
611 #endif
612 #if R1024_Unroll_R(5)
613 R1024_8_rounds(5);
614 #endif
615 #if R1024_Unroll_R(6)
616 R1024_8_rounds(6);
617 #endif
618 #if R1024_Unroll_R(7)
619 R1024_8_rounds(7);
620 #endif
621 #if R1024_Unroll_R(8)
622 R1024_8_rounds(8);
623 #endif
624 #if R1024_Unroll_R(9)
625 R1024_8_rounds(9);
626 #endif
627 #if R1024_Unroll_R(10)
628 R1024_8_rounds(10);
629 #endif
630 #if R1024_Unroll_R(11)
631 R1024_8_rounds(11);
632 #endif
633 #if R1024_Unroll_R(12)
634 R1024_8_rounds(12);
635 #endif
636 #if R1024_Unroll_R(13)
637 R1024_8_rounds(13);
638 #endif
639 #if R1024_Unroll_R(14)
640 R1024_8_rounds(14);
641 #endif
/* only groups 0..14 are spelled out above */
642 #if (SKEIN_UNROLL_1024 > 14)
643 #error "need more unrolling in Skein1024_Process_Block"
644 #endif
646 /* do the final "feedforward" xor, update context chaining vars */
648 ctx->X[0] = X00 ^ w[0];
649 ctx->X[1] = X01 ^ w[1];
650 ctx->X[2] = X02 ^ w[2];
651 ctx->X[3] = X03 ^ w[3];
652 ctx->X[4] = X04 ^ w[4];
653 ctx->X[5] = X05 ^ w[5];
654 ctx->X[6] = X06 ^ w[6];
655 ctx->X[7] = X07 ^ w[7];
656 ctx->X[8] = X08 ^ w[8];
657 ctx->X[9] = X09 ^ w[9];
658 ctx->X[10] = X10 ^ w[10];
659 ctx->X[11] = X11 ^ w[11];
660 ctx->X[12] = X12 ^ w[12];
661 ctx->X[13] = X13 ^ w[13];
662 ctx->X[14] = X14 ^ w[14];
663 ctx->X[15] = X15 ^ w[15];
665 Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
/* clear SKEIN_T1_FLAG_FIRST so later blocks and the saved tweak no longer carry it */
667 ts[1] &= ~SKEIN_T1_FLAG_FIRST;
/* advance to the next input block (done at the end of the iteration here) */
668 blkPtr += SKEIN1024_BLOCK_BYTES;
670 while (--blkCnt);
/* publish the updated tweak back to the context */
671 ctx->h.T[0] = ts[0];
672 ctx->h.T[1] = ts[1];
675 #if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
/*
 * Returns the byte distance from the address of Skein1024_Process_Block to
 * the address of this function.
 * NOTE(review): presumably meant as the code size of the block function,
 * which only holds if this function is placed directly after it -- C does
 * not guarantee function layout; confirm against the build.
 */
676 size_t Skein1024_Process_Block_CodeSize(void)
678 return ((u8 *) Skein1024_Process_Block_CodeSize) -
679 ((u8 *) Skein1024_Process_Block);
/*
 * Returns the compile-time unroll setting SKEIN_UNROLL_1024
 * (0 means the fully unrolled code was built).
 */
681 unsigned int Skein1024_Unroll_Cnt(void)
683 return SKEIN_UNROLL_1024;
685 #endif
686 #endif