[gbx]
[oscam.git] / ffdecsa / stream.c
blob1bda8521c588b1a4f55c6ef6ed2f9fdea3563ba5
/* FFdecsa -- fast decsa algorithm
 *
 * Copyright (C) 2003-2004  fatih89r
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
22 // define statics only once, when STREAM_INIT
23 #ifdef STREAM_INIT
24 struct stream_regs {
25 group A[32+10][4]; // 32 because we will move back (virtual shift register)
26 group B[32+10][4]; // 32 because we will move back (virtual shift register)
27 group X[4];
28 group Y[4];
29 group Z[4];
30 group D[4];
31 group E[4];
32 group F[4];
33 group p;
34 group q;
35 group r;
/*
 * trasp64_32_88ccw -- in-place bit permutation of a matrix of 64 rows of
 * 32 bits ("bytes transp. - 8x8 rotate counterclockwise").
 *
 * data is read and written as 64 consecutive unsigned int rows
 * (row[0]..row[63]).  unsigned int is assumed to be 32 bits wide and data
 * suitably aligned for it -- NOTE(review): neither is checked here.
 *
 * Five butterfly stages pair rows that are 16, 8, 4, 2 and 1 apart:
 *  - the 16 and 8 stages exchange the upper sub-block of the first row with
 *    the lower sub-block of the second row;
 *  - the 4, 2 and 1 stages cycle the four sub-blocks of a row pair by one
 *    position instead of exchanging two of them.
 * Rows 0..31 and rows 32..63 are never combined with each other.
 */
static inline void trasp64_32_88ccw(unsigned char *data){
  unsigned int *row=(unsigned int *)data;
  unsigned int t,b;
  int i,j;

  /* rows 16 apart, 16-bit sub-blocks: exchange */
  for(j=0;j<64;j+=32){
    for(i=0;i<16;i++){
      t=row[j+i];
      b=row[j+16+i];
      row[j+i]   =(t&0x0000ffff)|(b<<16);
      row[j+16+i]=(t>>16)|(b&0xffff0000);
    }
  }

  /* rows 8 apart, 8-bit sub-blocks: exchange */
  for(j=0;j<64;j+=16){
    for(i=0;i<8;i++){
      t=row[j+i];
      b=row[j+8+i];
      row[j+i]  =(t&0x00ff00ff)|((b&0x00ff00ff)<<8);
      row[j+8+i]=((t&0xff00ff00)>>8)|(b&0xff00ff00);
    }
  }

  /* rows 4 apart, 4-bit sub-blocks: cycle */
  for(j=0;j<64;j+=8){
    for(i=0;i<4;i++){
      t=row[j+i];
      b=row[j+4+i];
      row[j+i]  =((t&0x0f0f0f0f)<<4)|(b&0x0f0f0f0f);
      row[j+4+i]=(t&0xf0f0f0f0)|((b&0xf0f0f0f0)>>4);
    }
  }

  /* rows 2 apart, 2-bit sub-blocks: cycle */
  for(j=0;j<64;j+=4){
    for(i=0;i<2;i++){
      t=row[j+i];
      b=row[j+2+i];
      row[j+i]  =((t&0x33333333)<<2)|(b&0x33333333);
      row[j+2+i]=(t&0xcccccccc)|((b&0xcccccccc)>>2);
    }
  }

  /* adjacent rows, single bits: cycle */
  for(j=0;j<64;j+=2){
    t=row[j];
    b=row[j+1];
    row[j]  =((t&0x55555555)<<1)|(b&0x55555555);
    row[j+1]=(t&0xaaaaaaaa)|((b&0xaaaaaaaa)>>1);
  }
}
/*
 * trasp64_32_88cw -- in-place bit permutation of a matrix of 64 rows of
 * 32 bits ("bytes transp. - 8x8 rotate clockwise").
 *
 * data is read and written as 64 consecutive unsigned int rows
 * (row[0]..row[63]).  unsigned int is assumed to be 32 bits wide and data
 * suitably aligned for it -- NOTE(review): neither is checked here.
 *
 * Five butterfly stages pair rows that are 16, 8, 4, 2 and 1 apart:
 *  - the 16 and 8 stages exchange the upper sub-block of the first row with
 *    the lower sub-block of the second row;
 *  - the 4, 2 and 1 stages cycle the four sub-blocks of a row pair by one
 *    position, using the masks and shifts in the opposite sense to the
 *    "ccw" routine.
 * Rows 0..31 and rows 32..63 are never combined with each other.
 */
static inline void trasp64_32_88cw(unsigned char *data){
  unsigned int *row=(unsigned int *)data;
  unsigned int t,b;
  int i,j;

  /* rows 16 apart, 16-bit sub-blocks: exchange */
  for(j=0;j<64;j+=32){
    for(i=0;i<16;i++){
      t=row[j+i];
      b=row[j+16+i];
      row[j+i]   =(t&0x0000ffff)|(b<<16);
      row[j+16+i]=(t>>16)|(b&0xffff0000);
    }
  }

  /* rows 8 apart, 8-bit sub-blocks: exchange */
  for(j=0;j<64;j+=16){
    for(i=0;i<8;i++){
      t=row[j+i];
      b=row[j+8+i];
      row[j+i]  =(t&0x00ff00ff)|((b&0x00ff00ff)<<8);
      row[j+8+i]=((t&0xff00ff00)>>8)|(b&0xff00ff00);
    }
  }

  /* rows 4 apart, 4-bit sub-blocks: cycle */
  for(j=0;j<64;j+=8){
    for(i=0;i<4;i++){
      t=row[j+i];
      b=row[j+4+i];
      row[j+i]  =((t&0xf0f0f0f0)>>4)|(b&0xf0f0f0f0);
      row[j+4+i]=(t&0x0f0f0f0f)|((b&0x0f0f0f0f)<<4);
    }
  }

  /* rows 2 apart, 2-bit sub-blocks: cycle */
  for(j=0;j<64;j+=4){
    for(i=0;i<2;i++){
      t=row[j+i];
      b=row[j+2+i];
      row[j+i]  =((t&0xcccccccc)>>2)|(b&0xcccccccc);
      row[j+2+i]=(t&0x33333333)|((b&0x33333333)<<2);
    }
  }

  /* adjacent rows, single bits: cycle */
  for(j=0;j<64;j+=2){
    t=row[j];
    b=row[j+1];
    row[j]  =((t&0xaaaaaaaa)>>1)|(b&0xaaaaaaaa);
    row[j+1]=(t&0x55555555)|((b&0x55555555)<<1);
  }
}
142 //64-64----------------------------------------------------------
/*
 * trasp64_64_88ccw -- in-place bit permutation of a matrix of 64 rows of
 * 64 bits ("bytes transp. - 8x8 rotate counterclockwise").
 *
 * data is read and written as 64 consecutive unsigned long long rows
 * (row[0]..row[63]); it is assumed to be suitably aligned for that type
 * -- NOTE(review): not checked here.
 *
 * Six butterfly stages pair rows that are 32, 16, 8, 4, 2 and 1 apart:
 *  - the 32, 16 and 8 stages exchange the upper sub-block of the first row
 *    with the lower sub-block of the second row;
 *  - the 4, 2 and 1 stages cycle the four sub-blocks of a row pair by one
 *    position instead of exchanging two of them.
 */
static inline void trasp64_64_88ccw(unsigned char *data){
  unsigned long long int *row=(unsigned long long int *)data;
  unsigned long long int t,b;
  int i,j;

  /* rows 32 apart, 32-bit sub-blocks: exchange */
  for(j=0;j<64;j+=64){
    for(i=0;i<32;i++){
      t=row[j+i];
      b=row[j+32+i];
      row[j+i]   =(t&0x00000000ffffffffULL)|(b<<32);
      row[j+32+i]=(t>>32)|(b&0xffffffff00000000ULL);
    }
  }

  /* rows 16 apart, 16-bit sub-blocks: exchange */
  for(j=0;j<64;j+=32){
    for(i=0;i<16;i++){
      t=row[j+i];
      b=row[j+16+i];
      row[j+i]   =(t&0x0000ffff0000ffffULL)|((b&0x0000ffff0000ffffULL)<<16);
      row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16)|(b&0xffff0000ffff0000ULL);
    }
  }

  /* rows 8 apart, 8-bit sub-blocks: exchange */
  for(j=0;j<64;j+=16){
    for(i=0;i<8;i++){
      t=row[j+i];
      b=row[j+8+i];
      row[j+i]  =(t&0x00ff00ff00ff00ffULL)|((b&0x00ff00ff00ff00ffULL)<<8);
      row[j+8+i]=((t&0xff00ff00ff00ff00ULL)>>8)|(b&0xff00ff00ff00ff00ULL);
    }
  }

  /* rows 4 apart, 4-bit sub-blocks: cycle */
  for(j=0;j<64;j+=8){
    for(i=0;i<4;i++){
      t=row[j+i];
      b=row[j+4+i];
      row[j+i]  =((t&0x0f0f0f0f0f0f0f0fULL)<<4)|(b&0x0f0f0f0f0f0f0f0fULL);
      row[j+4+i]=(t&0xf0f0f0f0f0f0f0f0ULL)|((b&0xf0f0f0f0f0f0f0f0ULL)>>4);
    }
  }

  /* rows 2 apart, 2-bit sub-blocks: cycle */
  for(j=0;j<64;j+=4){
    for(i=0;i<2;i++){
      t=row[j+i];
      b=row[j+2+i];
      row[j+i]  =((t&0x3333333333333333ULL)<<2)|(b&0x3333333333333333ULL);
      row[j+2+i]=(t&0xccccccccccccccccULL)|((b&0xccccccccccccccccULL)>>2);
    }
  }

  /* adjacent rows, single bits: cycle */
  for(j=0;j<64;j+=2){
    t=row[j];
    b=row[j+1];
    row[j]  =((t&0x5555555555555555ULL)<<1)|(b&0x5555555555555555ULL);
    row[j+1]=(t&0xaaaaaaaaaaaaaaaaULL)|((b&0xaaaaaaaaaaaaaaaaULL)>>1);
  }
}
/*
 * trasp64_64_88cw -- in-place bit permutation of a matrix of 64 rows of
 * 64 bits ("bytes transp. - 8x8 rotate clockwise").
 *
 * data is read and written as 64 consecutive unsigned long long rows
 * (row[0]..row[63]); it is assumed to be suitably aligned for that type
 * -- NOTE(review): not checked here.
 *
 * Six butterfly stages pair rows that are 32, 16, 8, 4, 2 and 1 apart:
 *  - the 32, 16 and 8 stages exchange the upper sub-block of the first row
 *    with the lower sub-block of the second row;
 *  - the 4, 2 and 1 stages cycle the four sub-blocks of a row pair by one
 *    position, using the masks and shifts in the opposite sense to the
 *    "ccw" routine.
 */
static inline void trasp64_64_88cw(unsigned char *data){
  unsigned long long int *row=(unsigned long long int *)data;
  unsigned long long int t,b;
  int i,j;

  /* rows 32 apart, 32-bit sub-blocks: exchange */
  for(j=0;j<64;j+=64){
    for(i=0;i<32;i++){
      t=row[j+i];
      b=row[j+32+i];
      row[j+i]   =(t&0x00000000ffffffffULL)|(b<<32);
      row[j+32+i]=(t>>32)|(b&0xffffffff00000000ULL);
    }
  }

  /* rows 16 apart, 16-bit sub-blocks: exchange */
  for(j=0;j<64;j+=32){
    for(i=0;i<16;i++){
      t=row[j+i];
      b=row[j+16+i];
      row[j+i]   =(t&0x0000ffff0000ffffULL)|((b&0x0000ffff0000ffffULL)<<16);
      row[j+16+i]=((t&0xffff0000ffff0000ULL)>>16)|(b&0xffff0000ffff0000ULL);
    }
  }

  /* rows 8 apart, 8-bit sub-blocks: exchange */
  for(j=0;j<64;j+=16){
    for(i=0;i<8;i++){
      t=row[j+i];
      b=row[j+8+i];
      row[j+i]  =(t&0x00ff00ff00ff00ffULL)|((b&0x00ff00ff00ff00ffULL)<<8);
      row[j+8+i]=((t&0xff00ff00ff00ff00ULL)>>8)|(b&0xff00ff00ff00ff00ULL);
    }
  }

  /* rows 4 apart, 4-bit sub-blocks: cycle */
  for(j=0;j<64;j+=8){
    for(i=0;i<4;i++){
      t=row[j+i];
      b=row[j+4+i];
      row[j+i]  =((t&0xf0f0f0f0f0f0f0f0ULL)>>4)|(b&0xf0f0f0f0f0f0f0f0ULL);
      row[j+4+i]=(t&0x0f0f0f0f0f0f0f0fULL)|((b&0x0f0f0f0f0f0f0f0fULL)<<4);
    }
  }

  /* rows 2 apart, 2-bit sub-blocks: cycle */
  for(j=0;j<64;j+=4){
    for(i=0;i<2;i++){
      t=row[j+i];
      b=row[j+2+i];
      row[j+i]  =((t&0xccccccccccccccccULL)>>2)|(b&0xccccccccccccccccULL);
      row[j+2+i]=(t&0x3333333333333333ULL)|((b&0x3333333333333333ULL)<<2);
    }
  }

  /* adjacent rows, single bits: cycle */
  for(j=0;j<64;j+=2){
    t=row[j];
    b=row[j+1];
    row[j]  =((t&0xaaaaaaaaaaaaaaaaULL)>>1)|(b&0xaaaaaaaaaaaaaaaaULL);
    row[j+1]=(t&0x5555555555555555ULL)|((b&0x5555555555555555ULL)<<1);
  }
}
265 //64-128----------------------------------------------------------
/*
 * trasp64_128_88ccw -- in-place bit permutation of a matrix of 64 rows of
 * 128 bits ("bytes transp. - 8x8 rotate counterclockwise").
 *
 * data is read and written as 128 consecutive unsigned long long values:
 * row r occupies halfrow[2*r] and halfrow[2*r+1].  data is assumed to be
 * suitably aligned for that type -- NOTE(review): not checked here.
 *
 * The two 64-bit halves of a row are never combined; each half goes through
 * the same six butterfly stages, pairing rows 32, 16, 8, 4, 2 and 1 apart:
 *  - the 32, 16 and 8 stages exchange the upper sub-block of the first row
 *    with the lower sub-block of the second row;
 *  - the 4, 2 and 1 stages cycle the four sub-blocks of a row pair by one
 *    position instead of exchanging two of them.
 */
static inline void trasp64_128_88ccw(unsigned char *data){
  unsigned long long int *halfrow=(unsigned long long int *)data;
  unsigned long long int t,b;
  int i,j,h; /* h selects the low (0) or high (1) 64-bit half of a row */

  /* rows 32 apart, 32-bit sub-blocks: exchange */
  for(j=0;j<64;j+=64){
    for(i=0;i<32;i++){
      for(h=0;h<2;h++){
        t=halfrow[2*(j+i)+h];
        b=halfrow[2*(j+32+i)+h];
        halfrow[2*(j+i)+h]   =(t&0x00000000ffffffffULL)|(b<<32);
        halfrow[2*(j+32+i)+h]=(t>>32)|(b&0xffffffff00000000ULL);
      }
    }
  }

  /* rows 16 apart, 16-bit sub-blocks: exchange */
  for(j=0;j<64;j+=32){
    for(i=0;i<16;i++){
      for(h=0;h<2;h++){
        t=halfrow[2*(j+i)+h];
        b=halfrow[2*(j+16+i)+h];
        halfrow[2*(j+i)+h]   =(t&0x0000ffff0000ffffULL)|((b&0x0000ffff0000ffffULL)<<16);
        halfrow[2*(j+16+i)+h]=((t&0xffff0000ffff0000ULL)>>16)|(b&0xffff0000ffff0000ULL);
      }
    }
  }

  /* rows 8 apart, 8-bit sub-blocks: exchange */
  for(j=0;j<64;j+=16){
    for(i=0;i<8;i++){
      for(h=0;h<2;h++){
        t=halfrow[2*(j+i)+h];
        b=halfrow[2*(j+8+i)+h];
        halfrow[2*(j+i)+h]  =(t&0x00ff00ff00ff00ffULL)|((b&0x00ff00ff00ff00ffULL)<<8);
        halfrow[2*(j+8+i)+h]=((t&0xff00ff00ff00ff00ULL)>>8)|(b&0xff00ff00ff00ff00ULL);
      }
    }
  }

  /* rows 4 apart, 4-bit sub-blocks: cycle */
  for(j=0;j<64;j+=8){
    for(i=0;i<4;i++){
      for(h=0;h<2;h++){
        t=halfrow[2*(j+i)+h];
        b=halfrow[2*(j+4+i)+h];
        halfrow[2*(j+i)+h]  =((t&0x0f0f0f0f0f0f0f0fULL)<<4)|(b&0x0f0f0f0f0f0f0f0fULL);
        halfrow[2*(j+4+i)+h]=(t&0xf0f0f0f0f0f0f0f0ULL)|((b&0xf0f0f0f0f0f0f0f0ULL)>>4);
      }
    }
  }

  /* rows 2 apart, 2-bit sub-blocks: cycle */
  for(j=0;j<64;j+=4){
    for(i=0;i<2;i++){
      for(h=0;h<2;h++){
        t=halfrow[2*(j+i)+h];
        b=halfrow[2*(j+2+i)+h];
        halfrow[2*(j+i)+h]  =((t&0x3333333333333333ULL)<<2)|(b&0x3333333333333333ULL);
        halfrow[2*(j+2+i)+h]=(t&0xccccccccccccccccULL)|((b&0xccccccccccccccccULL)>>2);
      }
    }
  }

  /* adjacent rows, single bits: cycle */
  for(j=0;j<64;j+=2){
    for(h=0;h<2;h++){
      t=halfrow[2*j+h];
      b=halfrow[2*(j+1)+h];
      halfrow[2*j+h]    =((t&0x5555555555555555ULL)<<1)|(b&0x5555555555555555ULL);
      halfrow[2*(j+1)+h]=(t&0xaaaaaaaaaaaaaaaaULL)|((b&0xaaaaaaaaaaaaaaaaULL)>>1);
    }
  }
}
/*
 * trasp64_128_88cw -- in-place bit permutation of a matrix of 64 rows of
 * 128 bits ("bytes transp. - 8x8 rotate clockwise").
 *
 * data is read and written as 128 consecutive unsigned long long values:
 * row r occupies halfrow[2*r] and halfrow[2*r+1].  data is assumed to be
 * suitably aligned for that type -- NOTE(review): not checked here.
 *
 * The two 64-bit halves of a row are never combined; each half goes through
 * the same six butterfly stages, pairing rows 32, 16, 8, 4, 2 and 1 apart:
 *  - the 32, 16 and 8 stages exchange the upper sub-block of the first row
 *    with the lower sub-block of the second row;
 *  - the 4, 2 and 1 stages cycle the four sub-blocks of a row pair by one
 *    position, using the masks and shifts in the opposite sense to the
 *    "ccw" routine.
 */
static inline void trasp64_128_88cw(unsigned char *data){
  unsigned long long int *halfrow=(unsigned long long int *)data;
  unsigned long long int t,b;
  int i,j,h; /* h selects the low (0) or high (1) 64-bit half of a row */

  /* rows 32 apart, 32-bit sub-blocks: exchange */
  for(j=0;j<64;j+=64){
    for(i=0;i<32;i++){
      for(h=0;h<2;h++){
        t=halfrow[2*(j+i)+h];
        b=halfrow[2*(j+32+i)+h];
        halfrow[2*(j+i)+h]   =(t&0x00000000ffffffffULL)|(b<<32);
        halfrow[2*(j+32+i)+h]=(t>>32)|(b&0xffffffff00000000ULL);
      }
    }
  }

  /* rows 16 apart, 16-bit sub-blocks: exchange */
  for(j=0;j<64;j+=32){
    for(i=0;i<16;i++){
      for(h=0;h<2;h++){
        t=halfrow[2*(j+i)+h];
        b=halfrow[2*(j+16+i)+h];
        halfrow[2*(j+i)+h]   =(t&0x0000ffff0000ffffULL)|((b&0x0000ffff0000ffffULL)<<16);
        halfrow[2*(j+16+i)+h]=((t&0xffff0000ffff0000ULL)>>16)|(b&0xffff0000ffff0000ULL);
      }
    }
  }

  /* rows 8 apart, 8-bit sub-blocks: exchange */
  for(j=0;j<64;j+=16){
    for(i=0;i<8;i++){
      for(h=0;h<2;h++){
        t=halfrow[2*(j+i)+h];
        b=halfrow[2*(j+8+i)+h];
        halfrow[2*(j+i)+h]  =(t&0x00ff00ff00ff00ffULL)|((b&0x00ff00ff00ff00ffULL)<<8);
        halfrow[2*(j+8+i)+h]=((t&0xff00ff00ff00ff00ULL)>>8)|(b&0xff00ff00ff00ff00ULL);
      }
    }
  }

  /* rows 4 apart, 4-bit sub-blocks: cycle */
  for(j=0;j<64;j+=8){
    for(i=0;i<4;i++){
      for(h=0;h<2;h++){
        t=halfrow[2*(j+i)+h];
        b=halfrow[2*(j+4+i)+h];
        halfrow[2*(j+i)+h]  =((t&0xf0f0f0f0f0f0f0f0ULL)>>4)|(b&0xf0f0f0f0f0f0f0f0ULL);
        halfrow[2*(j+4+i)+h]=(t&0x0f0f0f0f0f0f0f0fULL)|((b&0x0f0f0f0f0f0f0f0fULL)<<4);
      }
    }
  }

  /* rows 2 apart, 2-bit sub-blocks: cycle */
  for(j=0;j<64;j+=4){
    for(i=0;i<2;i++){
      for(h=0;h<2;h++){
        t=halfrow[2*(j+i)+h];
        b=halfrow[2*(j+2+i)+h];
        halfrow[2*(j+i)+h]  =((t&0xccccccccccccccccULL)>>2)|(b&0xccccccccccccccccULL);
        halfrow[2*(j+2+i)+h]=(t&0x3333333333333333ULL)|((b&0x3333333333333333ULL)<<2);
      }
    }
  }

  /* adjacent rows, single bits: cycle */
  for(j=0;j<64;j+=2){
    for(h=0;h<2;h++){
      t=halfrow[2*j+h];
      b=halfrow[2*(j+1)+h];
      halfrow[2*j+h]    =((t&0xaaaaaaaaaaaaaaaaULL)>>1)|(b&0xaaaaaaaaaaaaaaaaULL);
      halfrow[2*(j+1)+h]=(t&0x5555555555555555ULL)|((b&0x5555555555555555ULL)<<1);
    }
  }
}
435 #endif
438 #ifdef STREAM_INIT
439 void stream_cypher_group_init(
440 struct stream_regs *regs,
441 group iA[8][4], // [In] iA00,iA01,...iA73 32 groups | Derived from key.
442 group iB[8][4], // [In] iB00,iB01,...iB73 32 groups | Derived from key.
443 unsigned char *sb) // [In] (SB0,SB1,...SB7)...x32 32*8 bytes | Extra input.
444 #endif
445 #ifdef STREAM_NORMAL
446 void stream_cypher_group_normal(
447 struct stream_regs *regs,
448 unsigned char *cb) // [Out] (CB0,CB1,...CB7)...x32 32*8 bytes | Output.
449 #endif
451 #ifdef STREAM_INIT
452 group in1[4];
453 group in2[4];
454 #endif
455 group extra_B[4];
456 group fa,fb,fc,fd,fe;
457 group s1a,s1b,s2a,s2b,s3a,s3b,s4a,s4b,s5a,s5b,s6a,s6b,s7a,s7b;
458 group next_E[4];
459 group tmp0,tmp1,tmp2,tmp3,tmp4;
460 #ifdef STREAM_INIT
461 group *sb_g=(group *)sb;
462 #endif
463 #ifdef STREAM_NORMAL
464 group *cb_g=(group *)cb;
465 #endif
466 int aboff;
467 int i,j,k,b;
468 int dbg;
470 #ifdef STREAM_INIT
471 DBG(fprintf(stderr,":::::::::: BEGIN STREAM INIT\n"));
472 #endif
473 #ifdef STREAM_NORMAL
474 DBG(fprintf(stderr,":::::::::: BEGIN STREAM NORMAL\n"));
475 #endif
476 #ifdef STREAM_INIT
477 for(j=0;j<64;j++){
478 DBG(fprintf(stderr,"precall prerot stream_in[%2i]=",j));
479 DBG(dump_mem("",sb+BYPG*j,BYPG,BYPG));
482 DBG(dump_mem("stream_prerot ",sb,GROUP_PARALLELISM*8,BYPG));
483 #if GROUP_PARALLELISM==32
484 trasp64_32_88ccw(sb);
485 #endif
486 #if GROUP_PARALLELISM==64
487 trasp64_64_88ccw(sb);
488 #endif
489 #if GROUP_PARALLELISM==128
490 trasp64_128_88ccw(sb);
491 #endif
492 DBG(dump_mem("stream_postrot",sb,GROUP_PARALLELISM*8,BYPG));
494 for(j=0;j<64;j++){
495 DBG(fprintf(stderr,"precall stream_in[%2i]=",j));
496 DBG(dump_mem("",sb+BYPG*j,BYPG,BYPG));
498 #endif
500 aboff=32;
502 #ifdef STREAM_INIT
503 // load first 32 bits of ck into A[aboff+0]..A[aboff+7]
504 // load last 32 bits of ck into B[aboff+0]..B[aboff+7]
505 // all other regs = 0
506 for(i=0;i<8;i++){
507 for(b=0;b<4;b++){
508 DBG(fprintf(stderr,"dbg from iA A[%i][%i]=",i,b));
509 DBG(dump_mem("",(unsigned char *)&iA[i][b],BYPG,BYPG));
510 DBG(fprintf(stderr," dbg from iB B[%i][%i]=",i,b));
511 DBG(dump_mem("",(unsigned char *)&iB[i][b],BYPG,BYPG));
512 regs->A[aboff+i][b]=iA[i][b];
513 regs->B[aboff+i][b]=iB[i][b];
516 for(b=0;b<4;b++){
517 regs->A[aboff+8][b]=FF0();
518 regs->A[aboff+9][b]=FF0();
519 regs->B[aboff+8][b]=FF0();
520 regs->B[aboff+9][b]=FF0();
522 for(b=0;b<4;b++){
523 regs->X[b]=FF0();
524 regs->Y[b]=FF0();
525 regs->Z[b]=FF0();
526 regs->D[b]=FF0();
527 regs->E[b]=FF0();
528 regs->F[b]=FF0();
530 regs->p=FF0();
531 regs->q=FF0();
532 regs->r=FF0();
533 #endif
535 for(dbg=0;dbg<4;dbg++){
536 DBG(fprintf(stderr,"dbg A0[%i]=",dbg));
537 DBG(dump_mem("",(unsigned char *)&regs->A[aboff+0][dbg],BYPG,BYPG));
538 DBG(fprintf(stderr,"dbg B0[%i]=",dbg));
539 DBG(dump_mem("",(unsigned char *)&regs->B[aboff+0][dbg],BYPG,BYPG));
542 ////////////////////////////////////////////////////////////////////////////////
544 // EXTERNAL LOOP - 8 bytes per operation
545 for(i=0;i<8;i++){
547 DBG(fprintf(stderr,"--BEGIN EXTERNAL LOOP %i\n",i));
549 #ifdef STREAM_INIT
550 for(b=0;b<4;b++){
551 in1[b]=sb_g[8*i+4+b];
552 in2[b]=sb_g[8*i+b];
554 #endif
556 // INTERNAL LOOP - 2 bits per iteration
557 for(j=0; j<4; j++){
559 DBG(fprintf(stderr,"---BEGIN INTERNAL LOOP %i (EXT %i, INT %i)\n",j,i,j));
561 // from A0..A9, 35 bits are selected as inputs to 7 s-boxes
562 // 5 bits input per s-box, 2 bits output per s-box
564 // we can select bits with zero masking and shifting operations
565 // and synthetize s-boxes with optimized boolean functions.
566 // this is the actual reason we do all the crazy transposition
567 // stuff to switch between normal and bit slice representations.
568 // this code really flies.
570 fe=regs->A[aboff+3][0];fa=regs->A[aboff+0][2];fb=regs->A[aboff+5][1];fc=regs->A[aboff+6][3];fd=regs->A[aboff+8][0];
571 /* 1000 1110 1110 0001 : lev 7: */ //tmp0=( fa^( fb^( ( ( ( fa|fb )^fc )|( fc^fd ) )^ALL_ONES ) ) );
572 /* 1110 0010 0011 0011 : lev 6: */ //tmp1=( ( fa|fb )^( ( fc&( fa|( fb^fd ) ) )^ALL_ONES ) );
573 /* 0011 0110 1000 1101 : lev 5: */ //tmp2=( fa^( ( fb&fd )^( ( fa&fd )|fc ) ) );
574 /* 0101 0101 1001 0011 : lev 5: */ //tmp3=( ( fa&fc )^( fa^( ( fa&fb )|fd ) ) );
575 /* 1000 1110 1110 0001 : lev 7: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFOR(FFXOR(FFOR(fa,fb),fc),FFXOR(fc,fd)),FF1())));
576 /* 1110 0010 0011 0011 : lev 6: */ tmp1=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fa,FFXOR(fb,fd))),FF1()));
577 /* 0011 0110 1000 1101 : lev 5: */ tmp2=FFXOR(fa,FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),fc)));
578 /* 0101 0101 1001 0011 : lev 5: */ tmp3=FFXOR(FFAND(fa,fc),FFXOR(fa,FFOR(FFAND(fa,fb),fd)));
579 s1a=FFXOR(tmp0,FFAND(fe,tmp1));
580 s1b=FFXOR(tmp2,FFAND(fe,tmp3));
581 //dump_mem("s1as1b-fe",&fe,BYPG,BYPG);
582 //dump_mem("s1as1b-fa",&fa,BYPG,BYPG);
583 //dump_mem("s1as1b-fb",&fb,BYPG,BYPG);
584 //dump_mem("s1as1b-fc",&fc,BYPG,BYPG);
585 //dump_mem("s1as1b-fd",&fd,BYPG,BYPG);
587 fe=regs->A[aboff+1][1];fa=regs->A[aboff+2][2];fb=regs->A[aboff+5][3];fc=regs->A[aboff+6][0];fd=regs->A[aboff+8][1];
588 /* 1001 1110 0110 0001 : lev 6: */ //tmp0=( fa^( ( fb&( fc|fd ) )^( fc^( fd^ALL_ONES ) ) ) );
589 /* 0000 0011 0111 1011 : lev 5: */ //tmp1=( ( fa&( fb^fd ) )|( ( fa|fb )&fc ) );
590 /* 1100 0110 1101 0010 : lev 6: */ //tmp2=( ( fb&fd )^( ( fa&fd )|( fb^( fc^ALL_ONES ) ) ) );
591 /* 0001 1110 1111 0101 : lev 5: */ //tmp3=( ( fa&fd )|( fa^( fb^( fc&fd ) ) ) );
592 /* 1001 1110 0110 0001 : lev 6: */ tmp0=FFXOR(fa,FFXOR(FFAND(fb,FFOR(fc,fd)),FFXOR(fc,FFXOR(fd,FF1()))));
593 /* 0000 0011 0111 1011 : lev 5: */ tmp1=FFOR(FFAND(fa,FFXOR(fb,fd)),FFAND(FFOR(fa,fb),fc));
594 /* 1100 0110 1101 0010 : lev 6: */ tmp2=FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),FFXOR(fb,FFXOR(fc,FF1()))));
595 /* 0001 1110 1111 0101 : lev 5: */ tmp3=FFOR(FFAND(fa,fd),FFXOR(fa,FFXOR(fb,FFAND(fc,fd))));
596 s2a=FFXOR(tmp0,FFAND(fe,tmp1));
597 s2b=FFXOR(tmp2,FFAND(fe,tmp3));
599 fe=regs->A[aboff+0][3];fa=regs->A[aboff+1][0];fb=regs->A[aboff+4][1];fc=regs->A[aboff+4][3];fd=regs->A[aboff+5][2];
600 /* 0100 1011 1001 0110 : lev 5: */ //tmp0=( fa^( fb^( ( fc&( fa|fd ) )^fd ) ) );
601 /* 1101 0101 1000 1100 : lev 7: */ //tmp1=( ( fa&fc )^( ( fa^fd )|( ( fb|fc )^( fd^ALL_ONES ) ) ) );
602 /* 0010 0111 1101 1000 : lev 4: */ //tmp2=( fa^( ( ( fb^fc )&fd )^fc ) );
603 /* 1111 1111 1111 1111 : lev 0: */ //tmp3=ALL_ONES;
604 /* 0100 1011 1001 0110 : lev 5: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFAND(fc,FFOR(fa,fd)),fd)));
605 /* 1101 0101 1000 1100 : lev 7: */ tmp1=FFXOR(FFAND(fa,fc),FFOR(FFXOR(fa,fd),FFXOR(FFOR(fb,fc),FFXOR(fd,FF1()))));
606 /* 0010 0111 1101 1000 : lev 4: */ tmp2=FFXOR(fa,FFXOR(FFAND(FFXOR(fb,fc),fd),fc));
607 /* 1111 1111 1111 1111 : lev 0: */ tmp3=FF1();
608 s3a=FFXOR(tmp0,FFAND(FFNOT(fe),tmp1));
609 s3b=FFXOR(tmp2,FFAND(fe,tmp3));
611 fe=regs->A[aboff+2][3];fa=regs->A[aboff+0][1];fb=regs->A[aboff+1][3];fc=regs->A[aboff+3][2];fd=regs->A[aboff+7][0];
612 /* 1011 0101 0100 1001 : lev 7: */ //tmp0=( fa^( ( fc&( fa^fd ) )|( fb^( fc|( fd^ALL_ONES ) ) ) ) );
613 /* 0010 1101 0110 0110 : lev 6: */ //tmp1=( ( fa&fb )^( fb^( ( ( fa|fc )&fd )^fc ) ) );
614 /* 0110 0111 1101 0000 : lev 7: */ //tmp2=( fa^( ( fb&fc )|( ( ( fa&( fb^fd ) )|fc )^fd ) ) );
615 /* 1111 1111 1111 1111 : lev 0: */ //tmp3=ALL_ONES;
616 /* 1011 0101 0100 1001 : lev 7: */ tmp0=FFXOR(fa,FFOR(FFAND(fc,FFXOR(fa,fd)),FFXOR(fb,FFOR(fc,FFXOR(fd,FF1())))));
617 /* 0010 1101 0110 0110 : lev 6: */ tmp1=FFXOR(FFAND(fa,fb),FFXOR(fb,FFXOR(FFAND(FFOR(fa,fc),fd),fc)));
618 /* 0110 0111 1101 0000 : lev 7: */ tmp2=FFXOR(fa,FFOR(FFAND(fb,fc),FFXOR(FFOR(FFAND(fa,FFXOR(fb,fd)),fc),fd)));
619 /* 1111 1111 1111 1111 : lev 0: */ tmp3=FF1();
620 s4a=FFXOR(tmp0,FFAND(fe,FFXOR(tmp1,tmp0)));
621 s4b=FFXOR(FFXOR(s4a,tmp2),FFAND(fe,tmp3));
623 fe=regs->A[aboff+4][2];fa=regs->A[aboff+3][3];fb=regs->A[aboff+5][0];fc=regs->A[aboff+7][1];fd=regs->A[aboff+8][2];
624 /* 1000 1111 0011 0010 : lev 7: */ //tmp0=( ( ( fa&( fb|fc ) )^fb )|( ( ( fa^fc )|fd )^ALL_ONES ) );
625 /* 0110 1011 0000 1011 : lev 6: */ //tmp1=( fb^( ( fc^fd )&( fc^( fb|( fa^fd ) ) ) ) );
626 /* 0001 1010 0111 1001 : lev 6: */ //tmp2=( ( fa&fc )^( fb^( ( fb|( fa^fc ) )&fd ) ) );
627 /* 0101 1101 1101 0101 : lev 4: */ //tmp3=( ( ( fa^fb )&( fc^ALL_ONES ) )|fd );
628 /* 1000 1111 0011 0010 : lev 7: */ tmp0=FFOR(FFXOR(FFAND(fa,FFOR(fb,fc)),fb),FFXOR(FFOR(FFXOR(fa,fc),fd),FF1()));
629 /* 0110 1011 0000 1011 : lev 6: */ tmp1=FFXOR(fb,FFAND(FFXOR(fc,fd),FFXOR(fc,FFOR(fb,FFXOR(fa,fd)))));
630 /* 0001 1010 0111 1001 : lev 6: */ tmp2=FFXOR(FFAND(fa,fc),FFXOR(fb,FFAND(FFOR(fb,FFXOR(fa,fc)),fd)));
631 /* 0101 1101 1101 0101 : lev 4: */ tmp3=FFOR(FFAND(FFXOR(fa,fb),FFXOR(fc,FF1())),fd);
632 s5a=FFXOR(tmp0,FFAND(fe,tmp1));
633 s5b=FFXOR(tmp2,FFAND(fe,tmp3));
635 fe=regs->A[aboff+2][1];fa=regs->A[aboff+3][1];fb=regs->A[aboff+4][0];fc=regs->A[aboff+6][2];fd=regs->A[aboff+8][3];
636 /* 0011 0110 0010 1101 : lev 6: */ //tmp0=( ( ( fa&fc )&fd )^( ( fb&( fa|fd ) )^fc ) );
637 /* 1110 1110 1011 1011 : lev 3: */ //tmp1=( ( ( fa^fc )&fd )^ALL_ONES );
638 /* 0101 1000 0110 0111 : lev 6: */ //tmp2=( ( fa&( fb|fc ) )^( fb^( ( fb&fc )|fd ) ) );
639 /* 0001 0011 0000 0001 : lev 5: */ //tmp3=( fc&( ( fa&( fb^fd ) )^( fb|fd ) ) );
640 /* 0011 0110 0010 1101 : lev 6: */ tmp0=FFXOR(FFAND(FFAND(fa,fc),fd),FFXOR(FFAND(fb,FFOR(fa,fd)),fc));
641 /* 1110 1110 1011 1011 : lev 3: */ tmp1=FFXOR(FFAND(FFXOR(fa,fc),fd),FF1());
642 /* 0101 1000 0110 0111 : lev 6: */ tmp2=FFXOR(FFAND(fa,FFOR(fb,fc)),FFXOR(fb,FFOR(FFAND(fb,fc),fd)));
643 /* 0001 0011 0000 0001 : lev 5: */ tmp3=FFAND(fc,FFXOR(FFAND(fa,FFXOR(fb,fd)),FFOR(fb,fd)));
644 s6a=FFXOR(tmp0,FFAND(fe,tmp1));
645 s6b=FFXOR(tmp2,FFAND(fe,tmp3));
647 fe=regs->A[aboff+1][2];fa=regs->A[aboff+2][0];fb=regs->A[aboff+6][1];fc=regs->A[aboff+7][2];fd=regs->A[aboff+7][3];
648 /* 0111 1000 1001 0110 : lev 5: */ //tmp0=( fb^( ( fc&fd )|( fa^( fc^fd ) ) ) );
649 /* 0100 1001 0101 1011 : lev 6: */ //tmp1=( ( fb|fd )&( ( fa&fc )|( fb^( fc^fd ) ) ) );
650 /* 0100 1001 1011 1001 : lev 5: */ //tmp2=( ( fa|fb )^( ( fc&( fb|fd ) )^fd ) );
651 /* 1111 1111 1101 1101 : lev 3: */ //tmp3=( fd|( ( fa&fc )^ALL_ONES ) );
652 /* 0111 1000 1001 0110 : lev 5: */ tmp0=FFXOR(fb,FFOR(FFAND(fc,fd),FFXOR(fa,FFXOR(fc,fd))));
653 /* 0100 1001 0101 1011 : lev 6: */ tmp1=FFAND(FFOR(fb,fd),FFOR(FFAND(fa,fc),FFXOR(fb,FFXOR(fc,fd))));
654 /* 0100 1001 1011 1001 : lev 5: */ tmp2=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fb,fd)),fd));
655 /* 1111 1111 1101 1101 : lev 3: */ tmp3=FFOR(fd,FFXOR(FFAND(fa,fc),FF1()));
656 s7a=FFXOR(tmp0,FFAND(fe,tmp1));
657 s7b=FFXOR(tmp2,FFAND(fe,tmp3));
661 we have just done this:
663 int sbox1[0x20] = {2,0,1,1,2,3,3,0, 3,2,2,0,1,1,0,3, 0,3,3,0,2,2,1,1, 2,2,0,3,1,1,3,0};
664 int sbox2[0x20] = {3,1,0,2,2,3,3,0, 1,3,2,1,0,0,1,2, 3,1,0,3,3,2,0,2, 0,0,1,2,2,1,3,1};
665 int sbox3[0x20] = {2,0,1,2,2,3,3,1, 1,1,0,3,3,0,2,0, 1,3,0,1,3,0,2,2, 2,0,1,2,0,3,3,1};
666 int sbox4[0x20] = {3,1,2,3,0,2,1,2, 1,2,0,1,3,0,0,3, 1,0,3,1,2,3,0,3, 0,3,2,0,1,2,2,1};
667 int sbox5[0x20] = {2,0,0,1,3,2,3,2, 0,1,3,3,1,0,2,1, 2,3,2,0,0,3,1,1, 1,0,3,2,3,1,0,2};
668 int sbox6[0x20] = {0,1,2,3,1,2,2,0, 0,1,3,0,2,3,1,3, 2,3,0,2,3,0,1,1, 2,1,1,2,0,3,3,0};
669 int sbox7[0x20] = {0,3,2,2,3,0,0,1, 3,0,1,3,1,2,2,1, 1,0,3,3,0,1,1,2, 2,3,1,0,2,3,0,2};
671 s12 = sbox1[ (((A3>>0)&1)<<4) | (((A0>>2)&1)<<3) | (((A5>>1)&1)<<2) | (((A6>>3)&1)<<1) | (((A8>>0)&1)<<0) ]
672 |sbox2[ (((A1>>1)&1)<<4) | (((A2>>2)&1)<<3) | (((A5>>3)&1)<<2) | (((A6>>0)&1)<<1) | (((A8>>1)&1)<<0) ];
673 s34 = sbox3[ (((A0>>3)&1)<<4) | (((A1>>0)&1)<<3) | (((A4>>1)&1)<<2) | (((A4>>3)&1)<<1) | (((A5>>2)&1)<<0) ]
674 |sbox4[ (((A2>>3)&1)<<4) | (((A0>>1)&1)<<3) | (((A1>>3)&1)<<2) | (((A3>>2)&1)<<1) | (((A7>>0)&1)<<0) ];
675 s56 = sbox5[ (((A4>>2)&1)<<4) | (((A3>>3)&1)<<3) | (((A5>>0)&1)<<2) | (((A7>>1)&1)<<1) | (((A8>>2)&1)<<0) ]
676 |sbox6[ (((A2>>1)&1)<<4) | (((A3>>1)&1)<<3) | (((A4>>0)&1)<<2) | (((A6>>2)&1)<<1) | (((A8>>3)&1)<<0) ];
677 s7 = sbox7[ (((A1>>2)&1)<<4) | (((A2>>0)&1)<<3) | (((A6>>1)&1)<<2) | (((A7>>2)&1)<<1) | (((A7>>3)&1)<<0) ];
680 // use 4x4 xor to produce extra nibble for T3
682 extra_B[3]=FFXOR(FFXOR(FFXOR(regs->B[aboff+2][0],regs->B[aboff+5][1]),regs->B[aboff+6][2]),regs->B[aboff+8][3]);
683 extra_B[2]=FFXOR(FFXOR(FFXOR(regs->B[aboff+5][0],regs->B[aboff+7][1]),regs->B[aboff+2][3]),regs->B[aboff+3][2]);
684 extra_B[1]=FFXOR(FFXOR(FFXOR(regs->B[aboff+4][3],regs->B[aboff+7][2]),regs->B[aboff+3][0]),regs->B[aboff+4][1]);
685 extra_B[0]=FFXOR(FFXOR(FFXOR(regs->B[aboff+8][2],regs->B[aboff+5][3]),regs->B[aboff+2][1]),regs->B[aboff+7][0]);
686 for(dbg=0;dbg<4;dbg++){
687 DBG(fprintf(stderr,"extra_B[%i]=",dbg));
688 DBG(dump_mem("",(unsigned char *)&extra_B[dbg],BYPG,BYPG));
691 // T1 = xor all inputs
692 // in1, in2, D are only used in T1 during initialisation, not generation
693 for(b=0;b<4;b++){
694 regs->A[aboff-1][b]=FFXOR(regs->A[aboff+9][b],regs->X[b]);
697 #ifdef STREAM_INIT
698 for(b=0;b<4;b++){
699 regs->A[aboff-1][b]=FFXOR(FFXOR(regs->A[aboff-1][b],regs->D[b]),((j % 2) ? in2[b] : in1[b]));
701 #endif
703 for(dbg=0;dbg<4;dbg++){
704 DBG(fprintf(stderr,"next_A0[%i]=",dbg));
705 DBG(dump_mem("",(unsigned char *)&regs->A[aboff-1][dbg],BYPG,BYPG));
708 // T2 = xor all inputs
709 // in1, in2 are only used in T1 during initialisation, not generation
710 // if p=0, use this, if p=1, rotate the result left
711 for(b=0;b<4;b++){
712 regs->B[aboff-1][b]=FFXOR(FFXOR(regs->B[aboff+6][b],regs->B[aboff+9][b]),regs->Y[b]);
715 #ifdef STREAM_INIT
716 for(b=0;b<4;b++){
717 regs->B[aboff-1][b]=FFXOR(regs->B[aboff-1][b],((j % 2) ? in1[b] : in2[b]));
719 #endif
721 for(dbg=0;dbg<4;dbg++){
722 DBG(fprintf(stderr,"next_B0[%i]=",dbg));
723 DBG(dump_mem("",(unsigned char *)&regs->B[aboff-1][dbg],BYPG,BYPG));
726 // if p=1, rotate left (yes, this is what we're doing)
727 tmp3=regs->B[aboff-1][3];
728 regs->B[aboff-1][3]=FFXOR(regs->B[aboff-1][3],FFAND(FFXOR(regs->B[aboff-1][3],regs->B[aboff-1][2]),regs->p));
729 regs->B[aboff-1][2]=FFXOR(regs->B[aboff-1][2],FFAND(FFXOR(regs->B[aboff-1][2],regs->B[aboff-1][1]),regs->p));
730 regs->B[aboff-1][1]=FFXOR(regs->B[aboff-1][1],FFAND(FFXOR(regs->B[aboff-1][1],regs->B[aboff-1][0]),regs->p));
731 regs->B[aboff-1][0]=FFXOR(regs->B[aboff-1][0],FFAND(FFXOR(regs->B[aboff-1][0],tmp3),regs->p));
733 for(dbg=0;dbg<4;dbg++){
734 DBG(fprintf(stderr,"next_B0[%i]=",dbg));
735 DBG(dump_mem("",(unsigned char *)&regs->B[aboff-1][dbg],BYPG,BYPG));
738 // T3 = xor all inputs
739 for(b=0;b<4;b++){
740 regs->D[b]=FFXOR(FFXOR(regs->E[b],regs->Z[b]),extra_B[b]);
743 for(dbg=0;dbg<4;dbg++){
744 DBG(fprintf(stderr,"D[%i]=",dbg));
745 DBG(dump_mem("",(unsigned char *)&regs->D[dbg],BYPG,BYPG));
748 // T4 = sum, carry of Z + E + r
749 for(b=0;b<4;b++){
750 next_E[b]=regs->F[b];
753 tmp0=FFXOR(regs->Z[0],regs->E[0]);
754 tmp1=FFAND(regs->Z[0],regs->E[0]);
755 regs->F[0]=FFXOR(regs->E[0],FFAND(regs->q,FFXOR(regs->Z[0],regs->r)));
756 tmp3=FFAND(tmp0,regs->r);
757 tmp4=FFOR(tmp1,tmp3);
759 tmp0=FFXOR(regs->Z[1],regs->E[1]);
760 tmp1=FFAND(regs->Z[1],regs->E[1]);
761 regs->F[1]=FFXOR(regs->E[1],FFAND(regs->q,FFXOR(regs->Z[1],tmp4)));
762 tmp3=FFAND(tmp0,tmp4);
763 tmp4=FFOR(tmp1,tmp3);
765 tmp0=FFXOR(regs->Z[2],regs->E[2]);
766 tmp1=FFAND(regs->Z[2],regs->E[2]);
767 regs->F[2]=FFXOR(regs->E[2],FFAND(regs->q,FFXOR(regs->Z[2],tmp4)));
768 tmp3=FFAND(tmp0,tmp4);
769 tmp4=FFOR(tmp1,tmp3);
771 tmp0=FFXOR(regs->Z[3],regs->E[3]);
772 tmp1=FFAND(regs->Z[3],regs->E[3]);
773 regs->F[3]=FFXOR(regs->E[3],FFAND(regs->q,FFXOR(regs->Z[3],tmp4)));
774 tmp3=FFAND(tmp0,tmp4);
775 regs->r=FFXOR(regs->r,FFAND(regs->q,FFXOR(FFOR(tmp1,tmp3),regs->r))); // ultimate carry
778 we have just done this: (believe it or not)
780 if (q) {
781 F = Z + E + r;
782 r = (F >> 4) & 1;
783 F = F & 0x0f;
785 else {
786 F = E;
789 for(b=0;b<4;b++){
790 regs->E[b]=next_E[b];
792 for(dbg=0;dbg<4;dbg++){
793 DBG(fprintf(stderr,"F[%i]=",dbg));
794 DBG(dump_mem("",(unsigned char *)&regs->F[dbg],BYPG,BYPG));
796 DBG(fprintf(stderr,"r="));
797 DBG(dump_mem("",(unsigned char *)&regs->r,BYPG,BYPG));
798 for(dbg=0;dbg<4;dbg++){
799 DBG(fprintf(stderr,"E[%i]=",dbg));
800 DBG(dump_mem("",(unsigned char *)&regs->E[dbg],BYPG,BYPG));
803 // this simple instruction is virtually shifting all the shift registers
804 aboff--;
807 we've just done this:
809 A9=A8;A8=A7;A7=A6;A6=A5;A5=A4;A4=A3;A3=A2;A2=A1;A1=A0;A0=next_A0;
810 B9=B8;B8=B7;B7=B6;B6=B5;B5=B4;B4=B3;B3=B2;B2=B1;B1=B0;B0=next_B0;
813 regs->X[0]=s1a;
814 regs->X[1]=s2a;
815 regs->X[2]=s3b;
816 regs->X[3]=s4b;
817 regs->Y[0]=s3a;
818 regs->Y[1]=s4a;
819 regs->Y[2]=s5b;
820 regs->Y[3]=s6b;
821 regs->Z[0]=s5a;
822 regs->Z[1]=s6a;
823 regs->Z[2]=s1b;
824 regs->Z[3]=s2b;
825 regs->p=s7a;
826 regs->q=s7b;
827 for(dbg=0;dbg<4;dbg++){
828 DBG(fprintf(stderr,"X[%i]=",dbg));
829 DBG(dump_mem("",(unsigned char *)&regs->X[dbg],BYPG,BYPG));
831 for(dbg=0;dbg<4;dbg++){
832 DBG(fprintf(stderr,"Y[%i]=",dbg));
833 DBG(dump_mem("",(unsigned char *)&regs->Y[dbg],BYPG,BYPG));
835 for(dbg=0;dbg<4;dbg++){
836 DBG(fprintf(stderr,"Z[%i]=",dbg));
837 DBG(dump_mem("",(unsigned char *)&regs->Z[dbg],BYPG,BYPG));
839 DBG(fprintf(stderr,"p="));
840 DBG(dump_mem("",(unsigned char *)&regs->p,BYPG,BYPG));
841 DBG(fprintf(stderr,"q="));
842 DBG(dump_mem("",(unsigned char *)&regs->q,BYPG,BYPG));
844 #ifdef STREAM_NORMAL
845 // require 4 loops per output byte
846 // 2 output bits are a function of the 4 bits of D
847 // xor 2 by 2
848 cb_g[8*i+7-2*j]=FFXOR(regs->D[2],regs->D[3]);
849 cb_g[8*i+6-2*j]=FFXOR(regs->D[0],regs->D[1]);
850 for(dbg=0;dbg<8;dbg++){
851 DBG(fprintf(stderr,"op[%i]=",dbg));
852 DBG(dump_mem("",(unsigned char *)&cb_g[8*i+dbg],BYPG,BYPG));
854 #endif
856 DBG(fprintf(stderr,"---END INTERNAL LOOP\n"));
858 } // INTERNAL LOOP
860 DBG(fprintf(stderr,"--END EXTERNAL LOOP\n"));
862 } // EXTERNAL LOOP
864 // move 32 steps forward, ready for next call
865 for(k=0;k<10;k++){
866 for(b=0;b<4;b++){
867 DBG(fprintf(stderr,"moving forward AB k=%i b=%i\n",k,b));
868 regs->A[32+k][b]=regs->A[k][b];
869 regs->B[32+k][b]=regs->B[k][b];
874 ////////////////////////////////////////////////////////////////////////////////
876 #ifdef STREAM_NORMAL
877 for(j=0;j<64;j++){
878 DBG(fprintf(stderr,"postcall prerot cb[%2i]=",j));
879 DBG(dump_mem("",(unsigned char *)(cb+BYPG*j),BYPG,BYPG));
882 #if GROUP_PARALLELISM==32
883 trasp64_32_88cw(cb);
884 #endif
885 #if GROUP_PARALLELISM==64
886 trasp64_64_88cw(cb);
887 #endif
888 #if GROUP_PARALLELISM==128
889 trasp64_128_88cw(cb);
890 #endif
892 for(j=0;j<64;j++){
893 DBG(fprintf(stderr,"postcall postrot cb[%2i]=",j));
894 DBG(dump_mem("",(unsigned char *)(cb+BYPG*j),BYPG,BYPG));
896 #endif
898 #ifdef STREAM_INIT
899 DBG(fprintf(stderr,":::::::::: END STREAM INIT\n"));
900 #endif
901 #ifdef STREAM_NORMAL
902 DBG(fprintf(stderr,":::::::::: END STREAM NORMAL\n"));
903 #endif