[gbx]
[oscam.git] / ffdecsa / ffdecsa.c
blob2c7169a796e999f63f8be157cf055ca76b3112ca
/* FFdecsa -- fast decsa algorithm
 *
 * Copyright (C) 2003-2004  fatih89r
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#include <sys/types.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "ffdecsa.h"
/* Fallback for toolchains whose headers leave NULL undefined. */
#if !defined(NULL)
#define NULL 0
#endif

/*
 * Debug tracing switch.
 * With DEBUG defined, DBG(stmt) expands to stmt; otherwise it expands to
 * nothing, so the traced statement is removed at preprocessing time.
 */
//#define DEBUG
#if defined(DEBUG)
#define DBG(a) a
#else
#define DBG(a)
#endif
/*
 * Parallelization back-ends.  Large speed differences are possible between
 * them.  The numeric codes are only compared against PARALLEL_MODE by the
 * preprocessor.
 */
#define PARALLEL_32_4CHAR     320
#define PARALLEL_32_4CHARA    321
#define PARALLEL_32_INT       322
#define PARALLEL_64_8CHAR     640
#define PARALLEL_64_8CHARA    641
#define PARALLEL_64_2INT      642
#define PARALLEL_64_LONG      643
#define PARALLEL_64_MMX       644
#define PARALLEL_128_16CHAR  1280
#define PARALLEL_128_16CHARA 1281
#define PARALLEL_128_4INT    1282
#define PARALLEL_128_2LONG   1283
#define PARALLEL_128_2MMX    1284
#define PARALLEL_128_SSE     1285
#define PARALLEL_128_SSE2    1286

/*
 * Default back-end, used only when the build did not already define
 * PARALLEL_MODE (e.g. with -DPARALLEL_MODE=...).
 */
#if !defined(PARALLEL_MODE)

#if defined(__x86_64__) || defined(_M_X64)
/* x86-64 */
#define PARALLEL_MODE PARALLEL_128_SSE2

#elif defined(__mips__) || defined(__mips) || defined(__MIPS__)
/* MIPS */
#define PARALLEL_MODE PARALLEL_64_LONG

#elif defined(__sh__) || defined(__SH4__)
/* SuperH: also request bounce-buffering of unaligned packets and 4-byte alignment */
#define PARALLEL_MODE PARALLEL_32_INT
#define COPY_UNALIGNED_PKT
#define MEMALIGN_VAL 4

#else
/* everything else */
#define PARALLEL_MODE PARALLEL_32_INT
#endif

#endif /* !defined(PARALLEL_MODE) */
78 #include "parallel_generic.h"
79 //// conditionals
80 #if PARALLEL_MODE==PARALLEL_32_4CHAR
81 #include "parallel_032_4char.h"
82 #elif PARALLEL_MODE==PARALLEL_32_4CHARA
83 #include "parallel_032_4charA.h"
84 #elif PARALLEL_MODE==PARALLEL_32_INT
85 #include "parallel_032_int.h"
86 #elif PARALLEL_MODE==PARALLEL_64_8CHAR
87 #include "parallel_064_8char.h"
88 #elif PARALLEL_MODE==PARALLEL_64_8CHARA
89 #include "parallel_064_8charA.h"
90 #elif PARALLEL_MODE==PARALLEL_64_2INT
91 #include "parallel_064_2int.h"
92 #elif PARALLEL_MODE==PARALLEL_64_LONG
93 #include "parallel_064_long.h"
94 #elif PARALLEL_MODE==PARALLEL_64_MMX
95 #include "parallel_064_mmx.h"
96 #elif PARALLEL_MODE==PARALLEL_128_16CHAR
97 #include "parallel_128_16char.h"
98 #elif PARALLEL_MODE==PARALLEL_128_16CHARA
99 #include "parallel_128_16charA.h"
100 #elif PARALLEL_MODE==PARALLEL_128_4INT
101 #include "parallel_128_4int.h"
102 #elif PARALLEL_MODE==PARALLEL_128_2LONG
103 #include "parallel_128_2long.h"
104 #elif PARALLEL_MODE==PARALLEL_128_2MMX
105 #include "parallel_128_2mmx.h"
106 #elif PARALLEL_MODE==PARALLEL_128_SSE
107 #include "parallel_128_sse.h"
108 #elif PARALLEL_MODE==PARALLEL_128_SSE2
109 #include "parallel_128_sse2.h"
110 #else
111 #error "unknown/undefined parallel mode"
112 #endif
/* Sizes derived from the back-end's GROUP_PARALLELISM, plus short aliases. */
#define BYTES_PER_GROUP (GROUP_PARALLELISM/8)
#define BYPG BYTES_PER_GROUP
#define BITS_PER_GROUP GROUP_PARALLELISM
#define BIPG BITS_PER_GROUP

/* ARM: force at least 4-byte alignment and bounce-buffer unaligned packets. */
#if defined(__arm__)
#if !defined(MEMALIGN_VAL) || MEMALIGN_VAL<4
#undef MEMALIGN_VAL
#define MEMALIGN_VAL 4
#endif
#define COPY_UNALIGNED_PKT
#endif

/* Allocator hooks; a build may pre-define MALLOC/FREE to override them. */
#if !defined(MALLOC)
#define MALLOC(X) malloc(X)
#endif
#if !defined(FREE)
#define FREE(X) free(X)
#endif

/* MEMALIGN decorates a declaration with the requested alignment, if any. */
#if defined(MEMALIGN_VAL)
#define MEMALIGN __attribute__((aligned(MEMALIGN_VAL)))
#else
#define MEMALIGN
#endif
145 //// debug tool
147 #ifdef DEBUG
/*
 * Debug helper: hex-dump len bytes starting at p to stderr.
 *
 *   string   tag printed at the start of every row
 *   p        first byte to dump
 *   len      number of bytes to dump
 *   linelen  bytes per row; a value <= 0 means "everything on one row"
 *
 * Each row starts with the tag and the offset of its first byte.  Inside a
 * row an extra space is inserted every 4 bytes and a second one every 8.
 * The output always ends with a newline, also when the last row is partial
 * or when len is 0.
 */
static void dump_mem(const char *string, const unsigned char *p, int len, int linelen){
  int i;
  // callers may pass a zero row length (e.g. an empty residue); avoid i%0
  if(linelen<=0) linelen=(len>0)?len:1;
  for(i=0;i<len;i++){
    if(i%linelen==0){
      if(i) fprintf(stderr,"\n");   // terminate the previous row
      fprintf(stderr,"%s %08x:",string,(unsigned int)i);
    }
    else{
      if(i%8==0) fprintf(stderr," ");
      if(i%4==0) fprintf(stderr," ");
    }
    fprintf(stderr," %02x",p[i]);
  }
  fprintf(stderr,"\n");
}
161 #endif
163 //////////////////////////////////////////////////////////////////////////////////
165 struct csa_key_t{
166 unsigned char ck[8];
167 // used by stream
168 int iA[8]; // iA[0] is for A1, iA[7] is for A8
169 int iB[8]; // iB[0] is for B1, iB[7] is for B8
170 // used by stream (group)
171 MEMALIGN group ck_g[8][8]; // [byte][bit:0=LSB,7=MSB]
172 MEMALIGN group iA_g[8][4]; // [0 for A1][0 for LSB]
173 MEMALIGN group iB_g[8][4]; // [0 for B1][0 for LSB]
174 // used by block
175 unsigned char kk[56];
176 // used by block (group)
177 MEMALIGN batch kkmulti[56]; // many times the same byte in every batch
180 struct csa_keys_t{
181 struct csa_key_t even;
182 struct csa_key_t odd;
185 //-----stream cypher
187 //-----key schedule for stream decypher
/*
 * Key schedule for the stream cipher: split the control word into nibbles.
 * Bytes ck[0..3] feed iA, bytes ck[4..7] feed iB; for every byte the high
 * nibble is stored first, then the low nibble.
 */
static void key_schedule_stream(
  unsigned char *ck,    // [In]  ck[0]-ck[7]   8 bytes   | Key.
  int *iA,              // [Out] iA[0]-iA[7]   8 nibbles | Key schedule.
  int *iB)              // [Out] iB[0]-iB[7]   8 nibbles | Key schedule.
{
  int byte;

  for(byte=0;byte<4;byte++){
    const int a=ck[byte];
    const int b=ck[byte+4];

    iA[2*byte]  =(a>>4)&0xf;
    iA[2*byte+1]=a&0xf;
    iB[2*byte]  =(b>>4)&0xf;
    iB[2*byte+1]=b&0xf;
  }
}
211 //----- stream main function
213 #define STREAM_INIT
214 #include "stream.c"
215 #undef STREAM_INIT
217 #define STREAM_NORMAL
218 #include "stream.c"
219 #undef STREAM_NORMAL
222 //-----block decypher
224 //-----key schedule for block decypher
/*
 * Key schedule for the block cipher.
 *
 * The control word is the last of seven 8-byte rows (row 6).  Each earlier
 * row is obtained from the following one by a fixed 64-bit permutation.
 * Output byte kk[8*row+col] is that row's byte XORed with the row number.
 */
static void key_schedule_block(
  unsigned char *ck,    // [In]  ck[0]-ck[7]   8 bytes | Key.
  unsigned char *kk)    // [Out] kk[0]-kk[55] 56 bytes | Key schedule.
{
  /* 1-based destination position of every source bit (bit 0 = MSB of byte 0) */
  static const unsigned char key_perm[0x40] = {
    0x12,0x24,0x09,0x07,0x2A,0x31,0x1D,0x15, 0x1C,0x36,0x3E,0x32,0x13,0x21,0x3B,0x40,
    0x18,0x14,0x25,0x27,0x02,0x35,0x1B,0x01, 0x22,0x04,0x0D,0x0E,0x39,0x28,0x1A,0x29,
    0x33,0x23,0x34,0x0C,0x16,0x30,0x1E,0x3A, 0x2D,0x1F,0x08,0x19,0x17,0x2F,0x3D,0x11,
    0x3C,0x05,0x38,0x2B,0x0B,0x06,0x0A,0x2C, 0x20,0x3F,0x2E,0x0F,0x03,0x26,0x10,0x37,
  };

  int row,col,pos;
  int permuted[64];
  int kb[7][8];

  /* row 6 is the control word itself */
  for(col=0;col<8;col++){
    kb[6][col]=ck[col];
  }

  /* rows 5..0: permute the bits of the following row */
  for(row=5;row>=0;row--){
    for(pos=0;pos<64;pos++){
      const int src_bit=(kb[row+1][pos>>3]>>(7-(pos&7)))&1;
      permuted[key_perm[pos]-1]=src_bit;
    }
    for(col=0;col<8;col++){
      int packed=0;
      int k;
      for(k=0;k<8;k++){
        packed|=permuted[col*8+k]<<(7-k);
      }
      kb[row][col]=packed;
    }
  }

  /* XOR every byte with its row number to give kk */
  for(pos=0;pos<56;pos++){
    kk[pos]=kb[pos>>3][pos&7]^(pos>>3);
  }
}
281 //-----block utils
283 static inline __attribute__((always_inline)) void trasp_N_8 (unsigned char *in,unsigned char* out,int count){
284 int *ri=(int *)in;
285 int *ibi=(int *)out;
286 int j,i,k,g;
287 // copy and first step
288 for(g=0;g<count;g++){
289 ri[g]=ibi[2*g];
290 ri[GROUP_PARALLELISM+g]=ibi[2*g+1];
292 //dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
293 // now 01230123
294 #define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
295 for(j=0;j<8;j+=4){
296 for(i=0;i<2;i++){
297 for(k=0;k<INTS_PER_ROW;k++){
298 unsigned int t,b;
299 t=ri[INTS_PER_ROW*(j+i)+k];
300 b=ri[INTS_PER_ROW*(j+i+2)+k];
301 ri[INTS_PER_ROW*(j+i)+k]= (t&0x0000ffff) | ((b )<<16);
302 ri[INTS_PER_ROW*(j+i+2)+k]= ((t )>>16) | (b&0xffff0000) ;
306 //dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
307 // now 01010101
308 for(j=0;j<8;j+=2){
309 for(i=0;i<1;i++){
310 for(k=0;k<INTS_PER_ROW;k++){
311 unsigned int t,b;
312 t=ri[INTS_PER_ROW*(j+i)+k];
313 b=ri[INTS_PER_ROW*(j+i+1)+k];
314 ri[INTS_PER_ROW*(j+i)+k]= (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
315 ri[INTS_PER_ROW*(j+i+1)+k]= ((t&0xff00ff00)>>8) | (b&0xff00ff00);
319 //dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
320 // now 00000000
323 static inline __attribute__((always_inline)) void trasp_8_N (unsigned char *in,unsigned char* out,int count){
324 int *ri=(int *)in;
325 int *bdi=(int *)out;
326 int j,i,k,g;
327 #define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
328 //dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
329 // now 00000000
330 for(j=0;j<8;j+=2){
331 for(i=0;i<1;i++){
332 for(k=0;k<INTS_PER_ROW;k++){
333 unsigned int t,b;
334 t=ri[INTS_PER_ROW*(j+i)+k];
335 b=ri[INTS_PER_ROW*(j+i+1)+k];
336 ri[INTS_PER_ROW*(j+i)+k]= (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
337 ri[INTS_PER_ROW*(j+i+1)+k]= ((t&0xff00ff00)>>8) | (b&0xff00ff00);
341 //dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
342 // now 01010101
343 for(j=0;j<8;j+=4){
344 for(i=0;i<2;i++){
345 for(k=0;k<INTS_PER_ROW;k++){
346 unsigned int t,b;
347 t=ri[INTS_PER_ROW*(j+i)+k];
348 b=ri[INTS_PER_ROW*(j+i+2)+k];
349 ri[INTS_PER_ROW*(j+i)+k]= (t&0x0000ffff) | ((b )<<16);
350 ri[INTS_PER_ROW*(j+i+2)+k]= ((t )>>16) | (b&0xffff0000) ;
354 //dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
355 // now 01230123
356 for(g=0;g<count;g++){
357 bdi[2*g]=ri[g];
358 bdi[2*g+1]=ri[GROUP_PARALLELISM+g];
362 //-----block main function
364 // block group
365 static void block_decypher_group(
366 batch *kkmulti, // [In] kkmulti[0]-kkmulti[55] 56 batches | Key schedule (each batch has repeated equal bytes).
367 unsigned char *ib, // [In] (ib0,ib1,...ib7)...x32 32*8 bytes | Initialization vector.
368 unsigned char *bd, // [Out] (bd0,bd1,...bd7)...x32 32*8 bytes | Block decipher.
369 int count)
371 // int is faster than unsigned char. apparently not
372 static const unsigned char block_sbox[0x100] = {
373 0x3A,0xEA,0x68,0xFE,0x33,0xE9,0x88,0x1A, 0x83,0xCF,0xE1,0x7F,0xBA,0xE2,0x38,0x12,
374 0xE8,0x27,0x61,0x95,0x0C,0x36,0xE5,0x70, 0xA2,0x06,0x82,0x7C,0x17,0xA3,0x26,0x49,
375 0xBE,0x7A,0x6D,0x47,0xC1,0x51,0x8F,0xF3, 0xCC,0x5B,0x67,0xBD,0xCD,0x18,0x08,0xC9,
376 0xFF,0x69,0xEF,0x03,0x4E,0x48,0x4A,0x84, 0x3F,0xB4,0x10,0x04,0xDC,0xF5,0x5C,0xC6,
377 0x16,0xAB,0xAC,0x4C,0xF1,0x6A,0x2F,0x3C, 0x3B,0xD4,0xD5,0x94,0xD0,0xC4,0x63,0x62,
378 0x71,0xA1,0xF9,0x4F,0x2E,0xAA,0xC5,0x56, 0xE3,0x39,0x93,0xCE,0x65,0x64,0xE4,0x58,
379 0x6C,0x19,0x42,0x79,0xDD,0xEE,0x96,0xF6, 0x8A,0xEC,0x1E,0x85,0x53,0x45,0xDE,0xBB,
380 0x7E,0x0A,0x9A,0x13,0x2A,0x9D,0xC2,0x5E, 0x5A,0x1F,0x32,0x35,0x9C,0xA8,0x73,0x30,
382 0x29,0x3D,0xE7,0x92,0x87,0x1B,0x2B,0x4B, 0xA5,0x57,0x97,0x40,0x15,0xE6,0xBC,0x0E,
383 0xEB,0xC3,0x34,0x2D,0xB8,0x44,0x25,0xA4, 0x1C,0xC7,0x23,0xED,0x90,0x6E,0x50,0x00,
384 0x99,0x9E,0x4D,0xD9,0xDA,0x8D,0x6F,0x5F, 0x3E,0xD7,0x21,0x74,0x86,0xDF,0x6B,0x05,
385 0x8E,0x5D,0x37,0x11,0xD2,0x28,0x75,0xD6, 0xA7,0x77,0x24,0xBF,0xF0,0xB0,0x02,0xB7,
386 0xF8,0xFC,0x81,0x09,0xB1,0x01,0x76,0x91, 0x7D,0x0F,0xC8,0xA0,0xF2,0xCB,0x78,0x60,
387 0xD1,0xF7,0xE0,0xB5,0x98,0x22,0xB3,0x20, 0x1D,0xA6,0xDB,0x7B,0x59,0x9F,0xAE,0x31,
388 0xFB,0xD3,0xB6,0xCA,0x43,0x72,0x07,0xF4, 0xD8,0x41,0x14,0x55,0x0D,0x54,0x8B,0xB9,
389 0xAD,0x46,0x0B,0xAF,0x80,0x52,0x2C,0xFA, 0x8C,0x89,0x66,0xFD,0xB2,0xA9,0x9B,0xC0,
391 MEMALIGN unsigned char r[GROUP_PARALLELISM*(8+56)]; /* 56 because we will move back in memory while looping */
392 MEMALIGN unsigned char sbox_in[GROUP_PARALLELISM],sbox_out[GROUP_PARALLELISM],perm_out[GROUP_PARALLELISM];
393 int roff;
394 int i,g,count_all=GROUP_PARALLELISM;
396 roff=GROUP_PARALLELISM*56;
398 #define FASTTRASP1
399 #ifndef FASTTRASP1
400 for(g=0;g<count;g++){
401 // Init registers
402 int j;
403 for(j=0;j<8;j++){
404 r[roff+GROUP_PARALLELISM*j+g]=ib[8*g+j];
407 #else
408 trasp_N_8((unsigned char *)&r[roff],(unsigned char *)ib,count);
409 #endif
410 //dump_mem("OLD r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
412 // loop over kk[55]..kk[0]
413 for(i=55;i>=0;i--){
415 MEMALIGN batch tkkmulti=kkmulti[i];
416 batch *si=(batch *)sbox_in;
417 batch *r6_N=(batch *)(r+roff+GROUP_PARALLELISM*6);
418 for(g=0;g<count_all/BYTES_PER_BATCH;g++){
419 si[g]=B_FFXOR(tkkmulti,r6_N[g]); //FIXME: introduce FASTBATCH?
423 // table lookup, this works on only one byte at a time
424 // most difficult part of all
425 // - can't be parallelized
426 // - can't be synthetized through boolean terms (8 input bits are too many)
427 for(g=0;g<count_all;g++){
428 sbox_out[g]=block_sbox[sbox_in[g]];
431 // bit permutation
433 unsigned char *po=(unsigned char *)perm_out;
434 unsigned char *so=(unsigned char *)sbox_out;
435 //dump_mem("pre perm ",(unsigned char *)so,GROUP_PARALLELISM,GROUP_PARALLELISM);
436 for(g=0;g<count_all;g+=BYTES_PER_BATCH){
437 MEMALIGN batch in,out;
438 in=*(batch *)&so[g];
440 out=B_FFOR(
441 B_FFOR(
442 B_FFOR(
443 B_FFOR(
444 B_FFOR(
445 B_FFSH8L(B_FFAND(in,B_FFN_ALL_29()),1),
446 B_FFSH8L(B_FFAND(in,B_FFN_ALL_02()),6)),
447 B_FFSH8L(B_FFAND(in,B_FFN_ALL_04()),3)),
448 B_FFSH8R(B_FFAND(in,B_FFN_ALL_10()),2)),
449 B_FFSH8R(B_FFAND(in,B_FFN_ALL_40()),6)),
450 B_FFSH8R(B_FFAND(in,B_FFN_ALL_80()),4));
452 *(batch *)&po[g]=out;
454 //dump_mem("post perm",(unsigned char *)po,GROUP_PARALLELISM,GROUP_PARALLELISM);
457 roff-=GROUP_PARALLELISM; /* virtual shift of registers */
459 #if 0
460 /* one by one */
461 for(g=0;g<count_all;g++){
462 r[roff+GROUP_PARALLELISM*0+g]=r[roff+GROUP_PARALLELISM*8+g]^sbox_out[g];
463 r[roff+GROUP_PARALLELISM*6+g]^=perm_out[g];
464 r[roff+GROUP_PARALLELISM*4+g]^=r[roff+GROUP_PARALLELISM*0+g];
465 r[roff+GROUP_PARALLELISM*3+g]^=r[roff+GROUP_PARALLELISM*0+g];
466 r[roff+GROUP_PARALLELISM*2+g]^=r[roff+GROUP_PARALLELISM*0+g];
468 #else
469 for(g=0;g<count_all;g+=BEST_SPAN){
470 XOR_BEST_BY(&r[roff+GROUP_PARALLELISM*0+g],&r[roff+GROUP_PARALLELISM*8+g],&sbox_out[g]);
471 XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*6+g],&perm_out[g]);
472 XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*4+g],&r[roff+GROUP_PARALLELISM*0+g]);
473 XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*3+g],&r[roff+GROUP_PARALLELISM*0+g]);
474 XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*2+g],&r[roff+GROUP_PARALLELISM*0+g]);
476 #endif
479 #define FASTTRASP2
480 #ifndef FASTTRASP2
481 for(g=0;g<count;g++){
482 // Copy results
483 int j;
484 for(j=0;j<8;j++){
485 bd[8*g+j]=r[roff+GROUP_PARALLELISM*j+g];
488 #else
489 trasp_8_N((unsigned char *)&r[roff],(unsigned char *)bd,count);
490 #endif
493 //-----------------------------------EXTERNAL INTERFACE
495 //-----get internal parallelism
497 int get_internal_parallelism(void){
498 return GROUP_PARALLELISM;
501 //-----get suggested cluster size
503 int get_suggested_cluster_size(void){
504 int r;
505 r=GROUP_PARALLELISM+GROUP_PARALLELISM/10;
506 if(r<GROUP_PARALLELISM+5) r=GROUP_PARALLELISM+5;
507 return r;
510 //-----key structure
512 void *get_key_struct(void){
513 struct csa_keys_t *keys=(struct csa_keys_t *)MALLOC(sizeof(struct csa_keys_t));
514 if(keys) {
515 static const unsigned char pk[8] = { 0,0,0,0,0,0,0,0 };
516 set_control_words(keys,pk,pk);
518 return keys;
/*
 * Release a key pair obtained from get_key_struct().
 * keys is handed straight to FREE (free() unless the build overrides it).
 */
void free_key_struct(void *keys){
  // plain call: a void function must not `return` an expression
  FREE(keys);
}
525 //-----set control words
527 static void schedule_key(struct csa_key_t *key, const unsigned char *pk){
528 // could be made faster, but is not run often
529 int bi,by;
530 int i,j;
531 // key
532 memcpy(key->ck,pk,8);
533 // precalculations for stream
534 key_schedule_stream(key->ck,key->iA,key->iB);
535 for(by=0;by<8;by++){
536 for(bi=0;bi<8;bi++){
537 key->ck_g[by][bi]=(key->ck[by]&(1<<bi))?FF1():FF0();
540 for(by=0;by<8;by++){
541 for(bi=0;bi<4;bi++){
542 key->iA_g[by][bi]=(key->iA[by]&(1<<bi))?FF1():FF0();
543 key->iB_g[by][bi]=(key->iB[by]&(1<<bi))?FF1():FF0();
546 // precalculations for block
547 key_schedule_block(key->ck,key->kk);
548 for(i=0;i<56;i++){
549 for(j=0;j<BYTES_PER_BATCH;j++){
550 *(((unsigned char *)&key->kkmulti[i])+j)=key->kk[i];
555 void set_control_words(void *keys, const unsigned char *ev, const unsigned char *od){
556 schedule_key(&((struct csa_keys_t *)keys)->even,ev);
557 schedule_key(&((struct csa_keys_t *)keys)->odd,od);
560 void set_even_control_word(void *keys, const unsigned char *pk){
561 schedule_key(&((struct csa_keys_t *)keys)->even,pk);
564 void set_odd_control_word(void *keys, const unsigned char *pk){
565 schedule_key(&((struct csa_keys_t *)keys)->odd,pk);
568 //-----get control words
570 void get_control_words(void *keys, unsigned char *even, unsigned char *odd){
571 memcpy(even,&((struct csa_keys_t *)keys)->even.ck,8);
572 memcpy(odd,&((struct csa_keys_t *)keys)->odd.ck,8);
575 //----- decrypt
577 int decrypt_packets(void *keys, unsigned char **cluster){
578 // statistics, currently unused
579 int stat_no_scramble=0;
580 int stat_reserved=0;
581 int stat_decrypted[2]={0,0};
582 int stat_decrypted_mini=0;
583 unsigned char **clst;
584 unsigned char **clst2;
585 int grouped;
586 int group_ev_od;
587 int advanced;
588 int can_advance;
589 unsigned char *g_pkt[GROUP_PARALLELISM];
590 int g_len[GROUP_PARALLELISM];
591 int g_offset[GROUP_PARALLELISM];
592 int g_n[GROUP_PARALLELISM];
593 int g_residue[GROUP_PARALLELISM];
594 unsigned char *pkt;
595 int xc0,ev_od,len,offset,n,residue;
596 struct csa_key_t* k;
597 int i,j,iter,g;
598 int t23,tsmall;
599 int alive[24];
600 //icc craziness int pad1=0; //////////align! FIXME
601 unsigned char *encp[GROUP_PARALLELISM];
602 MEMALIGN unsigned char stream_in[GROUP_PARALLELISM*8];
603 MEMALIGN unsigned char stream_out[GROUP_PARALLELISM*8];
604 MEMALIGN unsigned char ib[GROUP_PARALLELISM*8];
605 MEMALIGN unsigned char block_out[GROUP_PARALLELISM*8];
606 #ifdef COPY_UNALIGNED_PKT
607 unsigned char *unaligned[GROUP_PARALLELISM];
608 MEMALIGN unsigned char alignedBuff[GROUP_PARALLELISM][188];
609 #endif
610 struct stream_regs regs;
612 //icc craziness i=(int)&pad1;//////////align!!! FIXME
614 // build a list of packets to be processed
615 clst=cluster;
616 grouped=0;
617 advanced=0;
618 can_advance=1;
619 group_ev_od=-1; // silence incorrect compiler warning
620 pkt=*clst;
621 do{ // find a new packet
622 if(grouped==GROUP_PARALLELISM){
623 // full
624 break;
626 if(pkt==NULL){
627 // no more ranges
628 break;
630 if(pkt>=*(clst+1)){
631 // out of this range, try next
632 clst++;clst++;
633 pkt=*clst;
634 continue;
637 do{ // handle this packet
638 xc0=pkt[3]&0xc0;
639 DBG(fprintf(stderr," exam pkt=%p, xc0=%02x, can_adv=%i\n",pkt,xc0,can_advance));
640 if(xc0==0x00){
641 DBG(fprintf(stderr,"skip clear pkt %p (can_advance is %i)\n",pkt,can_advance));
642 advanced+=can_advance;
643 stat_no_scramble++;
644 break;
646 if(xc0==0x40){
647 DBG(fprintf(stderr,"skip reserved pkt %p (can_advance is %i)\n",pkt,can_advance));
648 advanced+=can_advance;
649 stat_reserved++;
650 break;
652 if(xc0==0x80||xc0==0xc0){ // encrypted
653 ev_od=(xc0&0x40)>>6; // 0 even, 1 odd
654 if(grouped==0) group_ev_od=ev_od; // this group will be all even (or odd)
655 if(group_ev_od==ev_od){ // could be added to group
656 pkt[3]&=0x3f; // consider it decrypted now
657 if(pkt[3]&0x20){ // incomplete packet
658 offset=4+pkt[4]+1;
659 len=188-offset;
660 n=len>>3;
661 residue=len-(n<<3);
662 if(n==0){ // decrypted==encrypted!
663 DBG(fprintf(stderr,"DECRYPTED MINI! (can_advance is %i)\n",can_advance));
664 advanced+=can_advance;
665 stat_decrypted_mini++;
666 break; // this doesn't need more processing
668 }else{
669 len=184;
670 offset=4;
671 n=23;
672 residue=0;
674 g_pkt[grouped]=pkt;
675 g_len[grouped]=len;
676 g_offset[grouped]=offset;
677 g_n[grouped]=n;
678 g_residue[grouped]=residue;
679 DBG(fprintf(stderr,"%2i: eo=%i pkt=%p len=%03i n=%2i residue=%i\n",grouped,ev_od,pkt,len,n,residue));
680 grouped++;
681 advanced+=can_advance;
682 stat_decrypted[ev_od]++;
684 else{
685 can_advance=0;
686 DBG(fprintf(stderr,"skip pkt %p and can_advance set to 0\n",pkt));
687 break; // skip and go on
690 } while(0);
692 if(can_advance){
693 // move range start forward
694 *clst+=188;
696 // next packet, if there is one
697 pkt+=188;
698 } while(1);
699 DBG(fprintf(stderr,"-- result: grouped %i pkts, advanced %i pkts\n",grouped,advanced));
701 // delete empty ranges and compact list
702 clst2=cluster;
703 for(clst=cluster;*clst!=NULL;clst+=2){
704 // if not empty
705 if(*clst<*(clst+1)){
706 // it will remain
707 *clst2=*clst;
708 *(clst2+1)=*(clst+1);
709 clst2+=2;
712 *clst2=NULL;
714 if(grouped==0){
715 // no processing needed
716 return advanced;
719 // sort them, longest payload first
720 // we expect many n=23 packets and a few n<23
721 DBG(fprintf(stderr,"PRESORTING\n"));
722 for(i=0;i<grouped;i++){
723 DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
725 // grouped is always <= GROUP_PARALLELISM
727 #define g_swap(a,b) \
728 pkt=g_pkt[a]; \
729 g_pkt[a]=g_pkt[b]; \
730 g_pkt[b]=pkt; \
732 len=g_len[a]; \
733 g_len[a]=g_len[b]; \
734 g_len[b]=len; \
736 offset=g_offset[a]; \
737 g_offset[a]=g_offset[b]; \
738 g_offset[b]=offset; \
740 n=g_n[a]; \
741 g_n[a]=g_n[b]; \
742 g_n[b]=n; \
744 residue=g_residue[a]; \
745 g_residue[a]=g_residue[b]; \
746 g_residue[b]=residue;
748 // step 1: move n=23 packets before small packets
749 t23=0;
750 tsmall=grouped-1;
751 for(;;){
752 for(;t23<grouped;t23++){
753 if(g_n[t23]!=23) break;
755 DBG(fprintf(stderr,"t23 after for =%i\n",t23));
757 for(;tsmall>=0;tsmall--){
758 if(g_n[tsmall]==23) break;
760 DBG(fprintf(stderr,"tsmall after for =%i\n",tsmall));
762 if(tsmall-t23<1) break;
764 DBG(fprintf(stderr,"swap t23=%i,tsmall=%i\n",t23,tsmall));
766 g_swap(t23,tsmall);
768 t23++;
769 tsmall--;
770 DBG(fprintf(stderr,"new t23=%i,tsmall=%i\n\n",t23,tsmall));
772 DBG(fprintf(stderr,"packets with n=23, t23=%i grouped=%i\n",t23,grouped));
773 DBG(fprintf(stderr,"MIDSORTING\n"));
774 for(i=0;i<grouped;i++){
775 DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
778 // step 2: sort small packets in decreasing order of n (bubble sort is enough)
779 for(i=t23;i<grouped;i++){
780 for(j=i+1;j<grouped;j++){
781 if(g_n[j]>g_n[i]){
782 g_swap(i,j);
786 DBG(fprintf(stderr,"POSTSORTING\n"));
787 for(i=0;i<grouped;i++){
788 DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
791 // we need to know how many packets need 23 iterations, how many 22...
792 for(i=0;i<=23;i++){
793 alive[i]=0;
795 // count
796 alive[23-1]=t23;
797 for(i=t23;i<grouped;i++){
798 alive[g_n[i]-1]++;
800 // integrate
801 for(i=22;i>=0;i--){
802 alive[i]+=alive[i+1];
804 DBG(fprintf(stderr,"ALIVE\n"));
805 for(i=0;i<=23;i++){
806 DBG(fprintf(stderr,"alive%2i=%i\n",i,alive[i]));
809 // choose key
810 if(group_ev_od==0){
811 k=&((struct csa_keys_t *)keys)->even;
813 else{
814 k=&((struct csa_keys_t *)keys)->odd;
817 //INIT
818 //#define INITIALIZE_UNUSED_INPUT
819 #ifdef INITIALIZE_UNUSED_INPUT
820 // unnecessary zeroing.
821 // without this, we operate on uninitialized memory
822 // when grouped<GROUP_PARALLELISM, but it's not a problem,
823 // as final results will be discarded.
824 // random data makes debugging sessions difficult.
825 for(j=0;j<GROUP_PARALLELISM*8;j++) stream_in[j]=0;
826 DBG(fprintf(stderr,"--- WARNING: you could gain speed by not initializing unused memory ---\n"));
827 #else
828 DBG(fprintf(stderr,"--- WARNING: DEBUGGING IS MORE DIFFICULT WHEN PROCESSING RANDOM DATA CHANGING AT EVERY RUN! ---\n"));
829 #endif
831 for(g=0;g<grouped;g++){
832 encp[g]=g_pkt[g];
833 DBG(fprintf(stderr,"header[%i]=%p (%02x)\n",g,encp[g],*(encp[g])));
834 encp[g]+=g_offset[g]; // skip header
835 #ifdef COPY_UNALIGNED_PKT
836 if(((int)encp[g])&0x03) {
837 memcpy(alignedBuff[g],encp[g],g_len[g]);
838 unaligned[g]=encp[g];
839 encp[g]=alignedBuff[g];
841 else unaligned[g]=0;
842 #endif
843 FFTABLEIN(stream_in,g,encp[g]);
845 //dump_mem("stream_in",stream_in,GROUP_PARALLELISM*8,BYPG);
848 // ITER 0
849 DBG(fprintf(stderr,">>>>>ITER 0\n"));
850 iter=0;
851 stream_cypher_group_init(&regs,k->iA_g,k->iB_g,stream_in);
852 // fill first ib
853 for(g=0;g<alive[iter];g++){
854 COPY_8_BY(ib+8*g,encp[g]);
856 DBG(dump_mem("IB ",ib,8*alive[iter],8));
857 // ITER 1..N-1
858 for (iter=1;iter<23&&alive[iter-1]>0;iter++){
859 DBG(fprintf(stderr,">>>>>ITER %i\n",iter));
860 // alive and just dead packets: calc block
861 block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
862 DBG(dump_mem("BLO_ib ",block_out,8*alive[iter-1],8));
863 // all packets (dead too): calc stream
864 stream_cypher_group_normal(&regs,stream_out);
865 //dump_mem("stream_out",stream_out,GROUP_PARALLELISM*8,BYPG);
867 // alive packets: calc ib
868 for(g=0;g<alive[iter];g++){
869 FFTABLEOUT(ib+8*g,stream_out,g);
870 DBG(dump_mem("stream_out_ib ",ib+8*g,8,8));
871 // XOREQ8BY gcc bug? 2x4 ok, 8 ko UPDATE: result ok but speed 1-2% slower (!!!???)
872 #if 1
873 XOREQ_4_BY(ib+8*g,encp[g]+8);
874 XOREQ_4_BY(ib+8*g+4,encp[g]+8+4);
875 #else
876 XOREQ_8_BY(ib+8*g,encp[g]+8);
877 #endif
878 DBG(dump_mem("after_stream_xor_ib ",ib+8*g,8,8));
880 // alive packets: decrypt data
881 for(g=0;g<alive[iter];g++){
882 DBG(dump_mem("before_ib_decrypt_data ",encp[g],8,8));
883 XOR_8_BY(encp[g],ib+8*g,block_out+8*g);
884 DBG(dump_mem("after_ib_decrypt_data ",encp[g],8,8));
886 // just dead packets: write decrypted data
887 for(g=alive[iter];g<alive[iter-1];g++){
888 DBG(dump_mem("jd_before_ib_decrypt_data ",encp[g],8,8));
889 COPY_8_BY(encp[g],block_out+8*g);
890 DBG(dump_mem("jd_after_ib_decrypt_data ",encp[g],8,8));
892 // just dead packets: decrypt residue
893 for(g=alive[iter];g<alive[iter-1];g++){
894 DBG(dump_mem("jd_before_decrypt_residue ",encp[g]+8,g_residue[g],g_residue[g]));
895 FFTABLEOUTXORNBY(g_residue[g],encp[g]+8,stream_out,g);
896 DBG(dump_mem("jd_after_decrypt_residue ",encp[g]+8,g_residue[g],g_residue[g]));
898 // alive packets: pointers++
899 for(g=0;g<alive[iter];g++) encp[g]+=8;
901 // ITER N
902 DBG(fprintf(stderr,">>>>>ITER 23\n"));
903 iter=23;
904 // calc block
905 block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
906 DBG(dump_mem("23BLO_ib ",block_out,8*alive[iter-1],8));
907 // just dead packets: write decrypted data
908 for(g=alive[iter];g<alive[iter-1];g++){
909 DBG(dump_mem("23jd_before_ib_decrypt_data ",encp[g],8,8));
910 COPY_8_BY(encp[g],block_out+8*g);
911 DBG(dump_mem("23jd_after_ib_decrypt_data ",encp[g],8,8));
913 // no residue possible
914 // so do nothing
916 DBG(fprintf(stderr,"returning advanced=%i\n",advanced));
918 #ifdef COPY_UNALIGNED_PKT
919 for(g=0;g<grouped;g++)
920 if(unaligned[g]) memcpy(unaligned[g],alignedBuff[g],g_len[g]);
921 #endif
923 M_EMPTY(); // restore CPU multimedia state
925 return advanced;