patch #7308
[mldonkey.git] / src / networks / direct_connect / che3_c.c
blob350717141037739f872a90607fb80175c514979c
1 /* rewrite in C and caml stubs by b8_bavard (2002) */
2 /* rewrite to class without glib by Mathias Küster (2002) */
4 /* DCTC - a Direct Connect text clone for Linux
5 * Copyright (C) 2001 Eric Prevoteau
7 * he3.c: Copyright (C) Eric Prevoteau <www@ac2i.tzo.com>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
29 #include <dclib/clist.h>
30 #include <dclib/cbytearray.h>
32 #include "che3_c.h"
34 /******************************************************/
35 /* patch from David Toth for */
36 /* File sharing list Windows (DC) compatibiliy bugfix */
37 /* (byte 4 meaning and computation). */
38 /******************************************************/
40 /*********************************************************************/
41 /* When the file list of another user is retrieved, it is compressed */
42 /* The data format is described below.
43 byte 0: 'H'
44 byte 1: 'E'
45 byte 2: '3'
46 byte 3: 0xD
47 byte 4: 8 bit parity of the decoded data (the result of all bytes "xor"-ed)
48 byte 5 - 8 : it is a number encoded in little endian. It it the number of bytes to produce.
49 byte 9 - 10: it is a number encoded in little endian. This number gives the number of couples.
50 Let's name this number as N
51 byte 11 - (10+2*N): this is a list of couple. The first byte is a character (the one before compression),
52 the second byte gives the length of the compressed pattern. Let's name this value L.
53 byte (11+2*N) - (11+2*N+ ((sum(L)+7)&~7): a bit barbare :) To simplify. Let's name K, the sum of all L values.
54 Here, you find a string of K bits (byte aligned). The L0 first bits are the encoded value of
55 caracter of couple 0. The next Lx bits are the encoded value of caractere of couple x.
56 Note: each byte are used from the lower bit to the upper bit.
57 after this string, you have the encoded string.
58 When data are finally decoded. The format is always:
59 directory name \xD\xA
60 \x9 filename | filesize \xD\xA
61 \x9 filename | filesize \xD\xA
62 \x9 filename | filesize \xD\xA
63 \x9 filename | filesize \xD\xA
64 \x9 filename | filesize \xD\xA
69 /* ============= Decoding functions ================ */
71 /******************************************************/
72 /*get 1 bit from the current bit position inside data */
73 /******************************************************/
74 unsigned long get_bit(unsigned char *data, unsigned long *cur_pos)
76 unsigned long out;
78 out=((unsigned long)(data[(*cur_pos)/8]>>((*cur_pos)&7)))&1;
80 (*cur_pos)++;
82 return out;
85 /*********************************************************/
86 /* get nb_bits from the current bit position inside data */
87 /*********************************************************/
88 unsigned long get_bits(unsigned char *data, unsigned long *cur_pos, int nb_bit)
90 int i;
91 unsigned long res=0;
93 for(i=0;i<nb_bit;i++)
95 res=(res<<1)|get_bit(data,cur_pos);
97 return res;
100 /**************************************************/
101 /* decompress data compressed using HE3 algorithm */
102 /**********************************************************/
103 /* input: a GByteArray containing HE3 compressed data */
104 /* output: a GString containing uncompressed data or NULL */
105 /**********************************************************/
106 char *decode_he3_data(char *data_string, int data_len, int* final_len)
108 char *output_string = NULL;
109 int output_len = 0;
112 if((data_string[0]=='H')&&(data_string[1]=='E')&&(data_string[2]=='3')&&(data_string[3]==0xD))
114 char *decode_array=NULL;
115 int pos;
116 int nb_couple;
117 int max_len=0; /* max size of encoded pattern */
118 int ttl_len=0; /* total size of all encoded patterns */
119 unsigned long offset_pattern;
120 unsigned long offset_encoded;
121 int nb_output;
122 int l=0;
124 /* compute the number of bytes to produce */
125 nb_output=(((int)(data_string[8]))&255);
126 nb_output<<=8;
127 nb_output|=(((int)(data_string[7]))&255);
128 nb_output<<=8;
129 nb_output|=(((int)(data_string[6]))&255);
130 nb_output<<=8;
131 nb_output|=(((int)(data_string[5]))&255);
133 /* compute the number of couples */
134 nb_couple=data_string[9];
135 nb_couple+=((((int)(data_string[10]))&255)<<8);
137 for(pos=0;pos<nb_couple;pos++)
139 int v;
141 v=(((int)(data_string[12+pos*2]))&255);
142 if(v>max_len)
143 max_len=v;
144 ttl_len+=v;
147 //decode_array=g_byte_array_new();
148 decode_array = NULL;
149 /* theorytically, max_len could reach up to 255 */
152 decode_array = malloc(1<<(max_len+1));
154 /* but really, a value above 15 is not oftenly encountered */
155 /* thus the algorithm needs no more than ... 64KB */
156 /* raisonnable value is upto 23 (requires 8MB) */
158 if(decode_array!=NULL)
160 /* clear the decode array */
161 /* the decode array is technically a binary tree */
162 /* if the depth of the tree is big (let's say more than 10), */
163 /* storing the binary tree inside an array becomes very memory consumming */
164 /* but I am too lazy to program all binary tree creation/addition/navigation/destruction functions :) */
165 memset(decode_array,0,1<<(max_len+1));
167 offset_pattern=8*(11+nb_couple*2); /* position of the pattern block, it is just after the list of couples */
168 offset_encoded=offset_pattern + ((ttl_len+7)&~7); /* the encoded data are just after */
169 /* the pattern block (rounded to upper full byte) */
171 /* decode_array is a binary tree. byte 0 is the level 0 of the tree. byte 2-3, the level 1, byte 4-7, the level 2, */
172 /* in decode array, a N bit length pattern having the value K is its data at the position: */
173 /* 2^N + (K&((2^N)-1)) */
174 /* due to the fact K has always N bit length, the formula can be simplified into: */
175 /* 2^N + K */
176 for(pos=0;pos<nb_couple;pos++)
178 unsigned int v_len;
179 unsigned long value;
181 v_len=(((int)(data_string[12+pos*2]))&255); /* the number of bit required */
183 value=get_bits(data_string,&offset_pattern,v_len);
184 decode_array[(1<<v_len)+ value]=data_string[11+pos*2]; /* the character */
187 /* now, its time to decode */
188 //while(output->len!=nb_output)
189 output_string = malloc(nb_output+1);
190 output_string[nb_output] = 0;
191 while(output_len != nb_output)
192 //while(l!=nb_output)
194 unsigned long cur_val;
195 unsigned int nb_bit_val;
197 cur_val=get_bit(data_string,&offset_encoded); /* get one bit */
198 nb_bit_val=1;
200 while(decode_array[(1<<nb_bit_val) + cur_val ]==0)
202 cur_val=(cur_val<<1)|get_bit(data_string,&offset_encoded);
203 nb_bit_val++;
206 //output=g_string_append_c(output,decode_array->data[(1<<nb_bit_val) + cur_val ]);
207 output_string[output_len++] = decode_array[(1<<nb_bit_val) + cur_val ];
208 l++;
211 //g_byte_array_free(decode_array,TRUE);
212 free(decode_array);
217 int i;
218 unsigned char parity=0;
220 for(i=0;i<output_len;i++)
221 parity^=output_string[i];
225 *final_len = output_len;
226 return output_string;
230 #if 0
231 /* ================= Encoding functions ======================== */
233 //static gint huf_insert_glist(gconstpointer a, gconstpointer b)
234 int huf_insert_glist(void * a, void * b)
236 if(((const HUFNODE*)a)->occur<((const HUFNODE*)b)->occur)
237 return -1; /* a is must be before b */
238 else if(((const HUFNODE*)a)->occur>((const HUFNODE*)b)->occur)
239 return 1; /* a is must be after b */
241 /* if both have the same occurences, the one without son comes before */
242 if( (((const HUFNODE*)a)->left==NULL) && (((const HUFNODE*)b)->left==NULL) )
243 return -1;
245 if(((const HUFNODE*)a)->left==NULL)
246 return -1;
248 return 1;
250 #endif
252 /**************************************************************************/
253 /* recursively scan all nodes of the huffman tree and fill encoding table */
254 /**************************************************************************/
255 void use_hufnode(HUFENCODE tbl_enc[256], HUFNODE *node,unsigned int bits_len, unsigned long bits)
257 if(node->left!=NULL)
258 { /* it is a node, right is always != NULL */
259 use_hufnode(tbl_enc,node->left,bits_len+1,(bits<<1)|0);
260 use_hufnode(tbl_enc,node->right,bits_len+1,(bits<<1)|1);
262 else
263 { /* it is a leaf, right is always NULL */
264 int idx=((int)node->val)&255;
265 tbl_enc[idx].bits_len=bits_len;
266 tbl_enc[idx].bits=bits;
267 // printf("huf: bits_len: %u pattern: %lu ",bits_len,bits);
268 // printf("leaf: %d (%c)\n",idx,node->val);
270 // if(bits_len>8*sizeof(unsigned long)) /* compared with the size of unsigned long in bits */
271 // disp_msg(ERR_MSG,"user_hufnode","encoded value cannot fit into unsigned long",NULL);
274 return;
277 /*****************************************************************/
278 /* recursively free memory used by all nodes of the huffman tree */
279 /*****************************************************************/
280 void free_hufnode(HUFNODE *node)
282 // printf("free node %c\n",node->val);
283 if(node==NULL)
284 return;
285 if(node->left!=NULL)
286 free_hufnode(node->left);
287 if(node->right!=NULL)
288 free_hufnode(node->right);
289 free(node);
293 void add_bit(char *data, unsigned long *bit_pos, unsigned char bit_value)
295 if(((*bit_pos)&7)==0) /* starting a new byte ? */
297 //data=g_byte_array_append(data,&v,1);
298 data[(*bit_pos)/8]|= 0;
301 /* due to the fact we always add 0 as new byte, we don't have to set bit having 0 as value */
302 /* but just 1 */
303 if(bit_value!=0)
305 data[(*bit_pos)/8]|= (1<<((*bit_pos)&7));
308 (*bit_pos)++;
311 /**************************************/
312 /* append the pattern to data@bit_pos */
313 /**************************************/
314 void add_bits(char *data, unsigned long *bit_pos, unsigned long pattern, unsigned int pattern_length)
316 unsigned long i;
318 for(i=0;i<pattern_length;i++)
320 add_bit(data, bit_pos,(unsigned char)(pattern>>(pattern_length-1-i))&1 );
321 /* use pattern from upper to lower bit */
326 #if 0
327 static void disp_huf(GList *pre_tree)
329 int i;
330 HUFNODE *w;
331 printf("---\n");
332 //for(i=0;i<g_list_length(pre_tree);i++)
333 for(i=0;i<pre_tree->size;i++)
335 w=g_list_nth_data(pre_tree,i); /* get the first HUFNODE of the list */
337 printf("occur: %ld, left=%p, right=%p, val=%02X\n",w->occur,w->left,w->right,w->val);
340 #endif
341 /*****************************************************************************************/
342 /* compress data compressed using an Huffman algorithm and store it in HE3 usable format */
343 /*****************************************************************************************/
344 /* input: a GString containing a string to compress */
345 /* output: a GByteArray containing compressed data or NULL */
346 /***********************************************************/
347 static HUFNODE *pre_tree[256];
348 static int pre_tree_len = 0;
350 void insert_node(HUFNODE *nw){
352 int j;
354 j=pre_tree_len++;
355 while(j>0 && (pre_tree[j-1]->occur < nw->occur || (pre_tree[j-1]->occur == nw->occur && pre_tree[j-1]->left == NULL && nw->left != NULL))){
356 pre_tree[j] = pre_tree[j-1];
357 // printf("move %d -> %d\n", j- 1, j);
358 j--;
360 pre_tree[j] = nw;
361 // printf("insert at %d\n", j);
364 HUFNODE *remove_node()
366 pre_tree_len--;
367 // printf("remove %d\n", pre_tree_len);
368 return pre_tree[pre_tree_len];
371 char *encode_he3_data(char *str, int len, int* final_len)
373 unsigned long occur[256];
374 HUFENCODE tbl_enc[256];
376 long i;
377 char *data = NULL;
378 HUFNODE *root_huf=NULL;
379 int nb_val=0;
380 unsigned long bit_pos;
381 char *pos;
383 if((str==NULL)||(len==0)) {
384 *final_len = 0;
385 return NULL;
388 /* count the number of times each character appears */
389 memset(occur,0,sizeof(occur));
391 for(i=0;i<len;i++)
393 occur[((int)(str[i]))&255]++;
396 /* now, we will build the huffman tree */
397 pre_tree_len = 0;
399 /* stage 1: create all the leafs */
400 for(i=0;i<256;i++)
402 if(occur[i]!=0)
404 HUFNODE *nw;
406 nw=(HUFNODE*)malloc(sizeof(HUFNODE));
407 nw->occur=occur[i];
408 nw->left=NULL;
409 nw->right=NULL;
410 nw->val=(unsigned char)i;
411 insert_node(nw);
412 nb_val++;
416 while(pre_tree_len>1)
418 HUFNODE *nw;
420 nw=(HUFNODE*)malloc(sizeof(HUFNODE));
422 nw->left=remove_node();
423 nw->right=remove_node ();
425 nw->occur=nw->left->occur+nw->right->occur;
426 nw->val=0;
428 insert_node(nw);
430 #if 0
431 disp_huf(pre_tree);
432 #endif
433 /* now, the huffman tree is done */
434 //root_huf=g_list_nth_data(pre_tree,0);
435 root_huf=pre_tree[0];
436 pre_tree_len=0;
438 memset(tbl_enc,0,sizeof(tbl_enc));
440 /* now, we will compute encoding pattern from huffman tree */
441 use_hufnode(tbl_enc,root_huf,0,0);
443 /* well, encoding table is now done, we can encode the data */
444 //data=g_byte_array_new();
445 data = malloc(len + 256 * 10);
446 pos=data;
447 /* set the initial HE3 header */
449 unsigned char he3_header[]={'H','E','3',0xD,
450 0, /* parity: all uncompressed bytes xor'ed */
451 0,0,0,0, /* number of bytes to produce (little-endian) */
452 0,0}; /* number of couples */
454 /* patch from David Toth (byte 4: parity computation) */
455 unsigned char parity=0;
457 for(i=0;i<len;i++)
458 parity^=str[i];
459 he3_header[4]=(unsigned char)(parity&255);
461 //he3_header[5]=str->len&255;
462 he3_header[5]=(unsigned char)(len&255);
463 he3_header[6]=(unsigned char)((len>>8)&255);
464 he3_header[7]=(unsigned char)((len>>16)&255);
465 he3_header[8]=(unsigned char)((len>>24)&255);
466 he3_header[9]=(unsigned char)(nb_val&255);
467 he3_header[10]=(unsigned char)((nb_val>>8)&255);
469 //data=g_byte_array_append(data,he3_header,11);
470 memmove(pos, he3_header, 11);
471 pos+=11;
474 /* add the couple list (character, pattern length) */
475 for(i=0;i<256;i++)
477 if(occur[i]!=0)
479 *pos++ = (unsigned char)i;
480 *pos++ = (unsigned char)tbl_enc[i].bits_len;
484 /* pos--; *//* increment pos when bit_pos mod 8 = 0. The semantics of pos
485 change: instead of the next byte to write, it is the previous byte written. */
487 /* and now, add all the patterns */
488 bit_pos=(pos - data)*8;
489 for(i=0;i<256;i++)
491 if(occur[i]!=0)
493 add_bits(data,&bit_pos,tbl_enc[i].bits, tbl_enc[i].bits_len);
497 bit_pos=(bit_pos+7)&~7; /* move to the beginning of the next byte */
499 /* now, we encode the string */
500 for(i=0;i<len;i++)
502 int idx=((int)str[i])&255;
504 add_bits(data,&bit_pos,tbl_enc[idx].bits, tbl_enc[idx].bits_len);
507 bit_pos=(bit_pos+7)&~7; /* move to the beginning of the next byte */
508 *final_len = (bit_pos / 8)+1;
510 // printf("final len %d\n", *final_len);
512 /* free huffman tree */
513 free_hufnode(root_huf);
515 return data;
520 #include "caml/mlvalues.h"
521 #include "caml/fail.h"
522 #include "caml/alloc.h"
525 value ml_che3_decompress(value s_v)
527 char *s = String_val(s_v);
528 int len = string_length(s_v);
529 char *result;
530 int final_len;
531 value res;
533 result = decode_he3_data(s, len, &final_len);
535 res = alloc_string(final_len);
536 memmove(String_val(res), result, final_len);
538 return res;
541 value ml_che3_compress(value s_v)
543 char *s = String_val(s_v);
544 int len = string_length(s_v);
545 char *result;
546 int final_len;
547 value res;
549 result = encode_he3_data(s, len, &final_len);
551 res = alloc_string(final_len);
552 memmove(String_val(res), result, final_len);
554 return res;
557 #if 0
559 int main(int argc,char **argv)
561 int final_len;
562 char *out;
563 char *in;
564 unsigned char tst[54]={0x48,0x45,0x33,0x0D,0x12,0x24,0x00,0x00,0x00,0x0B,0x00,0x09,0x04,0x0A,0x03,0x0D,0x04,0x2E,0x04,0x32,0x04,0x61,0x02,0x62,0x03,0x65,0x05,0x73,0x05,0x74,0x03,0x7C,0x04,0xA6,0xF4,0xE2,0x21,0xAE,0x01,0x61,0x71,0x29,0xFB,0xFF,0xFF,0xBE,0x75,0xA5,0x0C,0x00,0xF0,0xAD,0x2B,0x05};
566 out=decode_he3_data(tst, 54, &final_len);
568 printf("LEN[%d] = [%s]\n",final_len, out);
570 return 0;
573 int main(int argc,char **argv)
575 int final_len;
576 char *out;
577 char *in;
578 int start_len;
579 int i;
581 in= encode_he3_data(argv[1], strlen(argv[1]), &start_len);
582 for(i=0; i<start_len;i++)
583 printf("%4x,", in[i]);
584 printf("\n");
585 out=decode_he3_data(in, start_len, &final_len);
587 printf("LEN[%d] = [%s]\n",final_len, out);
589 return 0;
591 #endif