BG XLC: Use tr1/unordered_map
[charm.git] / src / arch / util / compress.c
blob8358bc0cc47b46bf5636b67abdf7f1853d21b360
/*
 * =====================================================================================
 *
 *       Filename:  compress.c
 *
 *    Description:  Floating-point compression/decompression algorithms
 *
 *        Version:  1.0
 *        Created:  09/02/2012 02:53:08 PM
 *       Revision:  none
 *       Compiler:  gcc
 *
 *         Author:  Yanhua Sun
 *   Organization:
 *
 * =====================================================================================
 */
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

struct timeval tv;
26 //#define USE_SSE 1
28 #if USE_SSE
29 #include <smmintrin.h>
30 #endif
/**
 * Return the current wall-clock time in seconds.
 *
 * The seconds and microseconds reported by gettimeofday() are combined into
 * one double.  If gettimeofday() fails, a message is written to stderr; the
 * timeval is zero-initialised beforehand so the returned value is still
 * computed from defined data.
 */
double get_clock(void)
{
    struct timeval now = {0, 0};

    if (gettimeofday(&now, NULL) < 0) {
        /* stdio is used here because this file does not declare CmiPrintf. */
        fprintf(stderr, "gettimeofday error\n");
    }
    return (double)now.tv_sec + (double)now.tv_usec * 1.0E-6;
}
/* Build-time switches: COMPRESS=0 turns every codec into a plain memcpy. */
#define COMPRESS 1
//#define DEBUG 1
#ifndef CHAR_BIT
#define CHAR_BIT 8
#endif
/* Fully parenthesised so the macros survive use inside larger expressions. */
#define FLOAT_BIT (CHAR_BIT * sizeof(float))
#define FLOAT_BYTE sizeof(float)

/* COMPRESS_EXP=1 selects the exponent-sharing codecs, 0 the leading-zero-count ones. */
#define COMPRESS_EXP 1

#if COMPRESS_EXP
/* Bit helpers over a byte array: bit i lives in byte i/8 at position i%8. */
#define SETBIT(dest, i)    ((dest)[(i) >> 3] |= (1 << ((i) & 7)))
#define TESTBIT(dest, i)   (((dest)[(i) >> 3] >> ((i) & 7)) & 1)
/* Two-bit variants: set / read the pair of bits starting at bit i. */
#define SETBIT11(dest, i)  ((dest)[(i) >> 3] |= (3 << ((i) & 7)))
#define TESTBIT11(dest, i) (((dest)[(i) >> 3] >> ((i) & 7)) & 0x3)

#else

/* Bit helpers over a single integer word. */
#define TESTBIT(data, b) (((data) >> (b)) & 1)
#define SETBIT(data, index, bit) ((data) |= ((bit) << (index)))
#endif
/**
 * compressChar: byte-wise delta compression usable for any data.
 *
 * Output layout in dst: a bitmap of (size+7)/8 bytes followed by literal
 * bytes.  Bit i of the bitmap is set when src[i] equals bData[i]; otherwise
 * src[i] is appended to the literal area.
 *
 * Encoding stops as soon as the write position reaches `size`, so elements
 * after that point are not encoded.  NOTE(review): callers presumably treat
 * *compressSize >= size as "not compressible" -- confirm against the caller.
 *
 * @param src           input bytes
 * @param dst           output buffer
 * @param size          number of input bytes
 * @param compressSize  receives the number of bytes produced in dst
 * @param bData         base bytes to compare against (same length as src)
 */
void compressChar(void *src, void *dst, int size, int *compressSize, void *bData)
{
    char *in = (char*)src;
    char *out = (char*)dst;
    char *ref = (char*)bData;
#if DEBUG
    double t1 = get_clock();
#endif

#if !COMPRESS
    memcpy(out, in, size*sizeof(char));
    *compressSize = size;
#else
    const int maskBytes = (size+7)/8;
    int writePos = maskBytes;   /* literals start right after the bitmap */
    int pos = 0;

    memset(out, 0, maskBytes);
    while (pos < size && writePos < size) {
        if (in[pos] == ref[pos]) {
            /* Same as the base byte: only flag it in the bitmap. */
            SETBIT(out, pos);
        } else {
            /* Different: keep the byte itself. */
            out[writePos] = in[pos];
            writePos++;
        }
        ++pos;
    }
    *compressSize = writePos;
#endif
#if DEBUG
    double t = get_clock()-t1;
    printf(" +++++CHAR done compressing(%d===>%d) (reduction:%d) ration=%f time=%d us\n", (int)(size*sizeof(char)), *compressSize, (int)(size*sizeof(char)-*compressSize), (1-(float)*compressSize/(size*sizeof(char)))*100, (int)(t*1000000));
#endif
}
/**
 * decompressChar: inverse of compressChar.
 *
 * Reads a bitmap of (size+7)/8 bytes from cData followed by literal bytes.
 * For each element, a set bitmap bit copies the byte from bData; a clear bit
 * takes the next literal byte from the stream.
 *
 * @param cData         compressed input
 * @param dData         output buffer of `size` bytes
 * @param size          number of bytes to reconstruct
 * @param compressSize  size of the compressed input (not consulted here)
 * @param bData         base bytes used for elements flagged as unchanged
 */
void decompressChar(void *cData, void *dData, int size, int compressSize, void *bData) {
#if DEBUG
    double t1 = get_clock();
#endif
#if !COMPRESS
    memcpy(dData, cData, size*sizeof(char));
#else
    char *packed = (char*)cData;
    char *ref = (char*)bData;
    char *out = (char*)dData;
    int readPos = (size+7)/8;   /* first literal byte follows the bitmap */
    int pos;

    for (pos = 0; pos < size; ++pos) {
        int sameAsBase = TESTBIT(packed, pos);
        if (!sameAsBase) {
            /* Literal byte stored in the stream. */
            out[pos] = packed[readPos];
            readPos += 1;
        } else {
            out[pos] = ref[pos];
        }
    }
#endif
#if DEBUG
    double t = get_clock()-t1;
    printf("------CHAR done decompressing..... orig size:%d time:%d us \n", (int)size, (int)(t*1000000)) ;
#endif
}
131 #if COMPRESS_EXP
133 #if USE_SSE
#if COMPRESS
/*
 * Append one 32-bit word to the packed stream: 4 bytes (and a set bitmap bit)
 * when its top byte differs from the base, otherwise only its first 3 bytes.
 * Returns the updated write position.
 */
static int compressFloatingPoint_sseEmit(unsigned char *cdst, int dataIndex, int idx,
                                         const unsigned char *word, unsigned int topByteDiff)
{
    int nbytes = 3;
    if (topByteDiff) {
        cdst[idx >> 3] |= (unsigned char)(1u << (idx & 7));
        nbytes = 4;
    }
    memcpy(cdst + dataIndex, word, nbytes);
    return dataIndex + nbytes;
}
#endif

/**
 * Exponent-sharing float compressor (SSE variant).
 *
 * Output layout in dst: a bitmap of (size+7)/8 bytes, then for each 32-bit
 * word either 4 bytes (bitmap bit set: top byte differs from the base word)
 * or the first 3 bytes in memory (bitmap bit clear: top byte is shared).
 *
 * Four words are compared per iteration with unaligned loads, because the
 * callers' buffers carry no 16-byte alignment guarantee; any remaining
 * 1-3 words are handled by a scalar tail so nothing past the input is read.
 *
 * @param src           input floats
 * @param dst           output buffer
 * @param s             input size in bytes
 * @param compressSize  receives the number of bytes produced in dst
 * @param bData         base floats to compare against (must not be NULL)
 */
void compressFloatingPoint(void *src, void *dst, int s, int *compressSize, void *bData)
{
    const int size = s / (int)sizeof(float);
#if DEBUG
    double t1 = get_clock();
#endif

#if !COMPRESS
    memcpy(dst, src, size*sizeof(float));
    *compressSize = s;
#else
    const unsigned char *in = (const unsigned char *)src;
    const unsigned char *base = (const unsigned char *)bData;
    unsigned char *cdst = (unsigned char *)dst;
    const int bitmapBytes = (size + 7) / 8;
    const int vecEnd = size & ~3;              /* last index covered by full 4-word groups */
    const __m128i topByteMask = _mm_set1_epi32(0xFF000000);
    int dataIndex = bitmapBytes;
    int i, lane;

    assert(bData != NULL);
    memset(cdst, 0, bitmapBytes);

    for (i = 0; i < vecEnd; i += 4) {
        unsigned int diff[4];
        const __m128i vb = _mm_loadu_si128((const __m128i *)(base + (size_t)i * 4));
        const __m128i vu = _mm_loadu_si128((const __m128i *)(in + (size_t)i * 4));
        /* XOR four words and keep only their top bytes. */
        _mm_storeu_si128((__m128i *)diff, _mm_and_si128(_mm_xor_si128(vb, vu), topByteMask));
        for (lane = 0; lane < 4; ++lane) {
            dataIndex = compressFloatingPoint_sseEmit(cdst, dataIndex, i + lane,
                                                      in + (size_t)(i + lane) * 4, diff[lane]);
        }
    }
    /* Scalar tail for element counts that are not a multiple of four. */
    for (; i < size; ++i) {
        unsigned int cur, ref;
        memcpy(&cur, in + (size_t)i * 4, sizeof cur);
        memcpy(&ref, base + (size_t)i * 4, sizeof ref);
        dataIndex = compressFloatingPoint_sseEmit(cdst, dataIndex, i, in + (size_t)i * 4,
                                                  (cur ^ ref) & 0xFF000000u);
    }
    *compressSize = dataIndex;

#endif
#if DEBUG
    double t = get_clock()-t1;
    printf(" ===>floating compare done compressingcompressed size:(%d===>%d) (reduction:%d) ration=%f time=%d us \n", (int)(size*sizeof(float)), *compressSize, (int)(size*sizeof(float)-*compressSize), (1-(float)*compressSize/(size*sizeof(float)))*100, (int)(t*1000000));
#endif
}
216 #else
/**
 * Exponent-sharing float compressor (scalar variant).
 *
 * Output layout in dst: a bitmap of (size+7)/8 bytes, then for each 32-bit
 * word either 4 bytes (bitmap bit set: the top byte differs from the base
 * word) or 3 bytes (bitmap bit clear: the top byte is shared with the base).
 * Payload bytes are written least-significant first, which is what the
 * previous memcpy-based code produced on little-endian hosts; writing them
 * explicitly keeps the "drop the top byte" rule correct on any byte order.
 *
 * Words are loaded with memcpy so src/bData need not be aligned and no
 * float object is accessed through an incompatible pointer type.
 *
 * @param src           input floats
 * @param dst           output buffer
 * @param s             input size in bytes
 * @param compressSize  receives the number of bytes produced in dst
 * @param bData         base floats to compare against (must not be NULL)
 */
void compressFloatingPoint(void *src, void *dst, int s, int *compressSize, void *bData)
{
    const int size = s / (int)sizeof(float);
#if DEBUG
    double t1 = get_clock();
#endif

#if defined(COMPRESS) && !COMPRESS
    memcpy(dst, src, size*sizeof(float));
    *compressSize = s;
#else
    const unsigned char *in = (const unsigned char *)src;
    const unsigned char *base = (const unsigned char *)bData;
    unsigned char *cdst = (unsigned char *)dst;
    const int bitmapBytes = (size + 7) / 8;
    int dataIndex = bitmapBytes;    /* payload starts right after the bitmap */
    int i;

    assert(bData != NULL);
    memset(cdst, 0, bitmapBytes);
    for (i = 0; i < size; ++i) {
        uint32_t cur, ref;
        int nbytes = 3;
        int k;

        memcpy(&cur, in + (size_t)i * sizeof cur, sizeof cur);
        memcpy(&ref, base + (size_t)i * sizeof ref, sizeof ref);
        if ((cur ^ ref) & 0xff000000u) {
            /* Top byte differs: flag the element and send the whole word. */
            cdst[i >> 3] |= (unsigned char)(1u << (i & 7));
            nbytes = 4;
        }
        for (k = 0; k < nbytes; ++k) {
            cdst[dataIndex + k] = (unsigned char)(cur >> (8 * k));
        }
        dataIndex += nbytes;
    }
    *compressSize = dataIndex;

#endif
#if DEBUG
    double t = get_clock()-t1;
    CmiPrintf(" ===> FLOATING done compressingcompressed size:(%d===>%d) (reduction:%d) ration=%f time=%d us\n", (int)(size*sizeof(float)), *compressSize, (int)(size*sizeof(float)-*compressSize), (1-(float)*compressSize/(size*sizeof(float)))*100, (int)(t*1000000));
#endif
}
268 #endif
/**
 * Inverse of the exponent-sharing float compressor.
 *
 * cData holds a bitmap of (size+7)/8 bytes followed by the payload.  A set
 * bitmap bit means the element was stored as a full 4-byte word; a clear bit
 * means only the low 3 bytes were stored and the top byte is taken from the
 * matching base word.
 *
 * Exactly 3 or 4 payload bytes are consumed per element, so the last
 * 3-byte element no longer causes a read one byte past the compressed
 * buffer, and no unaligned word loads are performed.
 *
 * @param cData         compressed input
 * @param dData         output buffer of `s` bytes
 * @param s             uncompressed size in bytes
 * @param compressSize  size of the compressed input (not consulted here)
 * @param bData         base floats supplying the shared top bytes
 */
void decompressFloatingPoint(void *cData, void *dData, int s, int compressSize, void *bData) {
    const int size = s / (int)sizeof(float);
#if DEBUG
    double t1 = get_clock();
#endif
#if defined(COMPRESS) && !COMPRESS
    memcpy(dData, cData, size*sizeof(float));
#else
    const unsigned char *packed = (const unsigned char *)cData;
    const unsigned char *base = (const unsigned char *)bData;
    unsigned char *out = (unsigned char *)dData;
    int cursor = (size + 7) / 8;    /* payload starts right after the bitmap */
    int i;

    (void)compressSize;
    for (i = 0; i < size; ++i) {
        const int differs = (packed[i >> 3] >> (i & 7)) & 1;
        const int nbytes = differs ? 4 : 3;
        uint32_t value = 0;
        int k;

        for (k = 0; k < nbytes; ++k) {
            value |= (uint32_t)packed[cursor + k] << (8 * k);
        }
        if (!differs) {
            /* Same top byte as the base: splice it back in. */
            uint32_t ref;
            memcpy(&ref, base + (size_t)i * sizeof ref, sizeof ref);
            value |= ref & 0xff000000u;
        }
        memcpy(out + (size_t)i * sizeof value, &value, sizeof value);
        cursor += nbytes;
    }
#endif
#if DEBUG
    double t = get_clock()-t1;
    //CmiPrintf("--- FLOATING done decompressing..... orig size:%d\n time:%d us", (int)size, (int)(t*1000000)) ;
#endif
}
311 #else
#if !defined(COMPRESS) || COMPRESS
/*
 * Append one bit to a stream of 32-bit words.  A word is cleared the first
 * time it is touched, so the stream is well defined even where it grows past
 * the region zeroed up front.  Returns the next bit position.
 */
static int compressFloatingPoint_lzcPut(unsigned int *words, int bitPos, unsigned int bit)
{
    const int w = bitPos >> 5;
    const int off = bitPos & 0x1f;

    if (off == 0) {
        words[w] = 0;
    }
    words[w] |= (bit & 1u) << off;
    return bitPos + 1;
}
#endif

/**
 * Leading-zero-count (LZC) float compressor.
 *
 * Each 32-bit word is XORed with the matching base word.  The number of
 * leading zero bits of the XOR (capped at 15) is written as a 4-bit count,
 * least-significant bit first, followed by the remaining bits of the XOR
 * from the most significant one down to bit 0.  The stream is packed into
 * 32-bit words of dst.
 *
 * An element costs between 21 and 36 bits, so in the worst case dst must
 * hold more than `s` bytes (36 bits per element, rounded up to whole 32-bit
 * words).  NOTE(review): the capacity of dst is not visible here -- confirm
 * that callers allocate for the worst case.
 *
 * *compressSize is the stream length rounded UP to whole bytes; rounding
 * down would drop the final partial byte.
 *
 * @param src           input floats
 * @param dst           output buffer (accessed as 32-bit words)
 * @param s             input size in bytes
 * @param compressSize  receives the number of bytes produced in dst
 * @param bData         base floats to XOR against
 */
void compressFloatingPoint(void *src, void *dst, int s, int *compressSize, void *bData)
{
    const int size = s / (int)sizeof(float);
#if DEBUG
    double t1 = get_clock();
#endif

#if defined(COMPRESS) && !COMPRESS
    memcpy(dst, src, size*sizeof(float));
    *compressSize = s;
#else
    const unsigned char *in = (const unsigned char *)src;
    const unsigned char *base = (const unsigned char *)bData;
    unsigned int *out = (unsigned int *)dst;
    int bitPos = 0;
    int i;

    memset(dst, 0, s);
    for (i = 0; i < size; ++i) {
        uint32_t cur, ref, x;
        int lz = 0;
        int b = 31;
        int j;

        memcpy(&cur, in + (size_t)i * sizeof cur, sizeof cur);
        memcpy(&ref, base + (size_t)i * sizeof ref, sizeof ref);
        x = cur ^ ref;

        /* Count leading zeros, at most 15 so the count fits in 4 bits. */
        while (lz < 15 && !((x >> b) & 1u)) {
            ++lz;
            --b;
        }
        /* 4-bit count, LSB first. */
        for (j = 0; j < 4; ++j) {
            bitPos = compressFloatingPoint_lzcPut(out, bitPos, ((unsigned int)lz >> j) & 1u);
        }
        /* Remaining bits of the XOR, most significant first. */
        for (; b >= 0; --b) {
            bitPos = compressFloatingPoint_lzcPut(out, bitPos, (x >> b) & 1u);
        }
    }
    *compressSize = (bitPos + 7) / 8;

#if DEBUG
    double t = get_clock()-t1;
    CmiPrintf("===>[floating point lzc]done compressing compressed size:(%d===>%d) (reduction:%d) ration=%f Timer:%d us\n\n", (int)(size*sizeof(float)), *compressSize, (int)((size*sizeof(float)-*compressSize)), (1-(float)*compressSize/(size*sizeof(float)))*100, (int)(t*1000000));
#endif

#endif
}
/**
 * Inverse of the leading-zero-count (LZC) float compressor.
 *
 * For each element, four header bits are read from the 32-bit word stream;
 * header bit f, when set, skips 1<<f leading bit positions.  The remaining
 * bit positions down to 0 are then filled from the stream, and the result is
 * XORed with the matching base word.
 *
 * @param cData         compressed input (accessed as 32-bit words)
 * @param dData         output buffer of `s` bytes
 * @param s             uncompressed size in bytes
 * @param compressSize  size of the compressed input (not consulted here)
 * @param bData         base words to XOR the decoded residual with
 */
void decompressFloatingPoint(void *cData, void *dData, int s, int compressSize, void *bData) {
    int size = s/sizeof(float);
#if DEBUG
    double t1 = get_clock();
    if(CmiMyPe() == 5)
        CmiPrintf("[%d] starting decompressing \n", CmiMyPe());
#endif
#if !COMPRESS
    memcpy(dData, cData, size*sizeof(float));
#else
    unsigned int *packedWords = (unsigned int*)cData;
    unsigned int *outWords = (unsigned int*)dData;
    unsigned int *refWords = (unsigned int*)bData;
    int readPos = 0;    /* bit cursor into the packed stream */
    int elem;

    memset(outWords, 0, s);
    for (elem = 0; elem < size; elem++) {
        int topBit = FLOAT_BIT-1;
        int residual = 0;
        int h;

        /* Header: each set bit h means 1<<h leading positions are zero. */
        for (h = 0; h < 4; h++, readPos++) {
            if ((packedWords[readPos >> 5] >> (readPos & 0x1f)) & 1) {
                topBit -= (1 << h);
            }
        }
        /* Payload: remaining bits, most significant first. */
        for (; topBit >= 0; topBit--, readPos++) {
            residual |= ((packedWords[readPos >> 5] >> (readPos & 0x1f)) & 1) << topBit;
        }
        outWords[elem] = residual ^ refWords[elem];
    }

#if DEBUG
    double t = get_clock()-t1;
    if(CmiMyPe() == 5)
        CmiPrintf("[%d] done decompressing..... orig size:%d time:%d us \n", CmiMyPe(), size, (int)(t*1000000));
#endif

#endif
}
417 #endif
420 /***************************
422 * algorithms to compress doubles
423 * *****************/
/* Size of a double in bytes and in bits; parenthesised so the bit count can
 * be used safely inside larger expressions. */
#define DOUBLE_BYTE sizeof(double)
#define BITS_DOUBLE (sizeof(double) * 8)
428 #if COMPRESS_EXP
/**
 * Exponent-sharing double compressor.
 *
 * Output layout in dst: a bitmap of (2*size+7)/8 bytes holding a 2-bit code
 * per element (at bit positions 2*i and 2*i+1), followed by the payload:
 *   code 00 - top 16 bits equal the base word:  low 6 bytes stored
 *   code 01 - top 8 bits equal the base word:   low 7 bytes stored
 *   code 11 - top byte differs:                 all 8 bytes stored
 * Payload bytes are written least-significant first, which is what the
 * previous memcpy-based code produced on little-endian hosts.
 *
 * A fixed-width 64-bit type is used for the bit patterns because
 * `unsigned long` is only 32 bits on some platforms, and words are loaded
 * with memcpy so src/bData need not be aligned.
 *
 * @param src           input doubles
 * @param dst           output buffer
 * @param s             input size in bytes
 * @param compressSize  receives the number of bytes produced in dst
 * @param bData         base doubles to compare against (must not be NULL)
 */
void compressDouble(void *src, void *dst, int s, int *compressSize, void *bData)
{
    const int size = s / (int)sizeof(double);
#if DEBUG
    double t1 = get_clock();
#endif

#if defined(COMPRESS) && !COMPRESS
    memcpy(dst, src, s);
    *compressSize = s;
#else
    const unsigned char *in = (const unsigned char *)src;
    const unsigned char *base = (const unsigned char *)bData;
    unsigned char *cdst = (unsigned char *)dst;
    const int bitmapBytes = (2 * size + 7) / 8;
    int dataIndex = bitmapBytes;    /* payload starts right after the bitmap */
    int i;

    assert(bData != NULL);
    memset(cdst, 0, bitmapBytes);
    for (i = 0; i < size; ++i) {
        const int bit = i << 1;     /* even, so the 2-bit code never straddles a byte */
        uint64_t cur, ref, diff;
        int nbytes;
        int k;

        memcpy(&cur, in + (size_t)i * sizeof cur, sizeof cur);
        memcpy(&ref, base + (size_t)i * sizeof ref, sizeof ref);
        diff = cur ^ ref;

        if ((diff & 0xffff000000000000ULL) == 0) {          /* 00 */
            nbytes = 6;
        } else if ((diff & 0xff00000000000000ULL) == 0) {   /* 01 */
            cdst[bit >> 3] |= (unsigned char)(1u << (bit & 7));
            nbytes = 7;
        } else {                                            /* 11 */
            cdst[bit >> 3] |= (unsigned char)(3u << (bit & 7));
            nbytes = 8;
        }
        for (k = 0; k < nbytes; ++k) {
            cdst[dataIndex + k] = (unsigned char)(cur >> (8 * k));
        }
        dataIndex += nbytes;
    }
    *compressSize = dataIndex;

#endif
#if DEBUG
    double t = get_clock()-t1;
    printf(" ===>[double lzc] done compressingcompressed size:(%d===>%d) (reduction:%d) ration=%f time=%d us\n", (int)(size*sizeof(double)), *compressSize, (int)(size*sizeof(double)-*compressSize), (1-(double)*compressSize/(size*sizeof(double)))*100, (int)(t*1000000));
#endif
}
/**
 * Inverse of the exponent-sharing double compressor.
 *
 * cData holds a bitmap of (2*size+7)/8 bytes with a 2-bit code per element
 * (at bit positions 2*i and 2*i+1), followed by the payload:
 *   code 11 - all 8 bytes were stored
 *   code 01 - low 7 bytes stored, top byte comes from the base word
 *   other   - low 6 bytes stored, top 16 bits come from the base word
 *
 * Both code bits are read.  Reading only the low bit made the "11" case
 * indistinguishable from "01", which mis-decoded such elements and shifted
 * every element after them.  Exactly 6, 7 or 8 payload bytes are consumed
 * per element, so nothing past the compressed buffer is read.
 *
 * @param cData         compressed input
 * @param dData         output buffer of `s` bytes
 * @param s             uncompressed size in bytes
 * @param compressSize  size of the compressed input (not consulted here)
 * @param bData         base doubles supplying the shared high-order bytes
 */
void decompressDouble(void *cData, void *dData, int s, int compressSize, void *bData) {
    const int size = s / (int)sizeof(double);
#if DEBUG
    double t1 = get_clock();
#endif
#if defined(COMPRESS) && !COMPRESS
    memcpy(dData, cData, s);
#else
    const unsigned char *packed = (const unsigned char *)cData;
    const unsigned char *base = (const unsigned char *)bData;
    unsigned char *out = (unsigned char *)dData;
    int cursor = (2 * size + 7) / 8;    /* payload starts right after the bitmap */
    int i;

    (void)compressSize;
    for (i = 0; i < size; ++i) {
        const int bit = i << 1;
        const unsigned int code = (packed[bit >> 3] >> (bit & 7)) & 3u;
        uint64_t keepFromBase;
        uint64_t value = 0;
        int nbytes;
        int k;

        if (code == 3u) {           /* fully stored */
            nbytes = 8;
            keepFromBase = 0;
        } else if (code == 1u) {    /* top byte shared with base */
            nbytes = 7;
            keepFromBase = 0xff00000000000000ULL;
        } else {                    /* top 16 bits shared with base */
            nbytes = 6;
            keepFromBase = 0xffff000000000000ULL;
        }
        for (k = 0; k < nbytes; ++k) {
            value |= (uint64_t)packed[cursor + k] << (8 * k);
        }
        if (keepFromBase != 0) {
            uint64_t ref;
            memcpy(&ref, base + (size_t)i * sizeof ref, sizeof ref);
            value |= ref & keepFromBase;
        }
        memcpy(out + (size_t)i * sizeof value, &value, sizeof value);
        cursor += nbytes;
    }
#endif
#if DEBUG
    double t = get_clock()-t1;
    printf("done decompressing..... orig size:%d\n time:%d us", (int)size, (int)(t*1000000)) ;
#endif
}
540 #else
#if !defined(COMPRESS) || COMPRESS
/*
 * Append one bit to a stream of 64-bit words.  A word is cleared the first
 * time it is touched, so the stream is well defined even where it grows past
 * the region zeroed up front.  Returns the next bit position.
 */
static int compressDouble_lzcPut(uint64_t *words, int bitPos, uint64_t bit)
{
    const int w = bitPos >> 6;
    const int off = bitPos & 0x3f;

    if (off == 0) {
        words[w] = 0;
    }
    words[w] |= (bit & 1u) << off;
    return bitPos + 1;
}
#endif

/**
 * Leading-zero-count (LZC) double compressor.
 *
 * Each 64-bit word is XORed with the matching base word.  The number of
 * leading zero bits of the XOR (capped at 15) is written as a 4-bit count,
 * least-significant bit first, followed by the remaining bits of the XOR
 * from the most significant one down to bit 0.  The stream is packed into
 * 64-bit words of dst.
 *
 * An element costs between 53 and 68 bits, so in the worst case dst must
 * hold more than `s` bytes.  NOTE(review): the capacity of dst is not
 * visible here -- confirm that callers allocate for the worst case.
 *
 * *compressSize is the stream length rounded UP to whole bytes; rounding
 * down would drop the final partial byte.
 *
 * @param src           input doubles
 * @param dst           output buffer (accessed as 64-bit words)
 * @param s             input size in bytes
 * @param compressSize  receives the number of bytes produced in dst
 * @param bData         base doubles to XOR against
 */
void compressDouble(void *src, void *dst, int s, int *compressSize, void *bData)
{
    const int size = s / (int)sizeof(double);
#if DEBUG
    double t1 = get_clock();
#endif

#if defined(COMPRESS) && !COMPRESS
    memcpy(dst, src, size*sizeof(double));
    *compressSize = s;
#else
    const unsigned char *in = (const unsigned char *)src;
    const unsigned char *base = (const unsigned char *)bData;
    uint64_t *out = (uint64_t *)dst;
    int bitPos = 0;
    int i;

    memset(dst, 0, s);
    for (i = 0; i < size; ++i) {
        uint64_t cur, ref, x;
        int lz = 0;
        int b = 63;
        int j;

        memcpy(&cur, in + (size_t)i * sizeof cur, sizeof cur);
        memcpy(&ref, base + (size_t)i * sizeof ref, sizeof ref);
        x = cur ^ ref;

        /* Count leading zeros, at most 15 so the count fits in 4 bits. */
        while (lz < 15 && !((x >> b) & 1u)) {
            ++lz;
            --b;
        }
        /* 4-bit count, LSB first. */
        for (j = 0; j < 4; ++j) {
            bitPos = compressDouble_lzcPut(out, bitPos, (uint64_t)((lz >> j) & 1));
        }
        /* Remaining bits of the XOR, most significant first. */
        for (; b >= 0; --b) {
            bitPos = compressDouble_lzcPut(out, bitPos, (x >> b) & 1u);
        }
    }
    *compressSize = (bitPos + 7) / 8;

#if DEBUG
    double t = get_clock()-t1;
    printf("===>double lzc done compressing compressed size:(%d===>%d) (reduction:%d) ration=%f Timer:%d us\n\n", (int)(size*sizeof(double)), *compressSize, (int)((size*sizeof(double)-*compressSize)), (1-(double)*compressSize/(size*sizeof(double)))*100, (int)(t*1000000));
#endif

#endif
}
/**
 * Inverse of the leading-zero-count (LZC) double compressor.
 *
 * For each element, four header bits are read from the word stream; header
 * bit f, when set, skips 1<<f leading bit positions.  The remaining bit
 * positions down to 0 are then filled from the stream, and the result is
 * XORed with the matching base word.
 *
 * @param cData         compressed input (accessed as unsigned long words)
 * @param dData         output buffer of `s` bytes
 * @param s             uncompressed size in bytes
 * @param compressSize  size of the compressed input (not consulted here)
 * @param bData         base words to XOR the decoded residual with
 */
void decompressDouble(void *cData, void *dData, int s, int compressSize, void *bData) {
    int size = s/sizeof(double);
#if DEBUG
    double t1 = get_clock();
#endif
#if !COMPRESS
    memcpy(dData, cData, size*sizeof(double));
#else
    unsigned long *packedWords = (unsigned long*)cData;
    unsigned long *outWords = (unsigned long*)dData;
    unsigned long *refWords = (unsigned long*)bData;
    int readPos = 0;    /* bit cursor into the packed stream */
    int elem;

    memset(outWords, 0, s);
    for (elem = 0; elem < size; elem++) {
        int topBit = (int)(BITS_DOUBLE) - 1;
        unsigned long residual = 0;
        int h;

        /* Header: each set bit h means 1<<h leading positions are zero. */
        for (h = 0; h < 4; h++, readPos++) {
            if ((packedWords[readPos >> 6] >> (readPos & 0x3f)) & 1) {
                topBit -= (1 << h);
            }
        }
        /* Payload: remaining bits, most significant first. */
        for (; topBit >= 0; topBit--, readPos++) {
            residual |= ((packedWords[readPos >> 6] >> (readPos & 0x3f)) & 1) << topBit;
        }
        outWords[elem] = residual ^ refWords[elem];
    }

#if DEBUG
    double t = get_clock()-t1;
    printf("done decompressing..... orig size:%d time:%d us \n", size, (int)(t*1000000));
#endif

#endif
}
656 #endif