1 // Fast data compression library
2 // Copyright (C) 2006-2009 Lasse Mikkel Reinhold
5 // QuickLZ can be used for free under the GPL-1 or GPL-2 license (where anything
6 // released into public must be open source) or under a commercial license if such
7 // has been acquired (see http://www.quicklz.com/order.html). The commercial license
8 // does not cover derived or ported versions created by third parties under GPL.
10 // BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION
11 // BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION
12 // BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION
13 // BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION
14 // BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION BETA VERSION
// Build guard: compilation stops unless the QLZ_VERSION_* macros read exactly 1.5.0.
20 #if QLZ_VERSION_MAJOR != 1 || QLZ_VERSION_MINOR != 5 || QLZ_VERSION_REVISION != 0
21 #error quicklz.c and quicklz.h have different versions
// Target detection: true when any of the listed x86 / x86-64 compiler macros is defined.
// NOTE(review): what this branch defines is not visible in this listing.
24 #if (defined(__X86__) || defined(__i386__) || defined(i386) || defined(_M_IX86) || defined(__386__) || defined(__x86_64__) || defined(_M_X64))
// Margins used by both cores: last_matchstart is placed
// UNCONDITIONAL_MATCHLEN + UNCOMPRESSED_END bytes before the final byte of the buffer.
29 #define UNCONDITIONAL_MATCHLEN 6
30 #define UNCOMPRESSED_END 4
// Level 1 with QLZ_PTR_64 and no streaming buffer: with these two macros the level-1
// compressor stores hash-table offsets as ui32 distances from `source`
// (offset = CAST(src - OFFSET_BASE)) and rebuilds the pointer as offset + OFFSET_BASE.
38 #if QLZ_COMPRESSION_LEVEL == 1 && defined QLZ_PTR_64 && QLZ_STREAMING_BUFFER == 0
39 #define OFFSET_BASE source
40 #define CAST (ui32)(size_t)
// Returns a build-time property of the library selected by `setting`.
// Selectors visible in this listing:
//   0 -> QLZ_COMPRESSION_LEVEL
//   1 -> sizeof(qlz_state_compress)
//   2 -> sizeof(qlz_state_decompress)
//   3 -> QLZ_STREAMING_BUFFER
//   7 / 8 / 9 -> QLZ_VERSION_MAJOR / QLZ_VERSION_MINOR / QLZ_VERSION_REVISION
// Selectors between 3 and 7 and the value returned for an unknown selector are
// not shown here.
46 int qlz_get_setting(int setting)
50 case 0: return QLZ_COMPRESSION_LEVEL;
// The sizeof results are narrowed to int by the return type.
51 case 1: return sizeof(qlz_state_compress);
52 case 2: return sizeof(qlz_state_decompress);
53 case 3: return QLZ_STREAMING_BUFFER;
// NOTE(review): the body of this QLZ_MEMORY_SAFE branch is not visible; presumably
// one selector reports whether memory-safe mode is compiled in - confirm.
54 #ifdef QLZ_MEMORY_SAFE
59 case 7: return QLZ_VERSION_MAJOR;
60 case 8: return QLZ_VERSION_MINOR;
61 case 9: return QLZ_VERSION_REVISION;
66 #if QLZ_COMPRESSION_LEVEL == 1
// Returns 1 when each of the bytes src[1] .. src[n] equals src[0], otherwise 0.
// Note that n + 1 bytes take part in the comparison; n == 0 yields 1 without
// reading any byte.
static int same(const unsigned char *src, size_t n)
{
	size_t idx;

	// Walk from the far end towards src[1]; the first mismatch settles the answer.
	for (idx = n; idx != 0; idx--)
	{
		if (src[idx] != src[0])
			return 0;
	}
	return 1;
}
// Clears the compressor's hash bookkeeping in `state` for every one of the
// QLZ_HASH_VALUES slots.
75 static void reset_table_compress(qlz_state_compress *state)
78 for(i = 0; i < QLZ_HASH_VALUES; i++)
// Level 1: zero the stored offset of the slot.
80 #if QLZ_COMPRESSION_LEVEL == 1
81 state->hash[i].offset = 0;
// Presumably the alternative branch for the other levels (the directive line is
// not visible in this listing): zero the slot's entry counter.
83 state->hash_counter[i] = 0;
// Clears the decompressor's hash bookkeeping in `state`.
// In the visible code only level 2 does any work: it zeroes hash_counter for
// every one of the QLZ_HASH_VALUES slots.
88 static void reset_table_decompress(qlz_state_decompress *state)
93 #if QLZ_COMPRESSION_LEVEL == 2
94 for(i = 0; i < QLZ_HASH_VALUES; i++)
96 state->hash_counter[i] = 0;
101 static __inline ui32 hash_func(ui32 i)
103 #if QLZ_COMPRESSION_LEVEL == 2
104 return ((i >> 9) ^ (i >> 13) ^ i) & (QLZ_HASH_VALUES - 1);
106 return ((i >> 12) ^ i) & (QLZ_HASH_VALUES - 1);
110 static __inline ui32 fast_read(void const *src, ui32 bytes)
113 unsigned char *p = (unsigned char*)src;
117 return(*p | *(p + 1) << 8 | *(p + 2) << 16 | *(p + 3) << 24);
119 return(*p | *(p + 1) << 8 | *(p + 2) << 16);
121 return(*p | *(p + 1) << 8);
127 if (bytes >= 1 && bytes <= 4)
128 return *((ui32*)src);
134 static __inline ui32 hashat(const unsigned char *src)
137 fetch = fast_read(src, 3);
138 hash = hash_func(fetch);
// Stores the low-order bytes of `f` at `dst`; `bytes` selects how many.
// The case labels and preprocessor directives of this function are not visible
// in this listing. The four runs of byte stores below (4, 3, 2 and 1 bytes)
// presumably correspond to bytes == 4, 3, 2 and 1.
142 static __inline void fast_write(ui32 f, void *dst, size_t bytes)
// Byte-wise variant: writes least significant byte first (little-endian),
// independent of host byte order and alignment.
145 unsigned char *p = (unsigned char*)dst;
// Four-byte store.
150 *p = (unsigned char)f;
151 *(p + 1) = (unsigned char)(f >> 8);
152 *(p + 2) = (unsigned char)(f >> 16);
153 *(p + 3) = (unsigned char)(f >> 24);
// Three-byte store.
156 *p = (unsigned char)f;
157 *(p + 1) = (unsigned char)(f >> 8);
158 *(p + 2) = (unsigned char)(f >> 16);
// Two-byte store.
161 *p = (unsigned char)f;
162 *(p + 1) = (unsigned char)(f >> 8);
// One-byte store.
165 *p = (unsigned char)f;
// Direct-store variant: writes through a pointer cast to the target width.
// NOTE(review): presumably the x86 / x86-64 path; the 4- and 3-byte stores of
// this variant are not shown here.
178 *((ui16 *)dst) = (ui16)f;
181 *((unsigned char*)dst) = (unsigned char)f;
188 size_t qlz_size_decompressed(const char *source)
191 n = (((*source) & 2) == 2) ? 4 : 1;
192 r = fast_read(source + 1 + n, n);
193 r = r & (0xffffffff >> ((4 - n)*8));
197 size_t qlz_size_compressed(const char *source)
200 n = (((*source) & 2) == 2) ? 4 : 1;
201 r = fast_read(source + 1, n);
202 r = r & (0xffffffff >> ((4 - n)*8));
// Returns the length in bytes of the block header at `source`: one flag byte
// plus two size fields. Bit 1 of the flag byte selects 4-byte fields (header of
// 9 bytes); otherwise the fields are 1 byte each (header of 3 bytes).
size_t qlz_size_header(const char *source)
{
	if ((*source & 2) != 0)
		return 9;

	return 3;
}
// Copies n bytes from src to dst. Used by the decompressor both for
// back-reference copies within the output buffer and for literal runs.
// NOTE(review): most of the body is not visible in this listing; the name and
// the overlap warning below suggest the copy proceeds towards higher addresses
// so that a source lying shortly before dst replicates correctly - confirm.
213 static __inline void memcpy_up(unsigned char *dst, const unsigned char *src, ui32 n)
215 // Caution if modifying memcpy_up! Overlap of dst and src must be special handled.
// One past the last destination byte to be written.
217 unsigned char *end = dst + n;
// Word-at-a-time variant: moves one ui32 at offset f through a pointer cast.
228 *(ui32 *)(dst + f) = *(ui32 *)(src + f);
// Records position `s` in the decompressor's hash table.
// The computation of `hash` is not visible in this listing.
235 static __inline void update_hash(qlz_state_decompress *state, const unsigned char *s)
// Level 1: a single entry per slot; remember s and set the slot's counter to 1.
237 #if QLZ_COMPRESSION_LEVEL == 1
240 state->hash[hash].offset = s;
241 state->hash_counter[hash] = 1;
// Level 2: QLZ_POINTERS entries per slot, indexed by the counter's low bits
// (c & (QLZ_POINTERS - 1)).
// NOTE(review): c is presumably incremented between the store and the
// write-back; that line is not visible here.
242 #elif QLZ_COMPRESSION_LEVEL == 2
246 c = state->hash_counter[hash];
247 state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = s;
249 state->hash_counter[hash] = c;
// Compiled for compression levels 1 and 2 only.
255 #if QLZ_COMPRESSION_LEVEL <= 2
// Feeds positions to update_hash through the cursor *lh.
// NOTE(review): the loop around the call is not visible in this listing;
// presumably *lh is advanced until it reaches `max` - confirm.
256 static void update_hash_upto(qlz_state_decompress *state, unsigned char **lh, const unsigned char *max)
261 update_hash(state, *lh);
// Core compressor: encodes `size` bytes starting at `source` into `destination`
// and returns the number of bytes produced (reported as at least 9).
//
// The output consists of CWORD_LEN-byte control words, each followed by the
// items it describes. cword_val collects one flag per item, shifted in from the
// top: a set bit marks a match, a clear bit marks a literal. It starts as
// 1U << 31; once that marker bit has travelled down to bit 0 the word is full,
// gets written through cword_ptr and is restarted.
266 static size_t qlz_compress_core(const unsigned char *source, unsigned char *destination, size_t size, qlz_state_compress *state)
268 const unsigned char *last_byte = source + size - 1;
269 const unsigned char *src = source;
// Room for the first control word is reserved at the start of the output.
270 unsigned char *cword_ptr = destination;
271 unsigned char *dst = destination + CWORD_LEN;
272 ui32 cword_val = 1U << 31;
// Match searching stops this far before the end of the input; the rest is
// handled by the tail loop further down.
273 const unsigned char *last_matchstart = last_byte - UNCONDITIONAL_MATCHLEN - UNCOMPRESSED_END;
275 unsigned int lits = 0;
279 if(src <= last_matchstart)
280 fetch = fast_read(src, 3);
// Main loop: one item (match or literal) per iteration.
282 while(src <= last_matchstart)
// Control word full: flush it and start a new one.
284 if ((cword_val & 1) == 1)
286 // store uncompressed if compression ratio is too low
// Tested once more than half the input is consumed: the output so far exceeds
// the consumed input minus 1/32 of it. The action taken is not visible here.
287 if (src > source + (size >> 1) && dst - destination > src - source - ((src - source) >> 5))
290 fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN);
294 cword_val = 1U << 31;
295 fetch = fast_read(src, 3);
// ---- Level 1: one candidate per hash slot, with the last fetched value cached
// alongside the offset so a candidate can be tested without reading it.
297 #if QLZ_COMPRESSION_LEVEL == 1
299 const unsigned char *o;
302 hash = hash_func(fetch);
// cached has zero bits wherever the current fetch equals the slot's previous one.
303 cached = fetch ^ state->hash[hash].cache;
304 state->hash[hash].cache = fetch;
// Load the previous position for this slot, then record the current one.
306 o = state->hash[hash].offset + OFFSET_BASE;
307 state->hash[hash].offset = CAST(src - OFFSET_BASE);
// Two alternative forms of the candidate test follow (the directive selecting
// between them is not visible in this listing): the first compares only the low
// 24 bits of `cached`, the second requires all 32 bits to be equal. Both reject
// an empty slot (o == OFFSET_BASE) and require a distance above MINOFFSET,
// except for distance 1 inside a run of identical bytes verified by same().
310 if ((cached & 0xffffff) == 0 && o != OFFSET_BASE && (src - o > MINOFFSET || (src == o + 1 && lits >= 3 && src > source + 3 && same(src - 3, 6))))
315 if (cached == 0 && o != OFFSET_BASE && (src - o > MINOFFSET || (src == o + 1 && lits >= 3 && src > source + 3 && same(src - 3, 6))))
// Fourth byte differs: emit a match of length 3 (length code 3 - 2).
317 if (*(o + 3) != *(src + 3))
321 cword_val = (cword_val >> 1) | (1U << 31);
322 fast_write((3 - 2) | hash, dst, 2);
// Otherwise extend the match byte by byte, measuring from old_src.
328 const unsigned char *old_src = src;
332 cword_val = (cword_val >> 1) | (1U << 31);
335 if(*(o + (src - old_src)) == *src)
338 if(*(o + (src - old_src)) == *src)
// Cap the match at 255 bytes and keep it clear of the uncompressed tail.
340 size_t q = last_byte - UNCOMPRESSED_END - (src - 5) + 1;
341 size_t remaining = q > 255 ? 255 : q;
343 while(*(o + (src - old_src)) == *src && (size_t)(src - old_src) < remaining)
348 matchlen = src - old_src;
// Two encodings: a 2-byte form carrying (matchlen - 2) in the low bits, and a
// 3-byte form carrying matchlen from bit 16 upwards. The condition choosing
// between them is not visible here.
351 fast_write((ui32)(matchlen - 2) | hash, dst, 2);
356 fast_write((ui32)(matchlen << 16) | hash, dst, 3);
360 fetch = fast_read(src, 3);
// No usable match: flag a literal.
369 cword_val = (cword_val >> 1);
// Two ways of refreshing `fetch` follow: a fresh 3-byte read, or sliding the
// existing window by one byte and inserting the next byte at bits 16..23.
371 fetch = fast_read(src, 3);
373 fetch = (fetch >> 8 & 0xffff) | (*(src + 2) << 16);
// ---- Levels 2 and 3: up to QLZ_POINTERS candidates per hash slot.
377 #elif QLZ_COMPRESSION_LEVEL >= 2
379 const unsigned char *o, *offset2;
380 ui32 hash, matchlen, k, m, best_k = 0;
// Longest match allowed from here: at most 255 and clear of the uncompressed tail.
382 size_t remaining = (last_byte - UNCOMPRESSED_END - src + 1) > 255 ? 255 : (last_byte - UNCOMPRESSED_END - src + 1);
386 //hash = hashat(src);
387 fetch = fast_read(src, 3);
388 hash = hash_func(fetch);
// c counts the positions stored in this slot so far.
390 c = state->hash_counter[hash];
// Candidate 0: must be far enough back and agree on the first three bytes.
392 offset2 = state->hash[hash].offset[0];
393 if(offset2 < src - MINOFFSET && c > 0 && ((fast_read(offset2, 3) ^ fetch) & 0xffffff) == 0)
396 if(*(offset2 + matchlen) == *(src + matchlen))
399 while(*(offset2 + matchlen) == *(src + matchlen) && matchlen < remaining)
// Remaining candidates, limited to the entries that have actually been filled.
405 for(k = 1; k < QLZ_POINTERS && c > k; k++)
407 o = state->hash[hash].offset[k];
408 #if QLZ_COMPRESSION_LEVEL == 3
409 if(((fast_read(o, 3) ^ fetch) & 0xffffff) == 0 && o < src - MINOFFSET)
// Level 2 first compares the byte at the current best length, so candidates that
// cannot beat the best match so far are rejected cheaply.
410 #elif QLZ_COMPRESSION_LEVEL == 2
411 if(*(src + matchlen) == *(o + matchlen) && ((fast_read(o, 3) ^ fetch) & 0xffffff) == 0 && o < src - MINOFFSET)
415 while(*(o + m) == *(src + m) && m < remaining)
// Level 3 prefers a longer match, or on a tie the candidate at the higher
// address (shorter distance).
417 #if QLZ_COMPRESSION_LEVEL == 3
418 if ((m > matchlen) || (m == matchlen && o > offset2))
419 #elif QLZ_COMPRESSION_LEVEL == 2
// Record the current position in the slot's ring of QLZ_POINTERS entries.
430 state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src;
432 state->hash_counter[hash] = c;
// ---- Level 3 output: matches are encoded by distance rather than by hash slot.
434 #if QLZ_COMPRESSION_LEVEL == 3
435 if(matchlen > 2 && src - o < 131071)
438 size_t offset = src - o;
// Also register every position covered by the match.
440 for(u = 1; u < matchlen; u++)
442 hash = hashat(src + u);
443 c = state->hash_counter[hash]++;
444 state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src + u;
447 cword_val = (cword_val >> 1) | (1U << 31);
// Five encodings, told apart by the low bits of the first byte:
//   low bits 00: 1 byte,  length 3, offset <= 63
450 if(matchlen == 3 && offset <= 63)
452 *dst = (unsigned char)(offset << 2);
//   low bits 01: 2 bytes, length 3, offset <= 16383
455 else if (matchlen == 3 && offset <= 16383)
457 ui32 f = (ui32)((offset << 2) | 1);
458 fast_write(f, dst, 2);
//   low bits 10: 2 bytes, length <= 18 stored as (matchlen - 3), offset <= 1023
461 else if (matchlen <= 18 && offset <= 1023)
463 ui32 f = ((matchlen - 3) << 2) | ((ui32)offset << 6) | 2;
464 fast_write(f, dst, 2);
//   low bits 11: 3 bytes, length <= 33 stored as (matchlen - 2), offset from bit 7
468 else if(matchlen <= 33)
470 ui32 f = ((matchlen - 2) << 2) | ((ui32)offset << 7) | 3;
471 fast_write(f, dst, 3);
//   low bits 11 with a zero length field: 4 bytes, (matchlen - 3) from bit 7,
//   offset from bit 15
476 ui32 f = ((matchlen - 3) << 7) | ((ui32)offset << 15) | 3;
477 fast_write(f, dst, 4);
// No match: flag a literal.
486 cword_val = (cword_val >> 1);
// ---- Level 2 output: slot index k in the low 2 bits, hash from bit 5; length
// either as (matchlen - 2) from bit 2 (2-byte form) or from bit 16 (3-byte form).
488 #elif QLZ_COMPRESSION_LEVEL == 2
492 cword_val = (cword_val >> 1) | (1U << 31);
497 ui32 f = best_k | ((matchlen - 2) << 2) | (hash << 5);
498 fast_write(f, dst, 2);
503 ui32 f = best_k | (matchlen << 16) | (hash << 5);
504 fast_write(f, dst, 3);
513 cword_val = (cword_val >> 1);
// Tail loop: the bytes after last_matchstart are flagged as literals only.
519 while (src <= last_byte)
521 if ((cword_val & 1) == 1)
523 fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN);
526 cword_val = 1U << 31;
// Levels 1 and 2 keep updating the hash table while at least three more bytes
// follow the current one.
528 #if QLZ_COMPRESSION_LEVEL < 3
529 if (src <= last_byte - 3)
531 #if QLZ_COMPRESSION_LEVEL == 1
533 fetch = fast_read(src, 3);
534 hash = hash_func(fetch);
535 state->hash[hash].offset = CAST(src - OFFSET_BASE);
536 state->hash[hash].cache = fetch;
537 #elif QLZ_COMPRESSION_LEVEL == 2
541 c = state->hash_counter[hash];
542 state->hash[hash].offset[c & (QLZ_POINTERS - 1)] = src;
544 state->hash_counter[hash] = c;
551 cword_val = (cword_val >> 1);
// Finish the last, partly filled control word: shift it down until its marker
// bit reaches bit 0, then store it in the same form as a full one.
554 while((cword_val & 1) != 1)
555 cword_val = (cword_val >> 1);
557 fast_write((cword_val >> 1) | (1U << 31), cword_ptr, CWORD_LEN);
559 // min. size must be 9 bytes so that the qlz_size functions can take 9 bytes as argument
560 return dst - destination < 9 ? 9 : dst - destination;
// Core decompressor: expands the compressed block at `source` into
// `destination`.
//   size    - number of bytes to produce (the caller passes the header's
//             decompressed size)
//   state   - hash tables used by levels 1 and 2 to resolve match references
//   history - lowest address a match may refer to; only checked in
//             QLZ_MEMORY_SAFE builds
// The caller stores the return value as the decompressed size; the return
// statements themselves are not visible in this listing.
563 static size_t qlz_decompress_core(const unsigned char *source, unsigned char *destination, size_t size, qlz_state_decompress *state, const unsigned char *history)
// Input starts right after the block header.
565 const unsigned char *src = source + qlz_size_header((const char *)source);
566 unsigned char *dst = destination;
567 const unsigned char *last_destination_byte = destination + size - 1;
// Same end margin as used by the compressor.
569 const unsigned char *last_matchstart = last_destination_byte - UNCONDITIONAL_MATCHLEN - UNCOMPRESSED_END;
// Hashing cursor; nothing has been hashed yet.
570 unsigned char *last_hashed = destination - 1;
// End of the compressed input according to the header's compressed-size field.
571 const unsigned char *last_source_byte = source + qlz_size_compressed((const char *)source) - 1;
// bitlut[x] = number of consecutive zero bits at the bottom of the 4-bit value
// x (4 when x == 0), i.e. how many literal flags come before the next match flag.
572 static const ui32 bitlut[16] = {4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0};
// last_source_byte is referenced only inside QLZ_MEMORY_SAFE sections; the cast
// keeps it formally used in other builds.
574 (void) last_source_byte;
// Load the next control word, refusing to read past the end of the input in
// memory-safe builds.
585 #ifdef QLZ_MEMORY_SAFE
586 if(src + CWORD_LEN - 1 > last_source_byte)
589 cword_val = fast_read(src, CWORD_LEN);
// Fetch the next 4 input bytes, with the same bounds check.
593 #ifdef QLZ_MEMORY_SAFE
594 if(src + 4 - 1 > last_source_byte)
598 fetch = fast_read(src, 4);
// Lowest flag bit set: the next item is a match.
600 if ((cword_val & 1) == 1)
603 const unsigned char *offset2;
// Level 1 match: 12-bit hash slot in bits 4..15; the low nibble holds
// (matchlen - 2), or 0 when the length is stored in the third byte.
605 #if QLZ_COMPRESSION_LEVEL == 1
607 cword_val = cword_val >> 1;
608 hash = (fetch >> 4) & 0xfff;
609 offset2 = (const unsigned char *)(size_t)state->hash[hash].offset;
611 if((fetch & 0xf) != 0)
613 matchlen = (fetch & 0xf) + 2;
618 matchlen = *(src + 2);
// Level 2 match: entry index within the slot in bits 0..1, (matchlen - 2) in
// bits 2..4 (0 when the length is stored in the third byte), 11-bit hash slot
// from bit 5.
622 #elif QLZ_COMPRESSION_LEVEL == 2
625 cword_val = cword_val >> 1;
626 hash = (fetch >> 5) & 0x7ff;
627 c = (unsigned char)(fetch & 0x3);
628 offset2 = state->hash[hash].offset[c];
630 if((fetch & (28)) != 0)
632 matchlen = ((fetch >> 2) & 0x7) + 2;
637 matchlen = *(src + 2);
// Level 3 match: distance-based; the low bits of the first byte select one of
// five encodings.
641 #elif QLZ_COMPRESSION_LEVEL == 3
643 cword_val = cword_val >> 1;
//   low bits 00: offset in bits 2..7 of one byte
644 if ((fetch & 3) == 0)
646 offset = (fetch & 0xff) >> 2;
//   low bits 01: offset in bits 2..15
650 else if ((fetch & 2) == 0)
652 offset = (fetch & 0xffff) >> 2;
//   low bits 10: (matchlen - 3) in bits 2..5, offset in bits 6..15
656 else if ((fetch & 1) == 0)
658 offset = (fetch & 0xffff) >> 6;
659 matchlen = ((fetch >> 2) & 15) + 3;
//   low bits 11, non-zero length field: (matchlen - 2) in bits 2..6, 17-bit
//   offset from bit 7
662 else if ((fetch & 127) != 3)
664 offset = (fetch >> 7) & 0x1ffff;
665 matchlen = ((fetch >> 2) & 0x1f) + 2;
//   low bits 11, zero length field: (matchlen - 3) in bits 7..14, offset from
//   bit 15
670 offset = (fetch >> 15);
671 matchlen = ((fetch >> 7) & 255) + 3;
// Source of the copy lies `offset` bytes back in the output.
675 offset2 = dst - offset;
// Memory-safe builds validate the match: its source must lie between `history`
// and dst - MINOFFSET - 1, and its length must leave the last UNCOMPRESSED_END
// bytes of the output untouched.
678 #ifdef QLZ_MEMORY_SAFE
679 if(offset2 < history || offset2 > dst - MINOFFSET - 1)
682 if(matchlen > (ui32)(last_destination_byte - dst - UNCOMPRESSED_END + 1))
686 memcpy_up(dst, offset2, matchlen);
// Levels 1 and 2 resolve matches through the hash table, so the decompressor
// hashes the output it has produced.
689 #if QLZ_COMPRESSION_LEVEL <= 2
690 update_hash_upto(state, &last_hashed, dst - matchlen);
691 last_hashed = dst - 1;
// Literal fast path away from the end of the output: copy 4 bytes in one go and
// drop n literal flags, n being the literal run length reported by bitlut.
// NOTE(review): the pointer advances are not visible in this listing.
696 if (dst < last_matchstart)
698 unsigned int n = bitlut[cword_val & 0xf];
// Two variants of the 4-byte copy (direct word store or memcpy_up); the
// directive selecting between them is not shown.
700 *(ui32 *)dst = *(ui32 *)src;
702 memcpy_up(dst, src, 4);
704 cword_val = cword_val >> n;
707 #if QLZ_COMPRESSION_LEVEL <= 2
708 update_hash_upto(state, &last_hashed, dst - 3);
// Tail: process the remaining output up to the last byte, restarting the
// control word where needed and checking the input bound in memory-safe builds.
713 while(dst <= last_destination_byte)
718 cword_val = 1U << 31;
720 #ifdef QLZ_MEMORY_SAFE
721 if(src >= last_source_byte + 1)
727 cword_val = cword_val >> 1;
// Bring the hash table up to date for the end of the output.
730 #if QLZ_COMPRESSION_LEVEL <= 2
731 update_hash_upto(state, &last_hashed, last_destination_byte - 3); // todo, use constant
// Public entry point: compresses `size` bytes from `source` into `destination`
// using the tables in `state`, then writes the block header in front of the
// payload.
// `base` is the offset of the payload inside `destination` and `r` the total
// block size; the statements that set `base` and `compressed` and the final
// return are not visible in this listing.
740 size_t qlz_compress(const void *source, char *destination, size_t size, qlz_state_compress *state)
// Empty input and sizes above 0xffffffff - 400 are singled out here; the
// action taken is not visible.
746 if(size == 0 || size > 0xffffffff - 400)
// Streaming builds: this branch handles input that would run past the end of
// the streaming buffer. It is compressed straight from `source`, with the hash
// table reset beforehand.
// NOTE(review): presumably non-streaming builds always take this path - confirm.
754 #if QLZ_STREAMING_BUFFER > 0
755 if (state->stream_counter + size - 1 >= QLZ_STREAMING_BUFFER)
758 reset_table_compress(state);
759 r = base + qlz_compress_core((const unsigned char *)source, (unsigned char*)destination + base, size, state);
// Streaming builds reset the table again afterwards.
760 #if QLZ_STREAMING_BUFFER > 0
761 reset_table_compress(state);
// Fallback: store the input unmodified after the header.
// NOTE(review): presumably taken when the core reports failure; the condition
// is not visible.
765 memcpy(destination + base, source, size);
773 state->stream_counter = 0;
// Streaming path: append the input to the stream buffer and compress from
// there, so later calls can refer back to earlier data.
775 #if QLZ_STREAMING_BUFFER > 0
778 unsigned char *src = state->stream_buffer + state->stream_counter;
780 memcpy(src, source, size);
781 r = base + qlz_compress_core(src, (unsigned char*)destination + base, size, state);
// Raw-store fallback for the streaming path, followed by a table reset.
785 memcpy(destination + base, src, size);
788 reset_table_compress(state);
794 state->stream_counter += size;
// Short header: flag byte with bit 1 clear, then one byte each for the block
// size and the input size.
799 *destination = (unsigned char)(0 | compressed);
800 *(destination + 1) = (unsigned char)r;
801 *(destination + 2) = (unsigned char)size;
// Long header: flag byte with bit 1 set, then 4 bytes each for the block size
// and the input size.
805 *destination = (unsigned char)(2 | compressed);
806 fast_write((ui32)r, destination + 1, 4);
807 fast_write((ui32)size, destination + 5, 4);
// Remaining flag bits: compression level from bit 2, bit 6 always set, and from
// bit 4 a code for the streaming buffer size
// (0 = none, 1 = 100000, 2 = 1000000, 3 = any other value).
810 *destination |= (QLZ_COMPRESSION_LEVEL << 2);
811 *destination |= (1 << 6);
812 *destination |= ((QLZ_STREAMING_BUFFER == 0 ? 0 : (QLZ_STREAMING_BUFFER == 100000 ? 1 : (QLZ_STREAMING_BUFFER == 1000000 ? 2 : 3))) << 4);
// Public entry point: decompresses the block at `source` into `destination`
// using the tables in `state`. The output length is taken from the block header.
// NOTE(review): the return statement is not visible in this listing; presumably
// dsiz is returned - confirm.
820 size_t qlz_decompress(const char *source, void *destination, qlz_state_decompress *state)
822 size_t dsiz = qlz_size_decompressed(source);
// Streaming builds: this branch handles output that would run past the end of
// the streaming buffer; it is decoded straight into `destination`.
824 #if QLZ_STREAMING_BUFFER > 0
825 if (state->stream_counter + qlz_size_decompressed(source) - 1 >= QLZ_STREAMING_BUFFER)
// Bit 0 of the flag byte marks a compressed payload.
828 if((*source & 1) == 1)
830 reset_table_decompress(state);
// `destination` doubles as the history limit: matches may not reach before it.
831 dsiz = qlz_decompress_core((const unsigned char *)source, (unsigned char *)destination, dsiz, state, (const unsigned char *)destination);
// Stored (uncompressed) payload: copy it out from behind the header.
835 memcpy(destination, source + qlz_size_header(source), dsiz);
837 state->stream_counter = 0;
838 reset_table_decompress(state);
// Streaming path: decode into the stream buffer at the current position, so
// matches can reach back into earlier output (history = start of stream_buffer),
// then copy the result to the caller.
840 #if QLZ_STREAMING_BUFFER > 0
843 unsigned char *dst = state->stream_buffer + state->stream_counter;
844 if((*source & 1) == 1)
846 dsiz = qlz_decompress_core((const unsigned char *)source, dst, dsiz, state, (const unsigned char *)state->stream_buffer);
// Stored payload on the streaming path: copy it into the stream buffer and
// reset the table.
850 memcpy(dst, source + qlz_size_header(source), dsiz);
851 reset_table_decompress(state);
853 memcpy(destination, dst, dsiz);
854 state->stream_counter += dsiz;