sys/vfs/hammer2/hammer2_lz4.c
/*
   LZ4 - Fast LZ compression algorithm
   Copyright (C) 2011-2013, Yann Collet.
   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
   - LZ4 source repository : http://code.google.com/p/lz4/

   Note : this source file requires "hammer2_lz4_encoder.h"
*/
//**************************************
// Tuning parameters
//**************************************
// MEMORY_USAGE :
// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB;
// 16 -> 64KB; 20 -> 1MB; etc.)
// Increasing memory usage improves compression ratio
// Reduced memory usage can improve speed, due to cache effect
// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
#define MEMORY_USAGE 14
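
// Illustrative sketch (not part of the original source): a compile-time check
// of the memory-usage formula above, assuming the default MEMORY_USAGE of 14
// really does correspond to a 16KB (2^14 byte) hash table allocation.
#if 0
typedef char lz4_memory_usage_check[((1 << MEMORY_USAGE) == 16 * 1024) ? 1 : -1];
#endif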
// HEAPMODE :
// Selects how the default compression function allocates memory for its
// hash table: on the stack (0, default, fastest) or on the heap
// (1, requires a memory allocation via malloc).
// The default allocation strategy is the stack (HEAPMODE 0).
// Note : the explicit *_stack* and *_heap* functions are unaffected by this setting
#define HEAPMODE 1
// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
// This provides a small performance boost on big-endian CPUs,
// but the resulting compressed stream will be incompatible with little-endian CPUs.
// You can set this option to 1 when the data will remain within a
// closed environment.
// This option has no effect on little-endian CPUs (such as x86).
//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1
//**************************************
// CPU Feature Detection
//**************************************
// 32 or 64 bits ?
#if (defined(__x86_64__) || defined(_M_X64))   // Detects 64 bits mode
#  define LZ4_ARCH64 1
#else
#  define LZ4_ARCH64 0
#endif
//This reduced library code is only little-endian compatible;
//if the need arises, please look for the appropriate defines in the
//original complete LZ4 library.
//The same applies to unaligned memory access (enabled by default),
//hardware bit count (also enabled by default), and support for
//Microsoft/Visual Studio compilers.
//**************************************
// Compiler Options
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
/* "restrict" is a known keyword */
#else
#  define restrict  // Disable restrict
#endif

#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value)    (expr)
#endif

#define likely(expr)     expect((expr) != 0, 1)
#define unlikely(expr)   expect((expr) != 0, 0)
//**************************************
// Includes
//**************************************
#include <sys/malloc.h> //for malloc macros
#include "hammer2.h"
#include "hammer2_lz4.h"

//Declaration for kmalloc functions
MALLOC_DECLARE(C_HASHTABLE);
MALLOC_DEFINE(C_HASHTABLE, "comphashtable",
    "A hash table used by LZ4 compression function.");
//**************************************
// Basic Types
//**************************************
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
# include <stdint.h>
  typedef uint8_t  BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef int32_t  S32;
  typedef uint64_t U64;
#else
  typedef unsigned char       BYTE;
  typedef unsigned short      U16;
  typedef unsigned int        U32;
  typedef signed int          S32;
  typedef unsigned long long  U64;
#endif

#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
#  define _PACKED __attribute__ ((packed))
#else
#  define _PACKED
#endif

#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  pragma pack(push, 1)
#endif

typedef struct _U16_S { U16 v; } _PACKED U16_S;
typedef struct _U32_S { U32 v; } _PACKED U32_S;
typedef struct _U64_S { U64 v; } _PACKED U64_S;

#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  pragma pack(pop)
#endif
#define A64(x) (((U64_S *)(x))->v)
#define A32(x) (((U32_S *)(x))->v)
#define A16(x) (((U16_S *)(x))->v)
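
// Illustrative sketch (not part of the original source): A16/A32/A64 perform
// possibly-unaligned loads and stores by casting the byte pointer to one of
// the packed one-member structs above, e.g.:
#if 0
static U32
example_read_u32_unaligned(void* p)
{
    return A32(p);  // unaligned-tolerant read through U32_S
}
#endif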
//**************************************
// Constants
//**************************************
#define HASHTABLESIZE (1 << MEMORY_USAGE)

#define MINMATCH 4

#define COPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH+MINMATCH)
#define MINLENGTH (MFLIMIT+1)

#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1))
#define SKIPSTRENGTH 6
// Increasing this value will make the compression run slower on
// incompressible data

#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define ML_BITS  4
#define ML_MASK  ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
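
// Illustrative sketch (not part of the original source): how a sequence token
// splits under ML_BITS/ML_MASK/RUN_MASK.  The high 4 bits hold the literal run
// length and the low 4 bits the match length; a field equal to its mask (15)
// signals that extra length bytes follow in the stream.
#if 0
static void
example_token_fields(unsigned token)
{
    unsigned literal_length = token >> ML_BITS; // == RUN_MASK => more length bytes follow
    unsigned match_length   = token & ML_MASK;  // == ML_MASK  => more length bytes follow
    (void)literal_length;
    (void)match_length;
}
#endif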
//**************************************
// Architecture-specific macros
//**************************************
#if LZ4_ARCH64  // 64-bit
#  define STEPSIZE 8
#  define UARCH U64
#  define AARCH A64
#  define LZ4_COPYSTEP(s,d)     A64(d) = A64(s); d+=8; s+=8;
#  define LZ4_COPYPACKET(s,d)   LZ4_COPYSTEP(s,d)
#  define LZ4_SECURECOPY(s,d,e) if (d<e) LZ4_WILDCOPY(s,d,e)
#  define HTYPE U32
#  define INITBASE(base)        BYTE* base = ip
#else           // 32-bit
#  define STEPSIZE 4
#  define UARCH U32
#  define AARCH A32
#  define LZ4_COPYSTEP(s,d)     A32(d) = A32(s); d+=4; s+=4;
#  define LZ4_COPYPACKET(s,d)   LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
#  define LZ4_SECURECOPY        LZ4_WILDCOPY
#  define HTYPE BYTE*
#  define INITBASE(base)        int base = 0
#endif
#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#  define LZ4_READ_LITTLEENDIAN_16(d,s,p)  { U16 v = A16(p); \
                                             v = lz4_bswap16(v); \
                                             d = (s) - v; }
#  define LZ4_WRITE_LITTLEENDIAN_16(p,i)   { U16 v = (U16)(i); \
                                             v = lz4_bswap16(v); \
                                             A16(p) = v; \
                                             p+=2; }
#else           // Little Endian
#  define LZ4_READ_LITTLEENDIAN_16(d,s,p)  { d = (s) - A16(p); }
#  define LZ4_WRITE_LITTLEENDIAN_16(p,v)   { A16(p) = v; p+=2; }
#endif
//**************************************
// Macros
//**************************************
#define LZ4_WILDCOPY(s,d,e)   do { LZ4_COPYPACKET(s,d) } while (d<e);
#define LZ4_BLINDCOPY(s,d,l)  { BYTE* e=(d)+(l); LZ4_WILDCOPY(s,d,e); d=e; }
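
// Illustrative sketch (not part of the original source): LZ4_WILDCOPY copies
// in COPYLENGTH-byte packets and may therefore write up to COPYLENGTH-1 bytes
// past 'e'; the COPYLENGTH/LASTLITERALS margins checked in the decoder below
// exist precisely to make that overrun safe.
#if 0
static void
example_wildcopy(BYTE* dst, BYTE* src, size_t len)
{
    BYTE* d = dst;
    BYTE* s = src;
    BYTE* e = dst + len;    // destination must have slack bytes beyond e

    LZ4_WILDCOPY(s, d, e);
}
#endif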
//****************************
// Private functions
//****************************
#if LZ4_ARCH64

static
inline
int
LZ4_NbCommonBytes (register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanReverse64( &r, val );
    return (int)(r>>3);
#  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clzll(val) >> 3);
#  else
    int r;
    if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
    if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
    r += (!val);
    return r;
#  endif
#else
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanForward64( &r, val );
    return (int)(r>>3);
#  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctzll(val) >> 3);
#  else
    static int DeBruijnBytePos[64] = {
        0, 0, 0, 0, 0, 1, 1, 2, 0, 3,
        1, 3, 1, 4, 2, 7, 0, 2, 3, 6,
        1, 5, 3, 5, 1, 3, 4, 4, 2, 5,
        6, 7, 7, 0, 1, 2, 3, 3, 4, 6,
        2, 6, 5, 5, 3, 4, 5, 6, 7, 1,
        2, 4, 6, 4, 4, 5, 7, 2, 6, 5,
        7, 6, 7, 7 };
    return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
#  endif
#endif
}

#else

static
inline
int
LZ4_NbCommonBytes (register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanReverse( &r, val );
    return (int)(r>>3);
#  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clz(val) >> 3);
#  else
    int r;
    if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
    r += (!val);
    return r;
#  endif
#else
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r;
    _BitScanForward( &r, val );
    return (int)(r>>3);
#  elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctz(val) >> 3);
#  else
    static int DeBruijnBytePos[32] = {
        0, 0, 3, 0, 3, 1, 3, 0, 3, 2,
        2, 1, 3, 2, 0, 1, 3, 3, 1, 2,
        2, 2, 2, 0, 3, 1, 2, 0, 1, 0,
        1, 1 };
    return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#  endif
#endif
}

#endif
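
// Illustrative sketch (not part of the original source): the encoder included
// below typically feeds LZ4_NbCommonBytes() the XOR of two machine words, so
// on this little-endian build the returned byte index counts how many leading
// bytes of a candidate match agree.
#if 0
static int
example_common_bytes(BYTE* p1, BYTE* p2)
{
    UARCH diff = AARCH(p1) ^ AARCH(p2);

    if (diff == 0)
        return STEPSIZE;                // the whole word matches
    return LZ4_NbCommonBytes(diff);     // index of the first differing byte
}
#endif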
//******************************
// Compression functions
//******************************

#include "hammer2_lz4_encoder.h"

/*
void* LZ4_createHeapMemory();
int LZ4_freeHeapMemory(void* ctx);

Used to allocate and free hashTable memory
to be used by the LZ4_compress_heap* family of functions.
LZ4_createHeapMemory() returns NULL if memory allocation fails.
*/
void*
LZ4_create(void)
{
    return kmalloc(HASHTABLESIZE, C_HASHTABLE, M_INTWAIT);
}

int
LZ4_free(void* ctx)
{
    kfree(ctx, C_HASHTABLE);
    return 0;
}

int
LZ4_compress_limitedOutput(char* source, char* dest, int inputSize,
    int maxOutputSize)
{
    void* ctx = LZ4_create();
    int result;

    if (ctx == NULL) return 0;  // Failed allocation => compression not done
    if (inputSize < LZ4_64KLIMIT)
        result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest,
            inputSize, maxOutputSize);
    else
        result = LZ4_compress_heap_limitedOutput(ctx, source, dest,
            inputSize, maxOutputSize);
    LZ4_free(ctx);
    return result;
}
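
// Illustrative sketch (not part of the original source): a typical caller of
// LZ4_compress_limitedOutput().  The destination buffer and its size are the
// caller's responsibility; a return value of 0 typically means the data did
// not fit within maxOutputSize (or the hash table allocation failed), in which
// case a caller might fall back to storing the data uncompressed.
#if 0
static int
example_compress(char* in, int in_size, char* out, int out_size)
{
    int clen = LZ4_compress_limitedOutput(in, out, in_size, out_size);

    if (clen == 0)
        return -1;      // not compressible within out_size; keep the original
    return clen;        // number of compressed bytes written to 'out'
}
#endif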
//****************************
// Decompression functions
//****************************

typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive;
typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } end_directive;
typedef enum { full = 0, partial = 1 } exit_directive;


// This generic decompression function covers all use cases.
// It shall be instantiated several times, using different sets of directives.
// Note that it is essential this generic function is really inlined,
// in order to remove useless branches during compilation optimisation.
static
inline
int LZ4_decompress_generic(
                 char* source,
                 char* dest,
                 int inputSize,
                 int outputSize,
                 // outputSize must be != 0; if endOnInput==endOnInputSize,
                 // this value is the max size of the output buffer.

                 int endOnInput,         // endOnOutputSize, endOnInputSize
                 int prefix64k,          // noPrefix, withPrefix
                 int partialDecoding,    // full, partial
                 int targetOutputSize    // only used if partialDecoding==partial
                 )
{
    // Local Variables
    BYTE* restrict ip = (BYTE*) source;
    BYTE* ref;
    BYTE* iend = ip + inputSize;

    BYTE* op = (BYTE*) dest;
    BYTE* oend = op + outputSize;
    BYTE* cpy;
    BYTE* oexit = op + targetOutputSize;

    size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
    size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif


    // Special cases
    if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT;
    // targetOutputSize too large, better decode everything
    if unlikely(outputSize==0) goto _output_error;
    // Empty output buffer

    // Main Loop
    while (1)
    {
        unsigned token;
        size_t length;

        // get runlength
        token = *ip++;
        if ((length=(token>>ML_BITS)) == RUN_MASK)
        {
            unsigned s=255;
            while (((endOnInput)?ip<iend:1) && (s==255))
            {
                s = *ip++;
                length += s;
            }
        }

        // copy literals
        cpy = op+length;
        if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT))
            || (ip+length>iend-(2+1+LASTLITERALS))) )
            || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
        {
            if (partialDecoding)
            {
                if (cpy > oend) goto _output_error;
                // Error : write attempt beyond end of output buffer
                if ((endOnInput) && (ip+length > iend)) goto _output_error;
                // Error : read attempt beyond end of input buffer
            }
            else
            {
                if ((!endOnInput) && (cpy != oend)) goto _output_error;
                // Error : block decoding must stop exactly there,
                // due to parsing restrictions
                if ((endOnInput) && ((ip+length != iend) || (cpy > oend)))
                    goto _output_error;
                // Error : not enough place for another match (min 4) + 5 literals
            }
            memcpy(op, ip, length);
            ip += length;
            op += length;
            break;
            // Necessarily EOF, due to parsing restrictions
        }
        LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;

        // get offset
        LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
        if ((prefix64k==noPrefix) && unlikely(ref < (BYTE*)dest))
            goto _output_error; // Error : offset outside destination buffer

        // get matchlength
        if ((length=(token&ML_MASK)) == ML_MASK)
        {
            while (endOnInput ? ip<iend-(LASTLITERALS+1) : 1)
            // A minimum nb of input bytes must remain for LASTLITERALS + token
            {
                unsigned s = *ip++;
                length += s;
                if (s==255) continue;
                break;
            }
        }

        // copy repeated sequence
        if unlikely((op-ref)<STEPSIZE)
        {
#if LZ4_ARCH64
            size_t dec64 = dec64table[op-ref];
#else
            const size_t dec64 = 0;
#endif
            op[0] = ref[0];
            op[1] = ref[1];
            op[2] = ref[2];
            op[3] = ref[3];
            op += 4, ref += 4; ref -= dec32table[op-ref];
            A32(op) = A32(ref);
            op += STEPSIZE-4; ref -= dec64;
        } else { LZ4_COPYSTEP(ref,op); }
        cpy = op + length - (STEPSIZE-4);

        if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4))
        {
            if (cpy > oend-LASTLITERALS) goto _output_error;
            // Error : last 5 bytes must be literals
            LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
            while(op<cpy) *op++=*ref++;
            op=cpy;
            continue;
        }

        LZ4_WILDCOPY(ref, op, cpy);
        op=cpy; // correction
    }

    // end of decoding
    if (endOnInput)
        return (int) (((char*)op)-dest);    // Nb of output bytes decoded
    else
        return (int) (((char*)ip)-source);  // Nb of input bytes read

    // Overflow error detected
_output_error:
    return (int) (-(((char*)ip)-source))-1;
}

int
LZ4_decompress_safe(char* source, char* dest, int inputSize, int maxOutputSize)
{
    return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize,
        endOnInputSize, noPrefix, full, 0);
}
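
// Illustrative sketch (not part of the original source): a typical caller of
// LZ4_decompress_safe().  A negative return value indicates a malformed or
// truncated input block; a non-negative value is the number of bytes written
// into 'dest', which never exceeds maxOutputSize.
#if 0
static int
example_decompress(char* comp, int comp_size, char* out, int out_size)
{
    int dlen = LZ4_decompress_safe(comp, out, comp_size, out_size);

    if (dlen < 0)
        return -1;      // corrupt input detected by the decoder
    return dlen;        // decompressed size
}
#endif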