usr/src/uts/common/fs/zfs/lz4.c

/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

/*
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int real_LZ4_uncompress(const char *source, char *dest, int osize);
static int LZ4_compressBound(int isize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/*ARGSUSED*/
size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be
	 * added to the compressed buffer and which, if unhandled, would
	 * confuse the hell out of our decompression function.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}

/*ARGSUSED*/
int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}
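
/*
 * Usage sketch (illustrative only; buffer names are hypothetical and the
 * real callers live in the ZFS compression framework, which hands these
 * routines pre-allocated buffers):
 *
 *	size_t clen = lz4_compress(src, cbuf, srclen, cbuflen, 0);
 *	if (clen == srclen)
 *		... data did not fit in cbuflen bytes; store it uncompressed
 *	else if (lz4_decompress(cbuf, dst, clen, srclen, 0) != 0)
 *		... corrupt or truncated compressed stream
 *
 * The 4-byte big-endian length prefix written by lz4_compress() is what
 * allows lz4_decompress() to ignore any padding that follows the compressed
 * payload.
 */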

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written in buffer dest
 *	    or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *	    destination buffer must be sized to handle worst cases
 *	    situations (input data not compressible) worst case size
 *	    evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *	osize  : is the output size, therefore the original size
 *	return : the number of bytes read in the source buffer.
 *	    If the source stream is malformed, the function will stop
 *	    decoding and return a negative result, indicating the byte
 *	    position of the faulty instruction. This function never
 *	    writes beyond dest + osize, and is therefore protected
 *	    against malicious data packets.
 *	note : destination buffer must be already allocated
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible) primarily useful for memory
 *	allocation of output buffer.
 *
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *	    already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *	    (necessarily <= maxOutputSize). If the source stream is
 *	    malformed, the function will stop decoding and return a
 *	    negative result, indicating the byte position of the faulty
 *	    instruction. This function never writes beyond dest +
 *	    maxOutputSize, and is therefore protected against malicious
 *	    data packets.
 *	note : Destination buffer must be already allocated.
 *	    This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 */
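
/*
 * Sketch of the ILLUMOS calling convention described above (illustrative
 * only; variable names are hypothetical). The scratch structure is supplied
 * by the caller, never allocated by the compressor itself:
 *
 *	struct refTables *ctx = kmem_zalloc(sizeof (struct refTables),
 *	    KM_NOSLEEP);
 *	if (ctx != NULL) {
 *		clen = LZ4_compressCtx(ctx, src, dst, srclen, dstlen);
 *		kmem_free(ctx, sizeof (struct refTables));
 *	}
 *
 * real_LZ4_compress() below follows exactly this pattern when HEAPMODE is
 * set, choosing LZ4_compress64kCtx() instead for inputs smaller than
 * LZ4_64KLIMIT (~64KB).
 */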

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 *	Lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip faster data segments considered "incompressible".
 *	This may decrease compression ratio dramatically, but will be
 *	faster on incompressible data. Increasing this value will make
 *	the algorithm search more before declaring a segment "incompressible".
 *	This could improve compression a bit, but will be slower on
 *	incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 *	performance on big-endian CPUs, but the resulting compressed stream
 *	will be incompatible with little-endian CPUs. You can set this option
 *	to 1 in situations where data will stay within a closed environment.
 *	This option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
    defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
    defined(__LP64__) || defined(_LP64))
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Limits the amount of stack space that the algorithm may consume to hold
 * the compression lookup table. The value `9' here means we'll never use
 * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
 * If more memory is needed, it is allocated from the heap.
 */
#define	STACKLIMIT 9
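
/*
 * With the defaults above, COMPRESSIONLEVEL 12 implies a 2^(12+2) = 16KB
 * lookup table, which exceeds the 2^(9+2) = 2KB that STACKLIMIT 9 permits
 * on the stack, so HEAPMODE (defined with the other constants below)
 * evaluates to 1 and the table is allocated from the heap instead.
 */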

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
	defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
	defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
	defined(__powerpc) || defined(powerpc) || \
	((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

#ifdef __sparc
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L	/* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define	restrict
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifdef _MSC_VER
/* Visual Studio */
/* Visual is not C99, but supports some kind of inline */
#define	inline __forceinline
#if LZ4_ARCH64
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward64)
#pragma intrinsic(_BitScanReverse64)
#else /* !LZ4_ARCH64 */
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
#endif /* !LZ4_ARCH64 */
#endif /* _MSC_VER */

#ifdef _MSC_VER
#define	lz4_bswap16(x) _byteswap_ushort(x)
#else /* !_MSC_VER */
#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))
#endif /* !_MSC_VER */

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)    (__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)    (expr)
#endif

#define	likely(expr)	expect((expr) != 0, 1)
#define	unlikely(expr)	expect((expr) != 0, 0)

/* Basic types */
#if defined(_MSC_VER)
/* Visual Studio does not support 'stdint' natively */
#define	BYTE	unsigned __int8
#define	U16	unsigned __int16
#define	U32	unsigned __int32
#define	S32	__int32
#define	U64	unsigned __int64
#else /* !defined(_MSC_VER) */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t
#endif /* !defined(_MSC_VER) */

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines whether memory is allocated on the stack (local variable),
 * or on the heap (kmem_alloc()).
 */
#define	HEAPMODE (HASH_LOG > STACKLIMIT)
#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
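
/*
 * Together these describe the one-byte sequence token used throughout the
 * encoder and decoder below: the upper RUN_BITS (4) hold the literal-run
 * length and the lower ML_BITS (4) hold the match length, with the all-ones
 * value (RUN_MASK/ML_MASK == 15) in either field meaning the length
 * continues in following bytes, each adding up to 255.
 */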

/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
#endif

/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};

/* Macros */
#define	LZ4_HASH_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p)	LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e)	do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l)	{ BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }

/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Public functions */

static int
LZ4_compressBound(int isize)
{
	return (isize + (isize / 255) + 16);
}
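
/*
 * For example, a 128K ZFS record (isize == 131072) has a worst-case bound of
 * 131072 + 131072/255 + 16 = 131072 + 514 + 16 = 131602 bytes.
 */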

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
	int result;

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	kmem_free(ctx, sizeof (struct refTables));
	return (result);
#else
	if (isize < (int)LZ4_64KLIMIT)
		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}

/* Decompression functions */

/*
 * Note: The decoding functions real_LZ4_uncompress() and
 *	LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
 *	attacks. They will never write nor read outside of the provided
 *	output buffers. LZ4_uncompress_unknownOutputSize() also ensures that
 *	it will never read outside of the input buffer. A corrupted input
 *	will produce an error result, a negative int, indicating the position
 *	of the error within the input stream.
 */
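
/*
 * For example, a return value of -37 from either decoder indicates that the
 * fault was detected at (roughly) byte position 37 of the compressed input;
 * lz4_decompress() above simply maps any negative return to a nonzero
 * failure code.
 */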

static int
real_LZ4_uncompress(const char *source, char *dest, int osize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + osize;
	BYTE *cpy;

	unsigned token;

	size_t length;
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	for (;;) {
		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			size_t len;
			for (; (len = *ip++) == 255; length += 255) {
			}
			length += len;
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail! */
		if unlikely(cpy > oend - COPYLENGTH) {
			if (cpy != oend)
				/* Error: we must necessarily stand at EOF */
				goto _output_error;
			(void) memcpy(op, ip, length);
			ip += length;
			break;	/* EOF */
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if unlikely(ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside destination
			 * buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			for (; *ip == 255; length += 255) {
				ip++;
			}
			length += *ip++;
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write beyond destination
				 * buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since last
				 * 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)ip) - source);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail! */
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires consuming all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}