/*
   LZ4 - Fast LZ compression algorithm
   Copyright (C) 2011-2017, Yann Collet.

   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   You can contact the author at :
    - LZ4 homepage : http://www.lz4.org
    - LZ4 source repository : https://github.com/lz4/lz4
*/


/*-************************************
*  Tuning parameters
**************************************/
/*
 * LZ4_HEAPMODE :
 * Select how default compression functions will allocate memory for their hash table,
 * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
 */
#ifndef LZ4_HEAPMODE
#  define LZ4_HEAPMODE 0
#endif
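/* Illustrative note (not part of upstream lz4.c) : this knob is meant to be
 * set from the build system, e.g. `cc -DLZ4_HEAPMODE=1 -c lz4.c`, to move the
 * hash table of the default compression functions from the stack to the heap. */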
/*
 * ACCELERATION_DEFAULT :
 * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
 */
#define ACCELERATION_DEFAULT 1
/*-************************************
*  CPU Feature Detection
**************************************/
/* LZ4_FORCE_MEMORY_ACCESS
 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The switch below allows selecting a different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
 * Method 1 : `__packed` statement. It depends on a compiler extension (ie, not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method is portable but violates the C standard.
 *            It can generate buggy code on targets whose assembly generation depends on alignment.
 *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6).
 * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
 * Prefer these methods in priority order (0 > 1 > 2)
 */
#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
#  if defined(__GNUC__) && \
  ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
  || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
#    define LZ4_FORCE_MEMORY_ACCESS 2
#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
#    define LZ4_FORCE_MEMORY_ACCESS 1
#  endif
#endif

/*
 * LZ4_FORCE_SW_BITCOUNT
 * Define this parameter if your target system or compiler does not support hardware bit count
 */
#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for WinCE doesn't support Hardware bit count */
#  define LZ4_FORCE_SW_BITCOUNT
#endif
/*-************************************
*  Dependency
**************************************/
#define LZ4_STATIC_LINKING_ONLY
#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
#include "lz4.h"
/* see also "memory routines" below */


/*-************************************
*  Compiler Options
**************************************/
#ifdef _MSC_VER    /* Visual Studio */
#  include <intrin.h>
#  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
#  pragma warning(disable : 4293)   /* disable: C4293: too large shift (32-bits) */
#endif  /* _MSC_VER */

#ifndef LZ4_FORCE_INLINE
#  ifdef _MSC_VER    /* Visual Studio */
#    define LZ4_FORCE_INLINE static __forceinline
#  else
#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
#      ifdef __GNUC__
#        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
#      else
#        define LZ4_FORCE_INLINE static inline
#      endif
#    else
#      define LZ4_FORCE_INLINE static
#    endif /* __STDC_VERSION__ */
#  endif  /* _MSC_VER */
#endif /* LZ4_FORCE_INLINE */

/* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE
 * GCC on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy,
 * together with a simple 8-byte copy loop as a fall-back path.
 * However, this optimization hurts the decompression speed by >30%,
 * because the execution does not go to the optimized loop
 * for typical compressible data, and all of the preamble checks
 * before going to the fall-back path become useless overhead.
 * This optimization happens only with the -O3 flag, and -O2 generates
 * a simple 8-byte copy loop.
 * With GCC on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy
 * functions are annotated with __attribute__((optimize("O2"))),
 * and also LZ4_wildCopy is forcibly inlined, so that the O2 attribute
 * of LZ4_wildCopy does not affect the compression speed.
 */
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__)
#  define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2")))
#  define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE
#else
#  define LZ4_FORCE_O2_GCC_PPC64LE
#  define LZ4_FORCE_O2_INLINE_GCC_PPC64LE static
#endif

#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value)    (expr)
#endif

#ifndef likely
#define likely(expr)     expect((expr) != 0, 1)
#endif
#ifndef unlikely
#define unlikely(expr)   expect((expr) != 0, 0)
#endif
/*-************************************
*  Memory routines
**************************************/
#include <stdlib.h>   /* malloc, calloc, free */
#define ALLOC(s)          malloc(s)
#define ALLOC_AND_ZERO(s) calloc(1,s)
#define FREEMEM(p)        free(p)
#include <string.h>   /* memset, memcpy */
#define MEM_INIT(p,v,s)   memset((p),(v),(s))


/*-************************************
*  Basic Types
**************************************/
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#  include <stdint.h>
  typedef  uint8_t BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef  int32_t S32;
  typedef uint64_t U64;
  typedef uintptr_t uptrval;
#else
  typedef unsigned char       BYTE;
  typedef unsigned short      U16;
  typedef unsigned int        U32;
  typedef   signed int        S32;
  typedef unsigned long long  U64;
  typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
#endif

#if defined(__x86_64__)
  typedef U64    reg_t;   /* 64-bits in x32 mode */
#else
  typedef size_t reg_t;   /* 32-bits in x32 mode */
#endif

/*-************************************
*  Reading and writing into memory
**************************************/
static unsigned LZ4_isLittleEndian(void)
{
    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
    return one.c[0];
}


#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
/* lie to the compiler about data alignment; use with caution */

static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }

static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }

#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)

/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign;

static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
static U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; }

static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }

#else  /* safe and portable access through memcpy() */

static U16 LZ4_read16(const void* memPtr)
{
    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
}

static U32 LZ4_read32(const void* memPtr)
{
    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
}

static reg_t LZ4_read_ARCH(const void* memPtr)
{
    reg_t val; memcpy(&val, memPtr, sizeof(val)); return val;
}

static void LZ4_write16(void* memPtr, U16 value)
{
    memcpy(memPtr, &value, sizeof(value));
}

static void LZ4_write32(void* memPtr, U32 value)
{
    memcpy(memPtr, &value, sizeof(value));
}

#endif /* LZ4_FORCE_MEMORY_ACCESS */


static U16 LZ4_readLE16(const void* memPtr)
{
    if (LZ4_isLittleEndian()) {
        return LZ4_read16(memPtr);
    } else {
        const BYTE* p = (const BYTE*)memPtr;
        return (U16)((U16)p[0] + (p[1]<<8));
    }
}

static void LZ4_writeLE16(void* memPtr, U16 value)
{
    if (LZ4_isLittleEndian()) {
        LZ4_write16(memPtr, value);
    } else {
        BYTE* p = (BYTE*)memPtr;
        p[0] = (BYTE) value;
        p[1] = (BYTE)(value>>8);
    }
}

/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
LZ4_FORCE_O2_INLINE_GCC_PPC64LE
void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    BYTE* d = (BYTE*)dstPtr;
    const BYTE* s = (const BYTE*)srcPtr;
    BYTE* const e = (BYTE*)dstEnd;

    do { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
}
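/* Illustrative note (not part of upstream lz4.c) : since the copy proceeds in
 * 8-byte strides, a caller must reserve slack after dstEnd for the possible
 * overrun, e.g. :
 *
 *     BYTE buf[100 + 8];                  // 8 spare bytes for the overrun
 *     LZ4_wildCopy(buf, src, buf + 100);  // may write a few bytes past buf+100
 */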
/*-************************************
*  Common Constants
**************************************/
#define MINMATCH 4

#define WILDCOPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
static const int LZ4_minLength = (MFLIMIT+1);

#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define ML_BITS  4
#define ML_MASK  ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
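/* Worked example (illustrative note, not in upstream lz4.c) : a sequence token
 * packs the literal length in its high RUN_BITS nibble and
 * (matchLength - MINMATCH) in its low ML_BITS nibble. 5 literals followed by a
 * 9-byte match :
 *     token = (5 << ML_BITS) | (9 - MINMATCH) = 0x55
 * Lengths reaching RUN_MASK / ML_MASK (15) overflow into extra bytes capped at
 * 255 : 300 literals => high nibble 15, then bytes 255, 30 (15+255+30 = 300). */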
/*-************************************
*  Error detection
**************************************/
#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
#  include <assert.h>
#else
#  ifndef assert
#    define assert(condition) ((void)0)
#  endif
#endif

#define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */

#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
#  include <stdio.h>
static int g_debuglog_enable = 1;
#  define DEBUGLOG(l, ...) {                                  \
                if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
                    fprintf(stderr, __FILE__ ": ");           \
                    fprintf(stderr, __VA_ARGS__);             \
                    fprintf(stderr, " \n");                   \
            }   }
#else
#  define DEBUGLOG(l, ...)      {}    /* disabled */
#endif
/*-************************************
*  Common functions
**************************************/
static unsigned LZ4_NbCommonBytes (reg_t val)
{
    if (LZ4_isLittleEndian()) {
        if (sizeof(val)==8) {
#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r = 0;
            _BitScanForward64( &r, (U64)val );
            return (int)(r>>3);
#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (__builtin_ctzll((U64)val) >> 3);
#       else
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
                                                     0, 3, 1, 3, 1, 4, 2, 7,
                                                     0, 2, 3, 6, 1, 5, 3, 5,
                                                     1, 3, 4, 4, 2, 5, 6, 7,
                                                     7, 0, 1, 2, 3, 3, 4, 6,
                                                     2, 6, 5, 5, 3, 4, 5, 6,
                                                     7, 1, 2, 4, 6, 4, 4, 5,
                                                     7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        } else /* 32 bits */ {
#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r;
            _BitScanForward( &r, (U32)val );
            return (int)(r>>3);
#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (__builtin_ctz((U32)val) >> 3);
#       else
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
                                                     3, 2, 2, 1, 3, 2, 0, 1,
                                                     3, 3, 1, 2, 2, 2, 2, 0,
                                                     3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
        }
    } else   /* Big Endian CPU */ {
        if (sizeof(val)==8) {   /* 64-bits */
#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r = 0;
            _BitScanReverse64( &r, val );
            return (unsigned)(r>>3);
#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (__builtin_clzll((U64)val) >> 3);
#       else
            static const U32 by32 = sizeof(val)*4;  /* 32 on 64 bits (goal), 16 on 32 bits.
                Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
                Note that this code path is never triggered in 32-bits mode. */
            unsigned r;
            if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#       endif
        } else /* 32 bits */ {
#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r = 0;
            _BitScanReverse( &r, (unsigned long)val );
            return (unsigned)(r>>3);
#       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (__builtin_clz((U32)val) >> 3);
#       else
            unsigned r;
            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
            r += (!val);
            return r;
#       endif
        }
    }
}
#define STEPSIZE sizeof(reg_t)

LZ4_FORCE_INLINE
unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
{
    const BYTE* const pStart = pIn;

    if (likely(pIn < pInLimit-(STEPSIZE-1))) {
        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
        if (!diff) {
            pIn+=STEPSIZE; pMatch+=STEPSIZE;
        } else {
            return LZ4_NbCommonBytes(diff);
    }   }

    while (likely(pIn < pInLimit-(STEPSIZE-1))) {
        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
        if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
        pIn += LZ4_NbCommonBytes(diff);
        return (unsigned)(pIn - pStart);
    }

    if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (unsigned)(pIn - pStart);
}
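/* Illustrative example (not part of upstream lz4.c) : LZ4_count() returns the
 * length of the common prefix of pIn and pMatch, e.g. 6 for "abcdefXY" vs
 * "abcdefQR" : machine words are XORed, and LZ4_NbCommonBytes() locates the
 * first differing byte inside the first non-zero XOR result. */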
#ifndef LZ4_COMMONDEFS_ONLY
/*-************************************
*  Local Constants
**************************************/
static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression run slower on incompressible data */
/*-************************************
*  Local Structures and types
**************************************/
typedef enum { notLimited = 0, limitedOutput = 1, fillOutput = 2 } limitedOutput_directive;
typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;

/**
 * This enum distinguishes several different modes of accessing previous
 * content in the stream.
 *
 * - noDict        : There is no preceding content.
 * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
 *                   being compressed are valid and refer to the preceding
 *                   content (of length ctx->dictSize), which is available
 *                   contiguously preceding in memory the content currently
 *                   being compressed.
 * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
 *                   else in memory, starting at ctx->dictionary with length
 *                   ctx->dictSize.
 * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
 *                   content is in a separate context, pointed to by
 *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
 *                   entries in the current context that refer to positions
 *                   preceding the beginning of the current compression are
 *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
 *                   ->dictSize describe the location and size of the preceding
 *                   content, and matches are found by looking in the ctx
 *                   ->dictCtx->hashTable.
 */
typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;

typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
typedef enum { full = 0, partial = 1 } earlyEnd_directive;
/*-************************************
*  Local Utils
**************************************/
int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }
int LZ4_compressBound(int isize)  { return LZ4_COMPRESSBOUND(isize); }
int LZ4_sizeofState() { return LZ4_STREAMSIZE; }


/*-******************************
*  Compression functions
********************************/
static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
{
    if (tableType == byU16)
        return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
    else
        return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
}

static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
{
    static const U64 prime5bytes = 889523592379ULL;
    static const U64 prime8bytes = 11400714785074694791ULL;
    const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
    if (LZ4_isLittleEndian())
        return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
    else
        return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
}

LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
{
    if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType);
    return LZ4_hash4(LZ4_read32(p), tableType);
}
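/* Illustrative note (not part of upstream lz4.c) : both hashers are
 * multiplicative (Fibonacci-style) hashes : multiply the 4- or 5-byte input
 * sequence by a large odd constant, then keep the top hashLog bits. For byU32
 * the 4-byte form reduces to :
 *
 *     h = (sequence * 2654435761U) >> (32 - LZ4_HASHLOG);
 */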
static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
{
    switch (tableType)
    {
    default: /* fallthrough */
    case clearedTable: /* fallthrough */
    case byPtr: { /* illegal! */ assert(0); return; }
    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; }
    case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; }
    }
}

static void LZ4_putPositionOnHash(const BYTE* p, U32 h,
                                  void* tableBase, tableType_t const tableType,
                                  const BYTE* srcBase)
{
    switch (tableType)
    {
    case clearedTable: { /* illegal! */ assert(0); return; }
    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }
    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }
    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }
    }
}

LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
{
    U32 const h = LZ4_hashPosition(p, tableType);
    LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
}

/* LZ4_getIndexOnHash() :
 * Index of match position registered in hash table.
 * hash position must be calculated by using base+index, or dictBase+index.
 * Assumption 1 : only valid if tableType == byU32 or byU16.
 * Assumption 2 : h is presumed valid (within limits of hash table)
 */
static U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
{
    LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
    if (tableType == byU32) {
        const U32* const hashTable = (const U32*) tableBase;
        assert(h < (1U << (LZ4_MEMORY_USAGE-2)));
        return hashTable[h];
    }
    if (tableType == byU16) {
        const U16* const hashTable = (const U16*) tableBase;
        assert(h < (1U << (LZ4_MEMORY_USAGE-1)));
        return hashTable[h];
    }
    assert(0); return 0;  /* forbidden case */
}

static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase)
{
    if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; }
    if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; }
    { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
}

LZ4_FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p,
                                             const void* tableBase, tableType_t tableType,
                                             const BYTE* srcBase)
{
    U32 const h = LZ4_hashPosition(p, tableType);
    return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
}
LZ4_FORCE_INLINE void LZ4_prepareTable(
        LZ4_stream_t_internal* const cctx,
        const int inputSize,
        const tableType_t tableType) {
    /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
     * therefore safe to use no matter what mode we're in. Otherwise, we figure
     * out if it's safe to leave as is or whether it needs to be reset.
     */
    if (cctx->tableType != clearedTable) {
        if (cctx->tableType != tableType
          || (tableType == byU16 && cctx->currentOffset + inputSize >= 0xFFFFU)
          || (tableType == byU32 && cctx->currentOffset > 1 GB)
          || tableType == byPtr
          || inputSize >= 4 KB)
        {
            DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
            MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
            cctx->currentOffset = 0;
            cctx->tableType = clearedTable;
        } else {
            DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
        }
    }

    /* Adding a gap, so all previous entries are > MAX_DISTANCE back, is faster
     * than compressing without a gap. However, compressing with
     * currentOffset == 0 is faster still, so we preserve that case.
     */
    if (cctx->currentOffset != 0 && tableType == byU32) {
        DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
        cctx->currentOffset += 64 KB;
    }

    /* Finally, clear history */
    cctx->dictCtx = NULL;
    cctx->dictionary = NULL;
    cctx->dictSize = 0;
}
/** LZ4_compress_generic() :
    inlined, to ensure branches are decided at compilation time */
LZ4_FORCE_INLINE int LZ4_compress_generic(
                 LZ4_stream_t_internal* const cctx,
                 const char* const source,
                 char* const dest,
                 const int inputSize,
                 int *inputConsumed, /* only written when outputLimited == fillOutput */
                 const int maxOutputSize,
                 const limitedOutput_directive outputLimited,
                 const tableType_t tableType,
                 const dict_directive dictDirective,
                 const dictIssue_directive dictIssue,
                 const U32 acceleration)
{
    const BYTE* ip = (const BYTE*) source;

    U32 const startIndex = cctx->currentOffset;
    const BYTE* base = (const BYTE*) source - startIndex;
    const BYTE* lowLimit;

    const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
    const BYTE* const dictionary =
        dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
    const U32 dictSize =
        dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
    const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0;   /* make indexes in dictCtx comparable with index in current context */

    int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
    U32 const prefixIdxLimit = startIndex - dictSize;   /* used when dictDirective == dictSmall */
    const BYTE* const dictEnd = dictionary + dictSize;
    const BYTE* anchor = (const BYTE*) source;
    const BYTE* const iend = ip + inputSize;
    const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
    const BYTE* const matchlimit = iend - LASTLITERALS;

    /* the dictCtx currentOffset is indexed on the start of the dictionary,
     * while a dictionary in the current context precedes the currentOffset */
    const BYTE* dictBase = dictDirective == usingDictCtx ?
                            dictionary + dictSize - dictCtx->currentOffset :
                            dictionary + dictSize - startIndex;

    BYTE* op = (BYTE*) dest;
    BYTE* const olimit = op + maxOutputSize;

    U32 offset = 0;
    U32 forwardH;

    DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, tableType=%u", inputSize, tableType);
    /* Init conditions */
    if (outputLimited == fillOutput && maxOutputSize < 1) return 0; /* Impossible to store anything */
    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;   /* Unsupported inputSize, too large (or negative) */
    if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0;  /* Size too large (not within 64K limit) */
    if (tableType==byPtr) assert(dictDirective==noDict);      /* only supported use case with byPtr */
    assert(acceleration >= 1);

    lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);

    /* Update context state */
    if (dictDirective == usingDictCtx) {
        /* Subsequent linked blocks can't use the dictionary. */
        /* Instead, they use the block we just compressed. */
        cctx->dictCtx = NULL;
        cctx->dictSize = (U32)inputSize;
    } else {
        cctx->dictSize += (U32)inputSize;
    }
    cctx->currentOffset += (U32)inputSize;
    cctx->tableType = tableType;

    if (inputSize<LZ4_minLength) goto _last_literals;   /* Input too small, no compression (all literals) */

    /* First Byte */
    LZ4_putPosition(ip, cctx->hashTable, tableType, base);
    ip++; forwardH = LZ4_hashPosition(ip, tableType);

    /* Main Loop */
    for ( ; ; ) {
        const BYTE* match;
        BYTE* token;

        /* Find a match */
        if (tableType == byPtr) {
            const BYTE* forwardIp = ip;
            unsigned step = 1;
            unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
            do {
                U32 const h = forwardH;
                ip = forwardIp;
                forwardIp += step;
                step = (searchMatchNb++ >> LZ4_skipTrigger);

                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
                assert(ip < mflimitPlusOne);

                match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
                forwardH = LZ4_hashPosition(forwardIp, tableType);
                LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);

            } while ( (match+MAX_DISTANCE < ip)
                   || (LZ4_read32(match) != LZ4_read32(ip)) );

        } else {   /* byU32, byU16 */

            const BYTE* forwardIp = ip;
            unsigned step = 1;
            unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
            do {
                U32 const h = forwardH;
                U32 const current = (U32)(forwardIp - base);
                U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
                assert(matchIndex <= current);
                assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
                ip = forwardIp;
                forwardIp += step;
                step = (searchMatchNb++ >> LZ4_skipTrigger);

                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
                assert(ip < mflimitPlusOne);

                if (dictDirective == usingDictCtx) {
                    if (matchIndex < startIndex) {
                        /* there was no match, try the dictionary */
                        assert(tableType == byU32);
                        matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
                        match = dictBase + matchIndex;
                        matchIndex += dictDelta;   /* make dictCtx index comparable with current context */
                        lowLimit = dictionary;
                    } else {
                        match = base + matchIndex;
                        lowLimit = (const BYTE*)source;
                    }
                } else if (dictDirective==usingExtDict) {
                    if (matchIndex < startIndex) {
                        DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex);
                        assert(startIndex - matchIndex >= MINMATCH);
                        match = dictBase + matchIndex;
                        lowLimit = dictionary;
                    } else {
                        match = base + matchIndex;
                        lowLimit = (const BYTE*)source;
                    }
                } else {   /* single continuous memory segment */
                    match = base + matchIndex;
                }
                forwardH = LZ4_hashPosition(forwardIp, tableType);
                LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);

                if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) continue;    /* match outside of valid area */
                assert(matchIndex < current);
                if ((tableType != byU16) && (matchIndex+MAX_DISTANCE < current)) continue;  /* too far */
                if (tableType == byU16) assert((current - matchIndex) <= MAX_DISTANCE);     /* too_far presumed impossible with byU16 */

                if (LZ4_read32(match) == LZ4_read32(ip)) {
                    if (maybe_extMem) offset = current - matchIndex;
                    break;   /* match found */
                }

            } while(1);
        }

        /* Catch up */
        while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }

        /* Encode Literals */
        {   unsigned const litLength = (unsigned)(ip - anchor);
            token = op++;
            if ((outputLimited == limitedOutput) &&  /* Check output buffer overflow */
                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)))
                return 0;
            if ((outputLimited == fillOutput) &&
                (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
                op--;
                goto _last_literals;
            }
            if (litLength >= RUN_MASK) {
                int len = (int)litLength-RUN_MASK;
                *token = (RUN_MASK<<ML_BITS);
                for(; len >= 255 ; len-=255) *op++ = 255;
                *op++ = (BYTE)len;
            }
            else *token = (BYTE)(litLength<<ML_BITS);

            /* Copy Literals */
            LZ4_wildCopy(op, anchor, op+litLength);
            op+=litLength;
            DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
                        (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
        }

_next_match:
        /* at this stage, the following variables must be correctly set :
         * - ip : at start of LZ operation
         * - match : at start of previous pattern occurrence; can be within current prefix, or within extDict
         * - offset : if maybe_extMem==1 (constant)
         * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
         * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
         */

        if ((outputLimited == fillOutput) &&
            (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
            /* the match was too close to the end, rewind and go to last literals */
            op = token;
            goto _last_literals;
        }

        /* Encode Offset */
        if (maybe_extMem) {   /* static test */
            DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
            assert(offset <= MAX_DISTANCE && offset > 0);
            LZ4_writeLE16(op, (U16)offset); op+=2;
        } else {
            DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match));
            assert(ip-match <= MAX_DISTANCE);
            LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
        }

        /* Encode MatchLength */
        {   unsigned matchCode;

            if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
              && (lowLimit==dictionary) /* match within extDict */ ) {
                const BYTE* limit = ip + (dictEnd-match);
                assert(dictEnd > match);
                if (limit > matchlimit) limit = matchlimit;
                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
                ip += MINMATCH + matchCode;
                if (ip==limit) {
                    unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
                    matchCode += more;
                    ip += more;
                }
                DEBUGLOG(6, " with matchLength=%u starting in extDict", matchCode+MINMATCH);
            } else {
                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
                ip += MINMATCH + matchCode;
                DEBUGLOG(6, " with matchLength=%u", matchCode+MINMATCH);
            }

            if ((outputLimited) &&    /* Check output buffer overflow */
                (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) ) {
                if (outputLimited == limitedOutput)
                    return 0;
                if (outputLimited == fillOutput) {
                    /* Match description too long : reduce it */
                    U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 2 - 1 - LASTLITERALS) * 255;
                    ip -= matchCode - newMatchCode;
                    matchCode = newMatchCode;
                }
            }
            if (matchCode >= ML_MASK) {
                *token += ML_MASK;
                matchCode -= ML_MASK;
                LZ4_write32(op, 0xFFFFFFFF);
                while (matchCode >= 4*255) {
                    op+=4;
                    LZ4_write32(op, 0xFFFFFFFF);
                    matchCode -= 4*255;
                }
                op += matchCode / 255;
                *op++ = (BYTE)(matchCode % 255);
            } else
                *token += (BYTE)(matchCode);
        }

        anchor = ip;

        /* Test end of chunk */
        if (ip >= mflimitPlusOne) break;

        /* Fill table */
        LZ4_putPosition(ip-2, cctx->hashTable, tableType, base);

        /* Test next position */
        if (tableType == byPtr) {

            match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
            LZ4_putPosition(ip, cctx->hashTable, tableType, base);
            if ( (match+MAX_DISTANCE >= ip)
              && (LZ4_read32(match) == LZ4_read32(ip)) )
            { token=op++; *token=0; goto _next_match; }

        } else {   /* byU32, byU16 */

            U32 const h = LZ4_hashPosition(ip, tableType);
            U32 const current = (U32)(ip-base);
            U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
            assert(matchIndex < current);
            if (dictDirective == usingDictCtx) {
                if (matchIndex < startIndex) {
                    /* there was no match, try the dictionary */
                    matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
                    match = dictBase + matchIndex;
                    lowLimit = dictionary;   /* required for match length counter */
                    matchIndex += dictDelta;
                } else {
                    match = base + matchIndex;
                    lowLimit = (const BYTE*)source;  /* required for match length counter */
                }
            } else if (dictDirective==usingExtDict) {
                if (matchIndex < startIndex) {
                    match = dictBase + matchIndex;
                    lowLimit = dictionary;   /* required for match length counter */
                } else {
                    match = base + matchIndex;
                    lowLimit = (const BYTE*)source;   /* required for match length counter */
                }
            } else {   /* single memory segment */
                match = base + matchIndex;
            }
            LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
            assert(matchIndex < current);
            if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
              && ((tableType==byU16) ? 1 : (matchIndex+MAX_DISTANCE >= current))
              && (LZ4_read32(match) == LZ4_read32(ip)) ) {
                token=op++;
                *token=0;
                if (maybe_extMem) offset = current - matchIndex;
                DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
                            (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
                goto _next_match;
            }
        }

        /* Prepare next loop */
        forwardH = LZ4_hashPosition(++ip, tableType);
    }

_last_literals:
    /* Encode Last Literals */
    {   size_t lastRun = (size_t)(iend - anchor);
        if ( (outputLimited) &&  /* Check output buffer overflow */
            (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
            if (outputLimited == fillOutput) {
                /* adapt lastRun to fill 'dst' */
                lastRun  = (olimit-op) - 1;
                lastRun -= (lastRun+240)/255;
            }
            if (outputLimited == limitedOutput)
                return 0;
        }
        if (lastRun >= RUN_MASK) {
            size_t accumulator = lastRun - RUN_MASK;
            *op++ = RUN_MASK << ML_BITS;
            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
            *op++ = (BYTE) accumulator;
        } else {
            *op++ = (BYTE)(lastRun<<ML_BITS);
        }
        memcpy(op, anchor, lastRun);
        ip = anchor + lastRun;
        op += lastRun;
    }

    if (outputLimited == fillOutput) {
        *inputConsumed = (int) (((const char*)ip)-source);
    }
    DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, (int)(((char*)op) - dest));
    return (int)(((char*)op) - dest);
}
int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
{
    LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
    LZ4_resetStream((LZ4_stream_t*)state);
    if (maxOutputSize >= LZ4_compressBound(inputSize)) {
        if (inputSize < LZ4_64Klimit) {
            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
        } else {
            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > MAX_DISTANCE)) ? byPtr : byU32;
            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
        }
    } else {
        if (inputSize < LZ4_64Klimit) {
            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
        } else {
            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > MAX_DISTANCE)) ? byPtr : byU32;
            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
        }
    }
}
/*! LZ4_compress_fast_extState_fastReset() :
 *  A variant of LZ4_compress_fast_extState().
 *
 *  Using this variant avoids an expensive initialization step. It is only safe
 *  to call if the state buffer is known to be correctly initialized already
 *  (see comment in lz4.h on LZ4_resetStream_fast() for a definition of
 *  "correctly initialized").
 */
int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
{
    LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;

    if (dstCapacity >= LZ4_compressBound(srcSize)) {
        if (srcSize < LZ4_64Klimit) {
            const tableType_t tableType = byU16;
            LZ4_prepareTable(ctx, srcSize, tableType);
            if (ctx->currentOffset) {
                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration);
            } else {
                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
            }
        } else {
            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
            LZ4_prepareTable(ctx, srcSize, tableType);
            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
        }
    } else {
        if (srcSize < LZ4_64Klimit) {
            const tableType_t tableType = byU16;
            LZ4_prepareTable(ctx, srcSize, tableType);
            if (ctx->currentOffset) {
                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration);
            } else {
                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
            }
        } else {
            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
            LZ4_prepareTable(ctx, srcSize, tableType);
            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
        }
    }
}
int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
{
    int result;
#if (LZ4_HEAPMODE)
    LZ4_stream_t* ctxPtr = ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
    if (ctxPtr == NULL) return 0;
#else
    LZ4_stream_t ctx;
    LZ4_stream_t* const ctxPtr = &ctx;
#endif
    result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);

#if (LZ4_HEAPMODE)
    FREEMEM(ctxPtr);
#endif
    return result;
}


int LZ4_compress_default(const char* source, char* dest, int inputSize, int maxOutputSize)
{
    return LZ4_compress_fast(source, dest, inputSize, maxOutputSize, 1);
}
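/* Illustrative usage sketch (not part of upstream lz4.c) : a one-shot round
 * trip through the public API declared in lz4.h. Buffer names are
 * hypothetical.
 *
 *     #include <assert.h>
 *     #include "lz4.h"
 *
 *     void roundTrip(void)
 *     {
 *         const char src[] = "yada yada yada yada yada yada";
 *         char cmp[LZ4_COMPRESSBOUND(sizeof(src))];
 *         char dec[sizeof(src)];
 *         int const cSize = LZ4_compress_default(src, cmp, (int)sizeof(src), (int)sizeof(cmp));
 *         int const dSize = LZ4_decompress_safe(cmp, dec, cSize, (int)sizeof(dec));
 *         assert(cSize > 0);                  // 0 signals a compression failure
 *         assert(dSize == (int)sizeof(src));  // negative values signal malformed input
 *     }
 */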
/* hidden debug function */
/* strangely enough, gcc generates faster code when this function is uncommented, even if unused */
int LZ4_compress_fast_force(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
{
    LZ4_stream_t ctx;
    LZ4_resetStream(&ctx);

    if (inputSize < LZ4_64Klimit)
        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
    else
        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, sizeof(void*)==8 ? byU32 : byPtr, noDict, noDictIssue, acceleration);
}
/* Note!: This function leaves the stream in an unclean/broken state!
 * It is not safe to subsequently use the same state with a _fastReset() or
 * _continue() call without resetting it. */
static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
{
    LZ4_resetStream(state);

    if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {  /* compression success is guaranteed */
        return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
    } else {
        if (*srcSizePtr < LZ4_64Klimit) {
            return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1);
        } else {
            tableType_t const tableType = ((sizeof(void*)==4) && ((uptrval)src > MAX_DISTANCE)) ? byPtr : byU32;
            return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, tableType, noDict, noDictIssue, 1);
        }
    }
}


int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
{
#if (LZ4_HEAPMODE)
    LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
    if (ctx == NULL) return 0;
#else
    LZ4_stream_t ctxBody;
    LZ4_stream_t* ctx = &ctxBody;
#endif

    int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize);

#if (LZ4_HEAPMODE)
    FREEMEM(ctx);
#endif
    return result;
}
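/* Illustrative usage sketch (not part of upstream lz4.c) : filling a
 * fixed-capacity destination and learning how much input was consumed.
 * 'input' and 'inputLength' are hypothetical.
 *
 *     char dst[512];
 *     int  srcSize = (int)inputLength;   // in : bytes available ; out : bytes consumed
 *     int const cSize = LZ4_compress_destSize(input, dst, &srcSize, (int)sizeof(dst));
 *     // on success, the first 'srcSize' input bytes are represented by 'cSize' bytes in dst
 */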
/*-******************************
*  Streaming functions
********************************/

LZ4_stream_t* LZ4_createStream(void)
{
    LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
    LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));    /* A compilation error here means LZ4_STREAMSIZE is not large enough */
    DEBUGLOG(4, "LZ4_createStream %p", lz4s);
    if (lz4s == NULL) return NULL;
    LZ4_resetStream(lz4s);
    return lz4s;
}

void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
{
    DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
}

void LZ4_resetStream_fast(LZ4_stream_t* ctx) {
    LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32);
}

int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
{
    if (!LZ4_stream) return 0;   /* support free on NULL */
    DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream);
    FREEMEM(LZ4_stream);
    return (0);
}
#define HASH_UNIT sizeof(reg_t)
int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
{
    LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse;
    const tableType_t tableType = byU32;
    const BYTE* p = (const BYTE*)dictionary;
    const BYTE* const dictEnd = p + dictSize;
    const BYTE* base;

    DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);

    /* It's necessary to reset the context,
     * and not just continue it with prepareTable()
     * to avoid any risk of generating overflowing matchIndex
     * when compressing using this dictionary */
    LZ4_resetStream(LZ4_dict);

    /* We always increment the offset by 64 KB, since, if the dict is longer,
     * we truncate it to the last 64k, and if it's shorter, we still want to
     * advance by a whole window length so we can provide the guarantee that
     * there are only valid offsets in the window, which allows an optimization
     * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
     * dictionary isn't a full 64k. */

    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
    base = dictEnd - 64 KB - dict->currentOffset;
    dict->dictionary = p;
    dict->dictSize = (U32)(dictEnd - p);
    dict->currentOffset += 64 KB;
    dict->tableType = tableType;

    if (dictSize < (int)HASH_UNIT) {
        return 0;
    }

    while (p <= dictEnd-HASH_UNIT) {
        LZ4_putPosition(p, dict->hashTable, tableType, base);
        p+=3;
    }

    return dict->dictSize;
}
void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dictionary_stream) {
    if (dictionary_stream != NULL) {
        /* If the current offset is zero, we will never look in the
         * external dictionary context, since there is no value a table
         * entry can take that indicates a miss. In that case, we need
         * to bump the offset to something non-zero.
         */
        if (working_stream->internal_donotuse.currentOffset == 0) {
            working_stream->internal_donotuse.currentOffset = 64 KB;
        }
        working_stream->internal_donotuse.dictCtx = &(dictionary_stream->internal_donotuse);
    } else {
        working_stream->internal_donotuse.dictCtx = NULL;
    }
}
static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
{
    if (LZ4_dict->currentOffset + nextSize > 0x80000000) {   /* potential ptrdiff_t overflow (32-bits mode) */
        /* rescale hash table */
        U32 const delta = LZ4_dict->currentOffset - 64 KB;
        const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
        int i;
        DEBUGLOG(4, "LZ4_renormDictT");
        for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
            if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
            else LZ4_dict->hashTable[i] -= delta;
        }
        LZ4_dict->currentOffset = 64 KB;
        if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
        LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
    }
}
int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
{
    const tableType_t tableType = byU32;
    LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse;
    const BYTE* dictEnd = streamPtr->dictionary + streamPtr->dictSize;

    DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);

    if (streamPtr->initCheck) return 0;   /* Uninitialized structure detected */
    LZ4_renormDictT(streamPtr, inputSize);   /* avoid index overflow */
    if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;

    /* invalidate tiny dictionaries */
    if ( (streamPtr->dictSize-1 < 4)   /* intentional underflow */
      && (dictEnd != (const BYTE*)source) ) {
        DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
        streamPtr->dictSize = 0;
        streamPtr->dictionary = (const BYTE*)source;
        dictEnd = (const BYTE*)source;
    }

    /* Check overlapping input/dictionary space */
    {   const BYTE* sourceEnd = (const BYTE*) source + inputSize;
        if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) {
            streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
            if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
            if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
            streamPtr->dictionary = dictEnd - streamPtr->dictSize;
        }
    }

    /* prefix mode : source data follows dictionary */
    if (dictEnd == (const BYTE*)source) {
        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
            return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration);
        else
            return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration);
    }

    /* external dictionary mode */
    {   int result;
        if (streamPtr->dictCtx) {
            /* We depend here on the fact that dictCtx'es (produced by
             * LZ4_loadDict) guarantee that their tables contain no references
             * to offsets between dictCtx->currentOffset - 64 KB and
             * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe
             * to use noDictIssue even when the dict isn't a full 64 KB.
             */
            if (inputSize > 4 KB) {
                /* For compressing large blobs, it is faster to pay the setup
                 * cost to copy the dictionary's tables into the active context,
                 * so that the compression loop is only looking into one table.
                 */
                memcpy(streamPtr, streamPtr->dictCtx, sizeof(LZ4_stream_t));
                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
            } else {
                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
            }
        } else {
            if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration);
            } else {
                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
            }
        }
        streamPtr->dictionary = (const BYTE*)source;
        streamPtr->dictSize = (U32)inputSize;
        return result;
    }
}
/* Hidden debug function, to force-test external dictionary mode */
int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize)
{
    LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse;
    int result;

    LZ4_renormDictT(streamPtr, srcSize);

    if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
        result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1);
    } else {
        result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
    }

    streamPtr->dictionary = (const BYTE*)source;
    streamPtr->dictSize = (U32)srcSize;

    return result;
}
/*! LZ4_saveDict() :
 *  If the previously compressed data block is not guaranteed to remain available at its memory location,
 *  save it into a safer place (char* safeBuffer).
 *  Note : you don't need to call LZ4_loadDict() afterwards;
 *         the dictionary is immediately usable, so you can call LZ4_compress_fast_continue() directly.
 *  Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
 */
int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
{
    LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
    const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize;

    if ((U32)dictSize > 64 KB) dictSize = 64 KB;   /* useless to define a dictionary > 64 KB */
    if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize;

    memmove(safeBuffer, previousDictEnd - dictSize, dictSize);

    dict->dictionary = (const BYTE*)safeBuffer;
    dict->dictSize = (U32)dictSize;

    return dictSize;
}
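/* Illustrative usage sketch (not part of upstream lz4.c) : chained-block
 * compression over a double buffer, so the block just compressed remains
 * addressable and serves as the dictionary for the next one.
 * readBlock()/writeBlock() and 'out'/'outCapacity' are hypothetical.
 *
 *     LZ4_stream_t* const st = LZ4_createStream();
 *     static char ring[2][64 KB];
 *     int idx = 0;
 *     for (;;) {
 *         int const inSize = readBlock(ring[idx], 64 KB);
 *         if (inSize <= 0) break;
 *         {   int const cSize = LZ4_compress_fast_continue(st, ring[idx], out, inSize, (int)outCapacity, 1);
 *             if (cSize <= 0) break;
 *             writeBlock(out, cSize);
 *         }
 *         idx ^= 1;   // alternate buffers; the previous block stays valid in memory
 *     }
 *     LZ4_freeStream(st);
 */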
/*-*****************************
*  Decompression functions
*******************************/
/*! LZ4_decompress_generic() :
 *  This generic decompression function covers all use cases.
 *  It shall be instantiated several times, using different sets of directives.
 *  Note that it is important for performance that this function really get inlined,
 *  in order to remove useless branches during compilation optimization.
 */
LZ4_FORCE_O2_GCC_PPC64LE
LZ4_FORCE_INLINE int LZ4_decompress_generic(
                 const char* const src,
                 char* const dst,
                 int srcSize,
                 int outputSize,         /* If endOnInput==endOnInputSize, this value is `dstCapacity` */

                 int endOnInput,         /* endOnOutputSize, endOnInputSize */
                 int partialDecoding,    /* full, partial */
                 int targetOutputSize,   /* only used if partialDecoding==partial */
                 int dict,               /* noDict, withPrefix64k, usingExtDict */
                 const BYTE* const lowPrefix,  /* always <= dst, == dst when no prefix */
                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
                 const size_t dictSize         /* note : = 0 if noDict */
                 )
{
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;

    BYTE* op = (BYTE*) dst;
    BYTE* const oend = op + outputSize;
    BYTE* cpy;
    BYTE* oexit = op + targetOutputSize;

    const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
    const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
    const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};

    const int safeDecode = (endOnInput==endOnInputSize);
    const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));

    /* Set up the "end" pointers for the shortcut. */
    const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
    const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;

    DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i)", srcSize);

    /* Special cases */
    if ((partialDecoding) && (oexit > oend-MFLIMIT)) oexit = oend-MFLIMIT;                      /* targetOutputSize too high => just decode everything */
    if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
    if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1);
    if ((endOnInput) && unlikely(srcSize==0)) return -1;
1425 /* Main Loop : decode sequences */
1426 while (1) {
1427 const BYTE* match;
1428 size_t offset;
1430 unsigned const token = *ip++;
1431 size_t length = token >> ML_BITS; /* literal length */
1433 assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
1435 /* A two-stage shortcut for the most common case:
1436 * 1) If the literal length is 0..14, and there is enough space,
1437 * enter the shortcut and copy 16 bytes on behalf of the literals
1438 * (in the fast mode, only 8 bytes can be safely copied this way).
1439 * 2) Further if the match length is 4..18, copy 18 bytes in a similar
1440 * manner; but we ensure that there's enough space in the output for
1441 * those 18 bytes earlier, upon entering the shortcut (in other words,
1442 * there is a combined check for both stages).
1444 if ( (endOnInput ? length != RUN_MASK : length <= 8)
1445 /* strictly "less than" on input, to re-enter the loop with at least one byte */
1446 && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
1447 /* Copy the literals */
1448 memcpy(op, ip, endOnInput ? 16 : 8);
1449 op += length; ip += length;
1451 /* The second stage: prepare for match copying, decode full info.
1452 * If it doesn't work out, the info won't be wasted. */
1453 length = token & ML_MASK; /* match length */
1454 offset = LZ4_readLE16(ip); ip += 2;
1455 match = op - offset;
1457 /* Do not deal with overlapping matches. */
1458 if ( (length != ML_MASK)
1459 && (offset >= 8)
1460 && (dict==withPrefix64k || match >= lowPrefix) ) {
1461 /* Copy the match. */
1462 memcpy(op + 0, match + 0, 8);
1463 memcpy(op + 8, match + 8, 8);
1464 memcpy(op +16, match +16, 2);
1465 op += length + MINMATCH;
1466 /* Both stages worked, load the next token. */
1467 continue;
1470 /* The second stage didn't work out, but the info is ready.
1471 * Propel it right to the point of match copying. */
1472 goto _copy_match;
        /* decode literal length */
        if (length == RUN_MASK) {
            unsigned s;
            if (unlikely(endOnInput ? ip >= iend-RUN_MASK : 0)) goto _output_error;   /* overflow detection */
            do {
                s = *ip++;
                length += s;
            } while ( likely(endOnInput ? ip<iend-RUN_MASK : 1) & (s==255) );
            if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) goto _output_error;   /* overflow detection */
            if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) goto _output_error;   /* overflow detection */
        }

        /* copy literals */
        cpy = op+length;
        if ( ((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
          || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
        {
            if (partialDecoding) {
                if (cpy > oend) goto _output_error;                           /* Error : write attempt beyond end of output buffer */
                if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
            } else {
                if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
                if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
            }
            memcpy(op, ip, length);
            ip += length;
            op += length;
            break;   /* Necessarily EOF, due to parsing restrictions */
        }
        LZ4_wildCopy(op, ip, cpy);
        ip += length; op = cpy;

        /* get offset */
        offset = LZ4_readLE16(ip); ip+=2;
        match = op - offset;

        /* get matchlength */
        length = token & ML_MASK;
_copy_match:
        if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
        LZ4_write32(op, (U32)offset);   /* costs ~1%; silence an msan warning when offset==0 */

        if (length == ML_MASK) {
            unsigned s;
            do {
                s = *ip++;
                if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error;
                length += s;
            } while (s==255);
            if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
        }
        length += MINMATCH;

        /* check external dictionary */
        if ((dict==usingExtDict) && (match < lowPrefix)) {
            if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error;   /* doesn't respect parsing restriction */

            if (length <= (size_t)(lowPrefix-match)) {
                /* match can be copied as a single segment from the external dictionary */
                memmove(op, dictEnd - (lowPrefix-match), length);
                op += length;
            } else {
                /* match spans both the external dictionary and the current block */
                size_t const copySize = (size_t)(lowPrefix-match);
                size_t const restSize = length - copySize;
                memcpy(op, dictEnd - copySize, copySize);
                op += copySize;
                if (restSize > (size_t)(op-lowPrefix)) {   /* overlap copy */
                    BYTE* const endOfMatch = op + restSize;
                    const BYTE* copyFrom = lowPrefix;
                    while (op < endOfMatch) *op++ = *copyFrom++;
                } else {
                    memcpy(op, lowPrefix, restSize);
                    op += restSize;
                }
            }
            continue;
        }
        /* copy match within block */
        cpy = op + length;
        if (unlikely(offset<8)) {
            /* overlapping match (offset < 8) : copy the first bytes in small
             * steps, using inc32table/dec64table to realign `match` so that
             * the wider copies below keep replicating the pattern */
            op[0] = match[0];
            op[1] = match[1];
            op[2] = match[2];
            op[3] = match[3];
            match += inc32table[offset];
            memcpy(op+4, match, 4);
            match -= dec64table[offset];
        } else { memcpy(op, match, 8); match+=8; }
        op += 8;

        if (unlikely(cpy>oend-12)) {
            BYTE* const oCopyLimit = oend-(WILDCOPYLENGTH-1);
            if (cpy > oend-LASTLITERALS) goto _output_error;   /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
            if (op < oCopyLimit) {
                LZ4_wildCopy(op, match, oCopyLimit);
                match += oCopyLimit - op;
                op = oCopyLimit;
            }
            while (op<cpy) *op++ = *match++;
        } else {
            memcpy(op, match, 8);
            if (length>16) LZ4_wildCopy(op+8, match+8, cpy);
        }
        op = cpy;   /* correction */
    }

    /* end of decoding */
    if (endOnInput)
        return (int) (((char*)op)-dst);       /* Nb of output bytes decoded */
    else
        return (int) (((const char*)ip)-src); /* Nb of input bytes read */

    /* Overflow error detected */
_output_error:
    return (int) (-(((const char*)ip)-src))-1;
}
/*===== Instantiate the API decoding functions. =====*/

LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
{
    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
                                  endOnInputSize, full, 0, noDict,
                                  (BYTE*)dest, NULL, 0);
}
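/* Usage sketch (illustrative, not part of the library; srcBuf, srcSize,
 * handle_error and DST_CAPACITY are hypothetical names) : decode one
 * independent block whose exact compressed size is known.
 *
 *     char dstBuf[DST_CAPACITY];
 *     int const decSize = LZ4_decompress_safe(srcBuf, dstBuf, srcSize, DST_CAPACITY);
 *     if (decSize < 0) handle_error();   a negative result means malformed
 *                                        input, or dstBuf too small
 */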
LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize)
{
    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
                                  endOnInputSize, partial, targetOutputSize,
                                  noDict, (BYTE*)dest, NULL, 0);
}
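/* Usage sketch (illustrative; buffer names are hypothetical) : stop decoding
 * once roughly the first 512 bytes have been produced. In this version the
 * decoder may emit somewhat more than requested, so `dest` must still be
 * sized to maxDecompressedSize.
 *
 *     int const n = LZ4_decompress_safe_partial(srcBuf, dstBuf, srcSize, 512, DST_CAPACITY);
 */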
LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
{
    return LZ4_decompress_generic(source, dest, 0, originalSize,
                                  endOnOutputSize, full, 0, withPrefix64k,
                                  (BYTE*)dest - 64 KB, NULL, 0);
}
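/* Caution : LZ4_decompress_fast() only knows the decompressed size and trusts
 * the input to be well-formed; it does not bound-check its reads, so it must
 * not be fed untrusted data. Later LZ4 releases deprecate it in favor of
 * LZ4_decompress_safe(). */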
/*===== Instantiate a few more decoding cases, used more than once. =====*/

LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */
int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
{
    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
                                  endOnInputSize, full, 0, withPrefix64k,
                                  (BYTE*)dest - 64 KB, NULL, 0);
}
/* Another obsolete API function, paired with the previous one. */
int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
{
    /* LZ4_decompress_fast doesn't validate match offsets,
     * and thus serves well with any prefixed dictionary. */
    return LZ4_decompress_fast(source, dest, originalSize);
}
LZ4_FORCE_O2_GCC_PPC64LE
static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
                                               size_t prefixSize)
{
    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
                                  endOnInputSize, full, 0, noDict,
                                  (BYTE*)dest-prefixSize, NULL, 0);
}
LZ4_FORCE_O2_GCC_PPC64LE /* Exported under another name, for tests/fullbench.c */
#define LZ4_decompress_safe_extDict LZ4_decompress_safe_forceExtDict
int LZ4_decompress_safe_extDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
                                const void* dictStart, size_t dictSize)
{
    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
                                  endOnInputSize, full, 0, usingExtDict,
                                  (BYTE*)dest, (const BYTE*)dictStart, dictSize);
}
LZ4_FORCE_O2_GCC_PPC64LE
static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
                                       const void* dictStart, size_t dictSize)
{
    return LZ4_decompress_generic(source, dest, 0, originalSize,
                                  endOnOutputSize, full, 0, usingExtDict,
                                  (BYTE*)dest, (const BYTE*)dictStart, dictSize);
}
/* The "double dictionary" mode, for use with e.g. ring buffers: the first part
 * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
 * These routines are used only once, in LZ4_decompress_*_continue().
 */
LZ4_FORCE_INLINE
int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
                                   size_t prefixSize, const void* dictStart, size_t dictSize)
{
    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
                                  endOnInputSize, full, 0, usingExtDict,
                                  (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);
}

LZ4_FORCE_INLINE
int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalSize,
                                   size_t prefixSize, const void* dictStart, size_t dictSize)
{
    return LZ4_decompress_generic(source, dest, 0, originalSize,
                                  endOnOutputSize, full, 0, usingExtDict,
                                  (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);
}
/*===== streaming decompression functions =====*/

LZ4_streamDecode_t* LZ4_createStreamDecode(void)
{
    LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
    return lz4s;
}

int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
{
    if (!LZ4_stream) return 0;   /* support free on NULL */
    FREEMEM(LZ4_stream);
    return 0;
}
/*! LZ4_setStreamDecode() :
 *  Use this function to instruct where to find the dictionary.
 *  This function is not necessary if previous data is still available where it was decoded.
 *  Loading a size of 0 is allowed (same effect as no dictionary).
 * @return : 1 if OK, 0 if error
 */
int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
{
    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
    lz4sd->prefixSize = (size_t) dictSize;
    lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize;
    lz4sd->externalDict = NULL;
    lz4sd->extDictSize = 0;
    return 1;
}
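/* Usage sketch (illustrative; dictBuf and dictLen are hypothetical names) :
 * after moving previously decoded data into a separate buffer, tell the
 * decoder where to find it before decoding the next dependent block.
 *
 *     LZ4_streamDecode_t* const sd = LZ4_createStreamDecode();
 *     LZ4_setStreamDecode(sd, dictBuf, dictLen);
 *     ... subsequent LZ4_decompress_*_continue(sd, ...) calls use dictBuf ...
 */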
/*! LZ4_decoderRingBufferSize() :
 *  when setting a ring buffer for streaming decompression (optional scenario),
 *  provides the minimum size of this ring buffer
 *  to be compatible with any source respecting maxBlockSize condition.
 *  Note : in a ring buffer scenario,
 *  blocks are presumed decompressed next to each other.
 *  When not enough space remains for next block (remainingSize < maxBlockSize),
 *  decoding resumes from beginning of ring buffer.
 * @return : minimum ring buffer size,
 *           or 0 if there is an error (invalid maxBlockSize).
 */
int LZ4_decoderRingBufferSize(int maxBlockSize)
{
    if (maxBlockSize < 0) return 0;
    if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0;
    if (maxBlockSize < 16) maxBlockSize = 16;
    return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize);
}
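/* Usage sketch (illustrative) : size a decoding ring buffer for blocks of at
 * most 4 KB. The returned value covers the 64 KB history window plus a small
 * margin on top of maxBlockSize.
 *
 *     int const rbSize = LZ4_decoderRingBufferSize(4096);
 *     char* const ring = (char*) malloc((size_t) rbSize);
 */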
/*
*_continue() :
    These decoding functions allow decompression of multiple blocks in "streaming" mode.
    Previously decoded blocks must still be available at the memory position where they were decoded.
    If it's not possible, save the relevant part of decoded data into a safe buffer,
    and indicate where it stands using LZ4_setStreamDecode()
*/
LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
{
    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
    int result;

    if (lz4sd->prefixSize == 0) {
        /* The first call, no dictionary yet. */
        assert(lz4sd->extDictSize == 0);
        result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
        if (result <= 0) return result;
        lz4sd->prefixSize = result;
        lz4sd->prefixEnd = (BYTE*)dest + result;
    } else if (lz4sd->prefixEnd == (BYTE*)dest) {
        /* They're rolling the current segment. */
        if (lz4sd->prefixSize >= 64 KB - 1)
            result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
        else if (lz4sd->extDictSize == 0)
            result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize,
                                                         lz4sd->prefixSize);
        else
            result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
                                                    lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
        if (result <= 0) return result;
        lz4sd->prefixSize += result;
        lz4sd->prefixEnd  += result;
    } else {
        /* The buffer wraps around, or they're switching to another buffer. */
        lz4sd->extDictSize = lz4sd->prefixSize;
        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
        result = LZ4_decompress_safe_extDict(source, dest, compressedSize, maxOutputSize,
                                             lz4sd->externalDict, lz4sd->extDictSize);
        if (result <= 0) return result;
        lz4sd->prefixSize = result;
        lz4sd->prefixEnd  = (BYTE*)dest + result;
    }

    return result;
}
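/* Usage sketch (illustrative; readBlock() and the buffer names are
 * hypothetical) : decode a stream of dependent blocks into one contiguous
 * buffer, leaving previously decoded data in place.
 *
 *     LZ4_streamDecode_t* const sd = LZ4_createStreamDecode();
 *     char* dst = dstStart;
 *     while (readBlock(&src, &srcSize)) {
 *         int const n = LZ4_decompress_safe_continue(sd, src, dst, srcSize,
 *                                                    (int)(dstEnd - dst));
 *         if (n <= 0) break;   error, or empty block
 *         dst += n;
 *     }
 *     LZ4_freeStreamDecode(sd);
 */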
LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
{
    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
    int result;

    if (lz4sd->prefixSize == 0) {
        assert(lz4sd->extDictSize == 0);
        result = LZ4_decompress_fast(source, dest, originalSize);
        if (result <= 0) return result;
        lz4sd->prefixSize = originalSize;
        lz4sd->prefixEnd = (BYTE*)dest + originalSize;
    } else if (lz4sd->prefixEnd == (BYTE*)dest) {
        if (lz4sd->prefixSize >= 64 KB - 1 || lz4sd->extDictSize == 0)
            result = LZ4_decompress_fast(source, dest, originalSize);
        else
            result = LZ4_decompress_fast_doubleDict(source, dest, originalSize,
                                                    lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
        if (result <= 0) return result;
        lz4sd->prefixSize += originalSize;
        lz4sd->prefixEnd  += originalSize;
    } else {
        lz4sd->extDictSize = lz4sd->prefixSize;
        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
        result = LZ4_decompress_fast_extDict(source, dest, originalSize,
                                             lz4sd->externalDict, lz4sd->extDictSize);
        if (result <= 0) return result;
        lz4sd->prefixSize = originalSize;
        lz4sd->prefixEnd  = (BYTE*)dest + originalSize;
    }

    return result;
}
/*
Advanced decoding functions :
*_usingDict() :
    These decoding functions work the same as the "_continue" ones,
    except that the dictionary must be explicitly provided as a parameter.
*/

int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
{
    if (dictSize==0)
        return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
    if (dictStart+dictSize == dest) {
        if (dictSize >= 64 KB - 1)
            return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
        return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, dictSize);
    }
    return LZ4_decompress_safe_extDict(source, dest, compressedSize, maxOutputSize, dictStart, dictSize);
}
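/* Usage sketch (illustrative; names are hypothetical) : one-shot decoding
 * with an external dictionary. The same dictionary content must have been
 * used when the block was compressed.
 *
 *     int const n = LZ4_decompress_safe_usingDict(srcBuf, dstBuf, srcSize,
 *                                                 DST_CAPACITY, dictBuf, dictLen);
 */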
int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
{
    if (dictSize==0 || dictStart+dictSize == dest)
        return LZ4_decompress_fast(source, dest, originalSize);
    return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, dictSize);
}
/*=*************************************************
*  Obsolete Functions
***************************************************/
/* obsolete compression functions */
int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
{
    return LZ4_compress_default(source, dest, inputSize, maxOutputSize);
}
int LZ4_compress(const char* source, char* dest, int inputSize)
{
    return LZ4_compress_default(source, dest, inputSize, LZ4_compressBound(inputSize));
}
int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
{
    return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1);
}
int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize)
{
    return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1);
}
int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity)
{
    return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1);
}
int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize)
{
    return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1);
}
/*
These decompression functions are deprecated and should no longer be used.
They are only provided here for compatibility with older user programs.
- LZ4_uncompress is totally equivalent to LZ4_decompress_fast
- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
*/
int LZ4_uncompress (const char* source, char* dest, int outputSize)
{
    return LZ4_decompress_fast(source, dest, outputSize);
}
int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)
{
    return LZ4_decompress_safe(source, dest, isize, maxOutputSize);
}
/* Obsolete Streaming functions */

int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; }

int LZ4_resetStreamState(void* state, char* inputBuffer)
{
    (void)inputBuffer;
    LZ4_resetStream((LZ4_stream_t*)state);
    return 0;
}

void* LZ4_create (char* inputBuffer)
{
    (void)inputBuffer;
    return LZ4_createStream();
}

char* LZ4_slideInputBuffer (void* state)
{
    /* avoid const char * -> char * conversion warning */
    return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary;
}

#endif   /* LZ4_COMMONDEFS_ONLY */