1 /*
2 LZ4 - Fast LZ compression algorithm
3 Copyright (C) 2011-2023, Yann Collet.
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 You can contact the author at :
31 - LZ4 homepage : http://www.lz4.org
32 - LZ4 source repository : https://github.com/lz4/lz4
35 /*-************************************
36 * Tuning parameters
37 **************************************/
39 * LZ4_HEAPMODE :
40 * Select how stateless compression functions like `LZ4_compress_default()`
41 * allocate memory for their hash table,
42 * on the stack (0:default, fastest), or on the heap (1:requires malloc()).
44 #ifndef LZ4_HEAPMODE
45 # define LZ4_HEAPMODE 0
46 #endif
49 * LZ4_ACCELERATION_DEFAULT :
50 * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
52 #define LZ4_ACCELERATION_DEFAULT 1
54 * LZ4_ACCELERATION_MAX :
55 * Any "acceleration" value higher than this threshold
56 * gets treated as LZ4_ACCELERATION_MAX instead (fix #876)
58 #define LZ4_ACCELERATION_MAX 65537
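/* Editor's illustrative sketch (not part of upstream LZ4): how the two
 * acceleration bounds above behave at the public API level, assuming the
 * declarations from lz4.h. Values <= 0 fall back to LZ4_ACCELERATION_DEFAULT,
 * and anything above LZ4_ACCELERATION_MAX is clamped. */
#if 0   /* illustrative only */
#include "lz4.h"

static int example_acceleration(const char* src, int srcSize,
                                char* dst, int dstCapacity)
{
    /* acceleration 0 (or any negative value) is remapped to the default (1) */
    int const a = LZ4_compress_fast(src, dst, srcSize, dstCapacity, 0);
    /* an oversized value is clamped to LZ4_ACCELERATION_MAX (65537) */
    int const b = LZ4_compress_fast(src, dst, srcSize, dstCapacity, 1000000);
    return (a > 0) && (b > 0);   /* both succeed if dstCapacity is large enough */
}
#endif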
61 /*-************************************
62 * CPU Feature Detection
63 **************************************/
64 /* LZ4_FORCE_MEMORY_ACCESS
65 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
66 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
67 * The switch below allows selecting a different access method for improved performance.
68 * Method 0 (default) : use `memcpy()`. Safe and portable.
69 * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
70 * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
71 * Method 2 : direct access. This method is portable but violates the C standard.
72 * It can generate buggy code on targets whose assembly generation depends on alignment.
73 * But in some circumstances, it's the only known way to get the most performance (i.e., GCC + ARMv6)
74 * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
75 * Prefer these methods in priority order (0 > 1 > 2)
77 #ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */
78 # if defined(__GNUC__) && \
79 ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
80 || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
81 # define LZ4_FORCE_MEMORY_ACCESS 2
82 # elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) || defined(_MSC_VER)
83 # define LZ4_FORCE_MEMORY_ACCESS 1
84 # endif
85 #endif
88 * LZ4_FORCE_SW_BITCOUNT
89 * Define this parameter if your target system or compiler does not support hardware bit count
91 #if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */
92 # undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */
93 # define LZ4_FORCE_SW_BITCOUNT
94 #endif
98 /*-************************************
99 * Dependency
100 **************************************/
102 * LZ4_SRC_INCLUDED:
103 * Amalgamation flag, whether lz4.c is included
105 #ifndef LZ4_SRC_INCLUDED
106 # define LZ4_SRC_INCLUDED 1
107 #endif
109 #ifndef LZ4_DISABLE_DEPRECATE_WARNINGS
110 # define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
111 #endif
113 #ifndef LZ4_STATIC_LINKING_ONLY
114 # define LZ4_STATIC_LINKING_ONLY
115 #endif
116 #include "lz4.h"
117 /* see also "memory routines" below */
120 /*-************************************
121 * Compiler Options
122 **************************************/
123 #if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */
124 # include <intrin.h> /* only present in VS2005+ */
125 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
126 # pragma warning(disable : 6237) /* disable: C6237: conditional expression is always 0 */
127 # pragma warning(disable : 6239) /* disable: C6239: (<non-zero constant> && <expression>) always evaluates to the result of <expression> */
128 # pragma warning(disable : 6240) /* disable: C6240: (<expression> && <non-zero constant>) always evaluates to the result of <expression> */
129 # pragma warning(disable : 6326) /* disable: C6326: Potential comparison of a constant with another constant */
130 #endif /* _MSC_VER */
132 #ifndef LZ4_FORCE_INLINE
133 # if defined (_MSC_VER) && !defined (__clang__) /* MSVC */
134 # define LZ4_FORCE_INLINE static __forceinline
135 # else
136 # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
137 # if defined (__GNUC__) || defined (__clang__)
138 # define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
139 # else
140 # define LZ4_FORCE_INLINE static inline
141 # endif
142 # else
143 # define LZ4_FORCE_INLINE static
144 # endif /* __STDC_VERSION__ */
145 # endif /* _MSC_VER */
146 #endif /* LZ4_FORCE_INLINE */
148 /* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
149 * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
150 * together with a simple 8-byte copy loop as a fall-back path.
151 * However, this optimization hurts the decompression speed by >30%,
152 * because the execution does not go to the optimized loop
153 * for typical compressible data, and all of the preamble checks
154 * before going to the fall-back path become useless overhead.
155 * This optimization happens only with the -O3 flag, and -O2 generates
156 * a simple 8-byte copy loop.
157 * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
158 * functions are annotated with __attribute__((optimize("O2"))),
159 * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
160 * of LZ4_wildCopy8 does not affect the compression speed.
162 #if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
163 # define LZ4_FORCE_O2 __attribute__((optimize("O2")))
164 # undef LZ4_FORCE_INLINE
165 # define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline))
166 #else
167 # define LZ4_FORCE_O2
168 #endif
170 #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
171 # define expect(expr,value) (__builtin_expect ((expr),(value)) )
172 #else
173 # define expect(expr,value) (expr)
174 #endif
176 #ifndef likely
177 #define likely(expr) expect((expr) != 0, 1)
178 #endif
179 #ifndef unlikely
180 #define unlikely(expr) expect((expr) != 0, 0)
181 #endif
183 /* Should the alignment test prove unreliable, for some reason,
184 * it can be disabled by setting LZ4_ALIGN_TEST to 0 */
185 #ifndef LZ4_ALIGN_TEST /* can be externally provided */
186 # define LZ4_ALIGN_TEST 1
187 #endif
190 /*-************************************
191 * Memory routines
192 **************************************/
194 /*! LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION :
195 * Disable relatively high-level LZ4/HC functions that use dynamic memory
196 * allocation functions (malloc(), calloc(), free()).
198 * Note that this is a compile-time switch. And since it disables
199 * public/stable LZ4 v1 API functions, we don't recommend using this
200 * symbol to generate a library for distribution.
202 * The following public functions are removed when this symbol is defined.
203 * - lz4 : LZ4_createStream, LZ4_freeStream,
204 * LZ4_createStreamDecode, LZ4_freeStreamDecode, LZ4_create (deprecated)
205 * - lz4hc : LZ4_createStreamHC, LZ4_freeStreamHC,
206 * LZ4_createHC (deprecated), LZ4_freeHC (deprecated)
207 * - lz4frame, lz4file : All LZ4F_* functions
209 #if defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
210 # define ALLOC(s) lz4_error_memory_allocation_is_disabled
211 # define ALLOC_AND_ZERO(s) lz4_error_memory_allocation_is_disabled
212 # define FREEMEM(p) lz4_error_memory_allocation_is_disabled
213 #elif defined(LZ4_USER_MEMORY_FUNCTIONS)
214 /* memory management functions can be customized by the user project.
215 * The functions below must exist somewhere in the project
216 * and be available at link time */
217 void* LZ4_malloc(size_t s);
218 void* LZ4_calloc(size_t n, size_t s);
219 void LZ4_free(void* p);
220 # define ALLOC(s) LZ4_malloc(s)
221 # define ALLOC_AND_ZERO(s) LZ4_calloc(1,s)
222 # define FREEMEM(p) LZ4_free(p)
223 #else
224 # include <stdlib.h> /* malloc, calloc, free */
225 # define ALLOC(s) malloc(s)
226 # define ALLOC_AND_ZERO(s) calloc(1,s)
227 # define FREEMEM(p) free(p)
228 #endif
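/* Editor's illustrative sketch (not part of upstream LZ4): what a project
 * building with -DLZ4_USER_MEMORY_FUNCTIONS is expected to provide. The three
 * definitions below simply forward to the C standard library; a real project
 * would typically route them to its own allocator. */
#if 0   /* illustrative only */
#include <stdlib.h>   /* malloc, calloc, free */

void* LZ4_malloc(size_t s)           { return malloc(s); }
void* LZ4_calloc(size_t n, size_t s) { return calloc(n, s); }
void  LZ4_free(void* p)              { free(p); }
#endif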
230 #if ! LZ4_FREESTANDING
231 # include <string.h> /* memset, memcpy */
232 #endif
233 #if !defined(LZ4_memset)
234 # define LZ4_memset(p,v,s) memset((p),(v),(s))
235 #endif
236 #define MEM_INIT(p,v,s) LZ4_memset((p),(v),(s))
239 /*-************************************
240 * Common Constants
241 **************************************/
242 #define MINMATCH 4
244 #define WILDCOPYLENGTH 8
245 #define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
246 #define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
247 #define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
248 #define FASTLOOP_SAFE_DISTANCE 64
249 static const int LZ4_minLength = (MFLIMIT+1);
251 #define KB *(1 <<10)
252 #define MB *(1 <<20)
253 #define GB *(1U<<30)
255 #define LZ4_DISTANCE_ABSOLUTE_MAX 65535
256 #if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */
257 # error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
258 #endif
260 #define ML_BITS 4
261 #define ML_MASK ((1U<<ML_BITS)-1)
262 #define RUN_BITS (8-ML_BITS)
263 #define RUN_MASK ((1U<<RUN_BITS)-1)
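/* Editor's note (not part of upstream LZ4): how these masks are used. Each
 * sequence starts with a one-byte token: the upper 4 bits (RUN_BITS) hold the
 * literal length and the lower 4 bits (ML_BITS) hold matchLength - MINMATCH;
 * the value 15 (RUN_MASK / ML_MASK) in either field signals that extra length
 * bytes follow. For example, 3 literals followed by a 9-byte match encode as
 * token = (3 << ML_BITS) | (9 - MINMATCH) = 0x35. */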
266 /*-************************************
267 * Error detection
268 **************************************/
269 #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
270 # include <assert.h>
271 #else
272 # ifndef assert
273 # define assert(condition) ((void)0)
274 # endif
275 #endif
277 #define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */
279 #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
280 # include <stdio.h>
281 static int g_debuglog_enable = 1;
282 # define DEBUGLOG(l, ...) { \
283 if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
284 fprintf(stderr, __FILE__ " %i: ", __LINE__); \
285 fprintf(stderr, __VA_ARGS__); \
286 fprintf(stderr, " \n"); \
288 #else
289 # define DEBUGLOG(l, ...) {} /* disabled */
290 #endif
292 static int LZ4_isAligned(const void* ptr, size_t alignment)
294 return ((size_t)ptr & (alignment -1)) == 0;
298 /*-************************************
299 * Types
300 **************************************/
301 #include <limits.h>
302 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
303 # include <stdint.h>
304 typedef uint8_t BYTE;
305 typedef uint16_t U16;
306 typedef uint32_t U32;
307 typedef int32_t S32;
308 typedef uint64_t U64;
309 typedef uintptr_t uptrval;
310 #else
311 # if UINT_MAX != 4294967295UL
312 # error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
313 # endif
314 typedef unsigned char BYTE;
315 typedef unsigned short U16;
316 typedef unsigned int U32;
317 typedef signed int S32;
318 typedef unsigned long long U64;
319 typedef size_t uptrval; /* generally true, except OpenVMS-64 */
320 #endif
322 #if defined(__x86_64__)
323 typedef U64 reg_t; /* 64-bits in x32 mode */
324 #else
325 typedef size_t reg_t; /* 32-bits in x32 mode */
326 #endif
328 typedef enum {
329 notLimited = 0,
330 limitedOutput = 1,
331 fillOutput = 2
332 } limitedOutput_directive;
335 /*-************************************
336 * Reading and writing into memory
337 **************************************/
340 * LZ4 relies on memcpy with a constant size being inlined. In freestanding
341 * environments, the compiler can't assume the implementation of memcpy() is
342 * standard compliant, so it can't apply its specialized memcpy() inlining
343 * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
344 * memcpy() as if it were standard compliant, so it can inline it in freestanding
345 * environments. This is needed when decompressing the Linux Kernel, for example.
347 #if !defined(LZ4_memcpy)
348 # if defined(__GNUC__) && (__GNUC__ >= 4)
349 # define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
350 # else
351 # define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
352 # endif
353 #endif
355 #if !defined(LZ4_memmove)
356 # if defined(__GNUC__) && (__GNUC__ >= 4)
357 # define LZ4_memmove __builtin_memmove
358 # else
359 # define LZ4_memmove memmove
360 # endif
361 #endif
363 static unsigned LZ4_isLittleEndian(void)
365 const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
366 return one.c[0];
369 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
370 #define LZ4_PACK( __Declaration__ ) __Declaration__ __attribute__((__packed__))
371 #elif defined(_MSC_VER)
372 #define LZ4_PACK( __Declaration__ ) __pragma( pack(push, 1) ) __Declaration__ __pragma( pack(pop))
373 #endif
375 #if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
376 /* lie to the compiler about data alignment; use with caution */
378 static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
379 static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
380 static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }
382 static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
383 static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
385 #elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
387 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
388 /* currently only defined for gcc and icc */
389 LZ4_PACK(typedef struct { U16 u16; }) LZ4_unalign16;
390 LZ4_PACK(typedef struct { U32 u32; }) LZ4_unalign32;
391 LZ4_PACK(typedef struct { reg_t uArch; }) LZ4_unalignST;
393 static U16 LZ4_read16(const void* ptr) { return ((const LZ4_unalign16*)ptr)->u16; }
394 static U32 LZ4_read32(const void* ptr) { return ((const LZ4_unalign32*)ptr)->u32; }
395 static reg_t LZ4_read_ARCH(const void* ptr) { return ((const LZ4_unalignST*)ptr)->uArch; }
397 static void LZ4_write16(void* memPtr, U16 value) { ((LZ4_unalign16*)memPtr)->u16 = value; }
398 static void LZ4_write32(void* memPtr, U32 value) { ((LZ4_unalign32*)memPtr)->u32 = value; }
400 #else /* safe and portable access using memcpy() */
402 static U16 LZ4_read16(const void* memPtr)
404 U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
407 static U32 LZ4_read32(const void* memPtr)
409 U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
412 static reg_t LZ4_read_ARCH(const void* memPtr)
414 reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
417 static void LZ4_write16(void* memPtr, U16 value)
419 LZ4_memcpy(memPtr, &value, sizeof(value));
422 static void LZ4_write32(void* memPtr, U32 value)
424 LZ4_memcpy(memPtr, &value, sizeof(value));
427 #endif /* LZ4_FORCE_MEMORY_ACCESS */
430 static U16 LZ4_readLE16(const void* memPtr)
432 if (LZ4_isLittleEndian()) {
433 return LZ4_read16(memPtr);
434 } else {
435 const BYTE* p = (const BYTE*)memPtr;
436 return (U16)((U16)p[0] | (p[1]<<8));
440 #ifdef LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT
441 static U32 LZ4_readLE32(const void* memPtr)
443 if (LZ4_isLittleEndian()) {
444 return LZ4_read32(memPtr);
445 } else {
446 const BYTE* p = (const BYTE*)memPtr;
447 return (U32)p[0] | (p[1]<<8) | (p[2]<<16) | (p[3]<<24);
450 #endif
452 static void LZ4_writeLE16(void* memPtr, U16 value)
454 if (LZ4_isLittleEndian()) {
455 LZ4_write16(memPtr, value);
456 } else {
457 BYTE* p = (BYTE*)memPtr;
458 p[0] = (BYTE) value;
459 p[1] = (BYTE)(value>>8);
463 /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
464 LZ4_FORCE_INLINE
465 void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
467 BYTE* d = (BYTE*)dstPtr;
468 const BYTE* s = (const BYTE*)srcPtr;
469 BYTE* const e = (BYTE*)dstEnd;
471 do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
474 static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
475 static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
478 #ifndef LZ4_FAST_DEC_LOOP
479 # if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
480 # define LZ4_FAST_DEC_LOOP 1
481 # elif defined(__aarch64__) && defined(__APPLE__)
482 # define LZ4_FAST_DEC_LOOP 1
483 # elif defined(__aarch64__) && !defined(__clang__)
484 /* On non-Apple aarch64, we disable this optimization for clang because
485 * on certain mobile chipsets, performance is reduced with clang. For
486 * more information refer to https://github.com/lz4/lz4/pull/707 */
487 # define LZ4_FAST_DEC_LOOP 1
488 # else
489 # define LZ4_FAST_DEC_LOOP 0
490 # endif
491 #endif
493 #if LZ4_FAST_DEC_LOOP
495 LZ4_FORCE_INLINE void
496 LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
498 assert(srcPtr + offset == dstPtr);
499 if (offset < 8) {
500 LZ4_write32(dstPtr, 0); /* silence an msan warning when offset==0 */
501 dstPtr[0] = srcPtr[0];
502 dstPtr[1] = srcPtr[1];
503 dstPtr[2] = srcPtr[2];
504 dstPtr[3] = srcPtr[3];
505 srcPtr += inc32table[offset];
506 LZ4_memcpy(dstPtr+4, srcPtr, 4);
507 srcPtr -= dec64table[offset];
508 dstPtr += 8;
509 } else {
510 LZ4_memcpy(dstPtr, srcPtr, 8);
511 dstPtr += 8;
512 srcPtr += 8;
515 LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
518 /* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
519 * this version copies 16 bytes twice (instead of 32 bytes once)
520 * because it must be compatible with offsets >= 16. */
521 LZ4_FORCE_INLINE void
522 LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
524 BYTE* d = (BYTE*)dstPtr;
525 const BYTE* s = (const BYTE*)srcPtr;
526 BYTE* const e = (BYTE*)dstEnd;
528 do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
531 /* LZ4_memcpy_using_offset() presumes :
532 * - dstEnd >= dstPtr + MINMATCH
533 * - there are at least 12 bytes available to write after dstEnd */
534 LZ4_FORCE_INLINE void
535 LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
537 BYTE v[8];
539 assert(dstEnd >= dstPtr + MINMATCH);
541 switch(offset) {
542 case 1:
543 MEM_INIT(v, *srcPtr, 8);
544 break;
545 case 2:
546 LZ4_memcpy(v, srcPtr, 2);
547 LZ4_memcpy(&v[2], srcPtr, 2);
548 #if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier */
549 # pragma warning(push)
550 # pragma warning(disable : 6385) /* warning C6385: Reading invalid data from 'v'. */
551 #endif
552 LZ4_memcpy(&v[4], v, 4);
553 #if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier */
554 # pragma warning(pop)
555 #endif
556 break;
557 case 4:
558 LZ4_memcpy(v, srcPtr, 4);
559 LZ4_memcpy(&v[4], srcPtr, 4);
560 break;
561 default:
562 LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
563 return;
566 LZ4_memcpy(dstPtr, v, 8);
567 dstPtr += 8;
568 while (dstPtr < dstEnd) {
569 LZ4_memcpy(dstPtr, v, 8);
570 dstPtr += 8;
573 #endif
576 /*-************************************
577 * Common functions
578 **************************************/
579 static unsigned LZ4_NbCommonBytes (reg_t val)
581 assert(val != 0);
582 if (LZ4_isLittleEndian()) {
583 if (sizeof(val) == 8) {
584 # if defined(_MSC_VER) && (_MSC_VER >= 1800) && (defined(_M_AMD64) && !defined(_M_ARM64EC)) && !defined(LZ4_FORCE_SW_BITCOUNT)
585 /*-*************************************************************************************************
586 * ARM64EC is a Microsoft-designed ARM64 ABI compatible with AMD64 applications on ARM64 Windows 11.
587 * The ARM64EC ABI does not support AVX/AVX2/AVX512 instructions, nor their relevant intrinsics
588 * including _tzcnt_u64. Therefore, we need to neuter the _tzcnt_u64 code path for ARM64EC.
589 ****************************************************************************************************/
590 # if defined(__clang__) && (__clang_major__ < 10)
591 /* Avoid undefined clang-cl intrinsics issue.
592 * See https://github.com/lz4/lz4/pull/1017 for details. */
593 return (unsigned)__builtin_ia32_tzcnt_u64(val) >> 3;
594 # else
595 /* x64 CPUs without BMI support interpret `TZCNT` as `REP BSF` */
596 return (unsigned)_tzcnt_u64(val) >> 3;
597 # endif
598 # elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
599 unsigned long r = 0;
600 _BitScanForward64(&r, (U64)val);
601 return (unsigned)r >> 3;
602 # elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
603 ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
604 !defined(LZ4_FORCE_SW_BITCOUNT)
605 return (unsigned)__builtin_ctzll((U64)val) >> 3;
606 # else
607 const U64 m = 0x0101010101010101ULL;
608 val ^= val - 1;
609 return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56);
610 # endif
611 } else /* 32 bits */ {
612 # if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
613 unsigned long r;
614 _BitScanForward(&r, (U32)val);
615 return (unsigned)r >> 3;
616 # elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
617 ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
618 !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
619 return (unsigned)__builtin_ctz((U32)val) >> 3;
620 # else
621 const U32 m = 0x01010101;
622 return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
623 # endif
625 } else /* Big Endian CPU */ {
626 if (sizeof(val)==8) {
627 # if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
628 ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
629 !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
630 return (unsigned)__builtin_clzll((U64)val) >> 3;
631 # else
632 #if 1
633 /* this method is probably faster,
634 * but adds a 128-byte lookup table */
635 static const unsigned char ctz7_tab[128] = {
636 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
637 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
638 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
639 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
640 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
641 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
642 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
643 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
645 U64 const mask = 0x0101010101010101ULL;
646 U64 const t = (((val >> 8) - mask) | val) & mask;
647 return ctz7_tab[(t * 0x0080402010080402ULL) >> 57];
648 #else
649 /* this method doesn't consume memory space like the previous one,
650 * but it contains several branches,
651 * that may end up slowing execution */
652 static const U32 by32 = sizeof(val)*4; /* 32 on 64-bit targets (the goal), 16 on 32-bit targets.
653 Just to avoid some static analyzers complaining about a shift by 32 on a 32-bit target.
654 Note that this code path is never triggered in 32-bit mode. */
655 unsigned r;
656 if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
657 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
658 r += (!val);
659 return r;
660 #endif
661 # endif
662 } else /* 32 bits */ {
663 # if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
664 ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
665 !defined(LZ4_FORCE_SW_BITCOUNT)
666 return (unsigned)__builtin_clz((U32)val) >> 3;
667 # else
668 val >>= 8;
669 val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
670 (val + 0x00FF0000)) >> 24;
671 return (unsigned)val ^ 3;
672 # endif
678 #define STEPSIZE sizeof(reg_t)
679 LZ4_FORCE_INLINE
680 unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
682 const BYTE* const pStart = pIn;
684 if (likely(pIn < pInLimit-(STEPSIZE-1))) {
685 reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
686 if (!diff) {
687 pIn+=STEPSIZE; pMatch+=STEPSIZE;
688 } else {
689 return LZ4_NbCommonBytes(diff);
692 while (likely(pIn < pInLimit-(STEPSIZE-1))) {
693 reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
694 if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
695 pIn += LZ4_NbCommonBytes(diff);
696 return (unsigned)(pIn - pStart);
699 if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
700 if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
701 if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
702 return (unsigned)(pIn - pStart);
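/* Editor's note (not part of upstream LZ4): a small worked example of
 * LZ4_count(). Given two buffers that share a 6-byte prefix, the function
 * returns 6, i.e. the number of bytes that match starting at pIn/pMatch,
 * stopping at the first differing byte or at pInLimit. */
#if 0   /* illustrative only */
static unsigned example_LZ4_count(void)
{
    const BYTE in[]    = "abcdefXY";
    const BYTE match[] = "abcdefZW";
    return LZ4_count(in, match, in + sizeof(in) - 1);   /* -> 6 */
}
#endif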
706 #ifndef LZ4_COMMONDEFS_ONLY
707 /*-************************************
708 * Local Constants
709 **************************************/
710 static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
711 static const U32 LZ4_skipTrigger = 6; /* Increase this value ==> compression runs slower on incompressible data */
714 /*-************************************
715 * Local Structures and types
716 **************************************/
717 typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
720 * This enum distinguishes several different modes of accessing previous
721 * content in the stream.
723 * - noDict : There is no preceding content.
724 * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
725 * being compressed are valid and refer to the preceding
726 * content (of length ctx->dictSize), which is available
727 * in memory immediately preceding the content currently
728 * being compressed.
729 * - usingExtDict : Like withPrefix64k, but the preceding content is somewhere
730 * else in memory, starting at ctx->dictionary with length
731 * ctx->dictSize.
732 * - usingDictCtx : Everything concerning the preceding content is
733 * in a separate context, pointed to by ctx->dictCtx.
734 * ctx->dictionary, ctx->dictSize, and table entries
735 * in the current context that refer to positions
736 * preceding the beginning of the current compression are
737 * ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
738 * ->dictSize describe the location and size of the preceding
739 * content, and matches are found by looking in the ctx
740 * ->dictCtx->hashTable.
742 typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
743 typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
746 /*-************************************
747 * Local Utils
748 **************************************/
749 int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
750 const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }
751 int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); }
752 int LZ4_sizeofState(void) { return sizeof(LZ4_stream_t); }
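/* Editor's note (not part of upstream LZ4): LZ4_COMPRESSBOUND(isize) expands
 * to isize + isize/255 + 16 for inputs within LZ4_MAX_INPUT_SIZE, the
 * worst-case compressed size for an incompressible input. For example,
 * LZ4_compressBound(65536) returns 65536 + 257 + 16 = 65809. */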
755 /*-****************************************
756 * Internal Definitions, used only in Tests
757 *******************************************/
758 #if defined (__cplusplus)
759 extern "C" {
760 #endif
762 int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize);
764 int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
765 int compressedSize, int maxOutputSize,
766 const void* dictStart, size_t dictSize);
767 int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest,
768 int compressedSize, int targetOutputSize, int dstCapacity,
769 const void* dictStart, size_t dictSize);
770 #if defined (__cplusplus)
772 #endif
774 /*-******************************
775 * Compression functions
776 ********************************/
777 LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
779 if (tableType == byU16)
780 return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
781 else
782 return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
785 LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
787 const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
788 if (LZ4_isLittleEndian()) {
789 const U64 prime5bytes = 889523592379ULL;
790 return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
791 } else {
792 const U64 prime8bytes = 11400714785074694791ULL;
793 return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
797 LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
799 if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType);
801 #ifdef LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT
802 return LZ4_hash4(LZ4_readLE32(p), tableType);
803 #else
804 return LZ4_hash4(LZ4_read32(p), tableType);
805 #endif
808 LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
810 switch (tableType)
812 default: /* fallthrough */
813 case clearedTable: { /* illegal! */ assert(0); return; }
814 case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; }
815 case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; }
816 case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; }
820 LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
822 switch (tableType)
824 default: /* fallthrough */
825 case clearedTable: /* fallthrough */
826 case byPtr: { /* illegal! */ assert(0); return; }
827 case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; }
828 case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; }
832 /* LZ4_putPosition*() : only used in byPtr mode */
833 LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h,
834 void* tableBase, tableType_t const tableType)
836 const BYTE** const hashTable = (const BYTE**)tableBase;
837 assert(tableType == byPtr); (void)tableType;
838 hashTable[h] = p;
841 LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType)
843 U32 const h = LZ4_hashPosition(p, tableType);
844 LZ4_putPositionOnHash(p, h, tableBase, tableType);
847 /* LZ4_getIndexOnHash() :
848 * Index of match position registered in hash table.
849 * hash position must be calculated by using base+index, or dictBase+index.
850 * Assumption 1 : only valid if tableType == byU32 or byU16.
851 * Assumption 2 : h is presumed valid (within limits of hash table)
853 LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
855 LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
856 if (tableType == byU32) {
857 const U32* const hashTable = (const U32*) tableBase;
858 assert(h < (1U << (LZ4_MEMORY_USAGE-2)));
859 return hashTable[h];
861 if (tableType == byU16) {
862 const U16* const hashTable = (const U16*) tableBase;
863 assert(h < (1U << (LZ4_MEMORY_USAGE-1)));
864 return hashTable[h];
866 assert(0); return 0; /* forbidden case */
869 static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType)
871 assert(tableType == byPtr); (void)tableType;
872 { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; }
875 LZ4_FORCE_INLINE const BYTE*
876 LZ4_getPosition(const BYTE* p,
877 const void* tableBase, tableType_t tableType)
879 U32 const h = LZ4_hashPosition(p, tableType);
880 return LZ4_getPositionOnHash(h, tableBase, tableType);
883 LZ4_FORCE_INLINE void
884 LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
885 const int inputSize,
886 const tableType_t tableType) {
887 /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
888 * therefore safe to use no matter what mode we're in. Otherwise, we figure
889 * out if it's safe to leave as is or whether it needs to be reset.
891 if ((tableType_t)cctx->tableType != clearedTable) {
892 assert(inputSize >= 0);
893 if ((tableType_t)cctx->tableType != tableType
894 || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)
895 || ((tableType == byU32) && cctx->currentOffset > 1 GB)
896 || tableType == byPtr
897 || inputSize >= 4 KB)
899 DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
900 MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
901 cctx->currentOffset = 0;
902 cctx->tableType = (U32)clearedTable;
903 } else {
904 DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
908 /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back,
909 * is faster than compressing without a gap.
910 * However, compressing with currentOffset == 0 is faster still,
911 * so we preserve that case.
913 if (cctx->currentOffset != 0 && tableType == byU32) {
914 DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
915 cctx->currentOffset += 64 KB;
918 /* Finally, clear history */
919 cctx->dictCtx = NULL;
920 cctx->dictionary = NULL;
921 cctx->dictSize = 0;
924 /** LZ4_compress_generic_validated() :
925 * inlined, to ensure branches are decided at compilation time.
926 * The following conditions are presumed already validated:
927 * - source != NULL
928 * - inputSize > 0
930 LZ4_FORCE_INLINE int LZ4_compress_generic_validated(
931 LZ4_stream_t_internal* const cctx,
932 const char* const source,
933 char* const dest,
934 const int inputSize,
935 int* inputConsumed, /* only written when outputDirective == fillOutput */
936 const int maxOutputSize,
937 const limitedOutput_directive outputDirective,
938 const tableType_t tableType,
939 const dict_directive dictDirective,
940 const dictIssue_directive dictIssue,
941 const int acceleration)
943 int result;
944 const BYTE* ip = (const BYTE*)source;
946 U32 const startIndex = cctx->currentOffset;
947 const BYTE* base = (const BYTE*)source - startIndex;
948 const BYTE* lowLimit;
950 const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
951 const BYTE* const dictionary =
952 dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
953 const U32 dictSize =
954 dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
955 const U32 dictDelta =
956 (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0; /* make indexes in dictCtx comparable with indexes in current context */
958 int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
959 U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */
960 const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary;
961 const BYTE* anchor = (const BYTE*) source;
962 const BYTE* const iend = ip + inputSize;
963 const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
964 const BYTE* const matchlimit = iend - LASTLITERALS;
966 /* the dictCtx currentOffset is indexed on the start of the dictionary,
967 * while a dictionary in the current context precedes the currentOffset */
968 const BYTE* dictBase = (dictionary == NULL) ? NULL :
969 (dictDirective == usingDictCtx) ?
970 dictionary + dictSize - dictCtx->currentOffset :
971 dictionary + dictSize - startIndex;
973 BYTE* op = (BYTE*) dest;
974 BYTE* const olimit = op + maxOutputSize;
976 U32 offset = 0;
977 U32 forwardH;
979 DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType);
980 assert(ip != NULL);
981 if (tableType == byU16) assert(inputSize<LZ4_64Klimit); /* Size too large (not within 64K limit) */
982 if (tableType == byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */
983 /* If init conditions are not met, we don't have to mark stream
984 * as having dirty context, since no action was taken yet */
985 if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
986 assert(acceleration >= 1);
988 lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);
990 /* Update context state */
991 if (dictDirective == usingDictCtx) {
992 /* Subsequent linked blocks can't use the dictionary. */
993 /* Instead, they use the block we just compressed. */
994 cctx->dictCtx = NULL;
995 cctx->dictSize = (U32)inputSize;
996 } else {
997 cctx->dictSize += (U32)inputSize;
999 cctx->currentOffset += (U32)inputSize;
1000 cctx->tableType = (U32)tableType;
1002 if (inputSize<LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
1004 /* First Byte */
1005 { U32 const h = LZ4_hashPosition(ip, tableType);
1006 if (tableType == byPtr) {
1007 LZ4_putPositionOnHash(ip, h, cctx->hashTable, byPtr);
1008 } else {
1009 LZ4_putIndexOnHash(startIndex, h, cctx->hashTable, tableType);
1011 ip++; forwardH = LZ4_hashPosition(ip, tableType);
1013 /* Main Loop */
1014 for ( ; ; ) {
1015 const BYTE* match;
1016 BYTE* token;
1017 const BYTE* filledIp;
1019 /* Find a match */
1020 if (tableType == byPtr) {
1021 const BYTE* forwardIp = ip;
1022 int step = 1;
1023 int searchMatchNb = acceleration << LZ4_skipTrigger;
1024 do {
1025 U32 const h = forwardH;
1026 ip = forwardIp;
1027 forwardIp += step;
1028 step = (searchMatchNb++ >> LZ4_skipTrigger);
1030 if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
1031 assert(ip < mflimitPlusOne);
1033 match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType);
1034 forwardH = LZ4_hashPosition(forwardIp, tableType);
1035 LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType);
1037 } while ( (match+LZ4_DISTANCE_MAX < ip)
1038 || (LZ4_read32(match) != LZ4_read32(ip)) );
1040 } else { /* byU32, byU16 */
1042 const BYTE* forwardIp = ip;
1043 int step = 1;
1044 int searchMatchNb = acceleration << LZ4_skipTrigger;
1045 do {
1046 U32 const h = forwardH;
1047 U32 const current = (U32)(forwardIp - base);
1048 U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
1049 assert(matchIndex <= current);
1050 assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
1051 ip = forwardIp;
1052 forwardIp += step;
1053 step = (searchMatchNb++ >> LZ4_skipTrigger);
1055 if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
1056 assert(ip < mflimitPlusOne);
1058 if (dictDirective == usingDictCtx) {
1059 if (matchIndex < startIndex) {
1060 /* there was no match, try the dictionary */
1061 assert(tableType == byU32);
1062 matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
1063 match = dictBase + matchIndex;
1064 matchIndex += dictDelta; /* make dictCtx index comparable with current context */
1065 lowLimit = dictionary;
1066 } else {
1067 match = base + matchIndex;
1068 lowLimit = (const BYTE*)source;
1070 } else if (dictDirective == usingExtDict) {
1071 if (matchIndex < startIndex) {
1072 DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex);
1073 assert(startIndex - matchIndex >= MINMATCH);
1074 assert(dictBase);
1075 match = dictBase + matchIndex;
1076 lowLimit = dictionary;
1077 } else {
1078 match = base + matchIndex;
1079 lowLimit = (const BYTE*)source;
1081 } else { /* single continuous memory segment */
1082 match = base + matchIndex;
1084 forwardH = LZ4_hashPosition(forwardIp, tableType);
1085 LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
1087 DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", matchIndex, current - matchIndex);
1088 if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */
1089 assert(matchIndex < current);
1090 if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
1091 && (matchIndex+LZ4_DISTANCE_MAX < current)) {
1092 continue;
1093 } /* too far */
1094 assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */
1096 if (LZ4_read32(match) == LZ4_read32(ip)) {
1097 if (maybe_extMem) offset = current - matchIndex;
1098 break; /* match found */
1101 } while(1);
1104 /* Catch up */
1105 filledIp = ip;
1106 assert(ip > anchor); /* this is always true as ip has been advanced before entering the main loop */
1107 if ((match > lowLimit) && unlikely(ip[-1] == match[-1])) {
1108 do { ip--; match--; } while (((ip > anchor) & (match > lowLimit)) && (unlikely(ip[-1] == match[-1])));
1111 /* Encode Literals */
1112 { unsigned const litLength = (unsigned)(ip - anchor);
1113 token = op++;
1114 if ((outputDirective == limitedOutput) && /* Check output buffer overflow */
1115 (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
1116 return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
1118 if ((outputDirective == fillOutput) &&
1119 (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
1120 op--;
1121 goto _last_literals;
1123 if (litLength >= RUN_MASK) {
1124 unsigned len = litLength - RUN_MASK;
1125 *token = (RUN_MASK<<ML_BITS);
1126 for(; len >= 255 ; len-=255) *op++ = 255;
1127 *op++ = (BYTE)len;
1129 else *token = (BYTE)(litLength<<ML_BITS);
1131 /* Copy Literals */
1132 LZ4_wildCopy8(op, anchor, op+litLength);
1133 op+=litLength;
1134 DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
1135 (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
1138 _next_match:
1139 /* at this stage, the following variables must be correctly set :
1140 * - ip : at start of LZ operation
1141 * - match : at start of previous pattern occurrence; can be within current prefix, or within extDict
1142 * - offset : only valid if maybe_extMem==1 (a compile-time constant)
1143 * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
1144 * - token and *token : position to write the 4 bits of match length; the upper 4 bits (literal length) are assumed to be already written
1147 if ((outputDirective == fillOutput) &&
1148 (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
1149 /* the match was too close to the end, rewind and go to last literals */
1150 op = token;
1151 goto _last_literals;
1154 /* Encode Offset */
1155 if (maybe_extMem) { /* static test */
1156 DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
1157 assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
1158 LZ4_writeLE16(op, (U16)offset); op+=2;
1159 } else {
1160 DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match));
1161 assert(ip-match <= LZ4_DISTANCE_MAX);
1162 LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
1165 /* Encode MatchLength */
1166 { unsigned matchCode;
1168 if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
1169 && (lowLimit==dictionary) /* match within extDict */ ) {
1170 const BYTE* limit = ip + (dictEnd-match);
1171 assert(dictEnd > match);
1172 if (limit > matchlimit) limit = matchlimit;
1173 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
1174 ip += (size_t)matchCode + MINMATCH;
1175 if (ip==limit) {
1176 unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
1177 matchCode += more;
1178 ip += more;
1180 DEBUGLOG(6, " with matchLength=%u starting in extDict", matchCode+MINMATCH);
1181 } else {
1182 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
1183 ip += (size_t)matchCode + MINMATCH;
1184 DEBUGLOG(6, " with matchLength=%u", matchCode+MINMATCH);
1187 if ((outputDirective) && /* Check output buffer overflow */
1188 (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
1189 if (outputDirective == fillOutput) {
1190 /* Match description too long : reduce it */
1191 U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
1192 ip -= matchCode - newMatchCode;
1193 assert(newMatchCode < matchCode);
1194 matchCode = newMatchCode;
1195 if (unlikely(ip <= filledIp)) {
1196 /* We have already filled up to filledIp so if ip ends up less than filledIp
1197 * we have positions in the hash table beyond the current position. This is
1198 * a problem if we reuse the hash table. So we have to remove these positions
1199 * from the hash table.
1201 const BYTE* ptr;
1202 DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
1203 for (ptr = ip; ptr <= filledIp; ++ptr) {
1204 U32 const h = LZ4_hashPosition(ptr, tableType);
1205 LZ4_clearHash(h, cctx->hashTable, tableType);
1208 } else {
1209 assert(outputDirective == limitedOutput);
1210 return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
1213 if (matchCode >= ML_MASK) {
1214 *token += ML_MASK;
1215 matchCode -= ML_MASK;
1216 LZ4_write32(op, 0xFFFFFFFF);
1217 while (matchCode >= 4*255) {
1218 op+=4;
1219 LZ4_write32(op, 0xFFFFFFFF);
1220 matchCode -= 4*255;
1222 op += matchCode / 255;
1223 *op++ = (BYTE)(matchCode % 255);
1224 } else
1225 *token += (BYTE)(matchCode);
1227 /* Ensure we have enough space for the last literals. */
1228 assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));
1230 anchor = ip;
1232 /* Test end of chunk */
1233 if (ip >= mflimitPlusOne) break;
1235 /* Fill table */
1236 { U32 const h = LZ4_hashPosition(ip-2, tableType);
1237 if (tableType == byPtr) {
1238 LZ4_putPositionOnHash(ip-2, h, cctx->hashTable, byPtr);
1239 } else {
1240 U32 const idx = (U32)((ip-2) - base);
1241 LZ4_putIndexOnHash(idx, h, cctx->hashTable, tableType);
1244 /* Test next position */
1245 if (tableType == byPtr) {
1247 match = LZ4_getPosition(ip, cctx->hashTable, tableType);
1248 LZ4_putPosition(ip, cctx->hashTable, tableType);
1249 if ( (match+LZ4_DISTANCE_MAX >= ip)
1250 && (LZ4_read32(match) == LZ4_read32(ip)) )
1251 { token=op++; *token=0; goto _next_match; }
1253 } else { /* byU32, byU16 */
1255 U32 const h = LZ4_hashPosition(ip, tableType);
1256 U32 const current = (U32)(ip-base);
1257 U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
1258 assert(matchIndex < current);
1259 if (dictDirective == usingDictCtx) {
1260 if (matchIndex < startIndex) {
1261 /* there was no match, try the dictionary */
1262 assert(tableType == byU32);
1263 matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
1264 match = dictBase + matchIndex;
1265 lowLimit = dictionary; /* required for match length counter */
1266 matchIndex += dictDelta;
1267 } else {
1268 match = base + matchIndex;
1269 lowLimit = (const BYTE*)source; /* required for match length counter */
1271 } else if (dictDirective==usingExtDict) {
1272 if (matchIndex < startIndex) {
1273 assert(dictBase);
1274 match = dictBase + matchIndex;
1275 lowLimit = dictionary; /* required for match length counter */
1276 } else {
1277 match = base + matchIndex;
1278 lowLimit = (const BYTE*)source; /* required for match length counter */
1280 } else { /* single memory segment */
1281 match = base + matchIndex;
1283 LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
1284 assert(matchIndex < current);
1285 if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
1286 && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
1287 && (LZ4_read32(match) == LZ4_read32(ip)) ) {
1288 token=op++;
1289 *token=0;
1290 if (maybe_extMem) offset = current - matchIndex;
1291 DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
1292 (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
1293 goto _next_match;
1297 /* Prepare next loop */
1298 forwardH = LZ4_hashPosition(++ip, tableType);
1302 _last_literals:
1303 /* Encode Last Literals */
1304 { size_t lastRun = (size_t)(iend - anchor);
1305 if ( (outputDirective) && /* Check output buffer overflow */
1306 (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
1307 if (outputDirective == fillOutput) {
1308 /* adapt lastRun to fill 'dst' */
1309 assert(olimit >= op);
1310 lastRun = (size_t)(olimit-op) - 1/*token*/;
1311 lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/
1312 } else {
1313 assert(outputDirective == limitedOutput);
1314 return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
1317 DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
1318 if (lastRun >= RUN_MASK) {
1319 size_t accumulator = lastRun - RUN_MASK;
1320 *op++ = RUN_MASK << ML_BITS;
1321 for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
1322 *op++ = (BYTE) accumulator;
1323 } else {
1324 *op++ = (BYTE)(lastRun<<ML_BITS);
1326 LZ4_memcpy(op, anchor, lastRun);
1327 ip = anchor + lastRun;
1328 op += lastRun;
1331 if (outputDirective == fillOutput) {
1332 *inputConsumed = (int) (((const char*)ip)-source);
1334 result = (int)(((char*)op) - dest);
1335 assert(result > 0);
1336 DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result);
1337 return result;
1340 /** LZ4_compress_generic() :
1341 * inlined, to ensure branches are decided at compilation time;
1342 * takes care of src == (NULL, 0)
1343 * and forward the rest to LZ4_compress_generic_validated */
1344 LZ4_FORCE_INLINE int LZ4_compress_generic(
1345 LZ4_stream_t_internal* const cctx,
1346 const char* const src,
1347 char* const dst,
1348 const int srcSize,
1349 int *inputConsumed, /* only written when outputDirective == fillOutput */
1350 const int dstCapacity,
1351 const limitedOutput_directive outputDirective,
1352 const tableType_t tableType,
1353 const dict_directive dictDirective,
1354 const dictIssue_directive dictIssue,
1355 const int acceleration)
1357 DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i",
1358 srcSize, dstCapacity);
1360 if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported srcSize, too large (or negative) */
1361 if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */
1362 if (outputDirective != notLimited && dstCapacity <= 0) return 0; /* no output, can't write anything */
1363 DEBUGLOG(5, "Generating an empty block");
1364 assert(outputDirective == notLimited || dstCapacity >= 1);
1365 assert(dst != NULL);
1366 dst[0] = 0;
1367 if (outputDirective == fillOutput) {
1368 assert (inputConsumed != NULL);
1369 *inputConsumed = 0;
1371 return 1;
1373 assert(src != NULL);
1375 return LZ4_compress_generic_validated(cctx, src, dst, srcSize,
1376 inputConsumed, /* only written into if outputDirective == fillOutput */
1377 dstCapacity, outputDirective,
1378 tableType, dictDirective, dictIssue, acceleration);
1382 int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
1384 LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
1385 assert(ctx != NULL);
1386 if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
1387 if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
1388 if (maxOutputSize >= LZ4_compressBound(inputSize)) {
1389 if (inputSize < LZ4_64Klimit) {
1390 return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
1391 } else {
1392 const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
1393 return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
1395 } else {
1396 if (inputSize < LZ4_64Klimit) {
1397 return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
1398 } else {
1399 const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
1400 return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
1406 * LZ4_compress_fast_extState_fastReset() :
1407 * A variant of LZ4_compress_fast_extState().
1409 * Using this variant avoids an expensive initialization step. It is only safe
1410 * to call if the state buffer is known to be correctly initialized already
1411 * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of
1412 * "correctly initialized").
1414 int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
1416 LZ4_stream_t_internal* const ctx = &((LZ4_stream_t*)state)->internal_donotuse;
1417 if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
1418 if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
1419 assert(ctx != NULL);
1421 if (dstCapacity >= LZ4_compressBound(srcSize)) {
1422 if (srcSize < LZ4_64Klimit) {
1423 const tableType_t tableType = byU16;
1424 LZ4_prepareTable(ctx, srcSize, tableType);
1425 if (ctx->currentOffset) {
1426 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration);
1427 } else {
1428 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
1430 } else {
1431 const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
1432 LZ4_prepareTable(ctx, srcSize, tableType);
1433 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
1435 } else {
1436 if (srcSize < LZ4_64Klimit) {
1437 const tableType_t tableType = byU16;
1438 LZ4_prepareTable(ctx, srcSize, tableType);
1439 if (ctx->currentOffset) {
1440 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration);
1441 } else {
1442 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
1444 } else {
1445 const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
1446 LZ4_prepareTable(ctx, srcSize, tableType);
1447 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
1453 int LZ4_compress_fast(const char* src, char* dest, int srcSize, int dstCapacity, int acceleration)
1455 int result;
1456 #if (LZ4_HEAPMODE)
1457 LZ4_stream_t* const ctxPtr = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */
1458 if (ctxPtr == NULL) return 0;
1459 #else
1460 LZ4_stream_t ctx;
1461 LZ4_stream_t* const ctxPtr = &ctx;
1462 #endif
1463 result = LZ4_compress_fast_extState(ctxPtr, src, dest, srcSize, dstCapacity, acceleration);
1465 #if (LZ4_HEAPMODE)
1466 FREEMEM(ctxPtr);
1467 #endif
1468 return result;
1472 int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity)
1474 return LZ4_compress_fast(src, dst, srcSize, dstCapacity, 1);
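/* Editor's illustrative sketch (not part of upstream LZ4): typical round-trip
 * usage of the stable API defined above, assuming srcSize > 0 and the
 * declarations from lz4.h (LZ4_compressBound() and LZ4_decompress_safe() are
 * part of that public API). */
#if 0   /* illustrative only */
#include <stdlib.h>
#include <string.h>
#include "lz4.h"

static int example_roundtrip(const char* src, int srcSize)
{
    int ok = 0;
    int const bound = LZ4_compressBound(srcSize);        /* worst-case compressed size */
    char* const cbuf = (char*)malloc((size_t)bound);
    char* const rbuf = (char*)malloc((size_t)srcSize);
    if (cbuf && rbuf) {
        int const csize = LZ4_compress_default(src, cbuf, srcSize, bound);
        if (csize > 0) {
            int const dsize = LZ4_decompress_safe(cbuf, rbuf, csize, srcSize);
            ok = (dsize == srcSize) && (memcmp(src, rbuf, (size_t)srcSize) == 0);
        }
    }
    free(cbuf); free(rbuf);
    return ok;
}
#endif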
1478 /* Note!: This function leaves the stream in an unclean/broken state!
1479 * It is not safe to subsequently use the same state with a _fastReset() or
1480 * _continue() call without resetting it. */
1481 static int LZ4_compress_destSize_extState_internal(LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration)
1483 void* const s = LZ4_initStream(state, sizeof (*state));
1484 assert(s != NULL); (void)s;
1486 if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) { /* compression success is guaranteed */
1487 return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, acceleration);
1488 } else {
1489 if (*srcSizePtr < LZ4_64Klimit) {
1490 return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, acceleration);
1491 } else {
1492 tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
1493 return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, acceleration);
1497 int LZ4_compress_destSize_extState(void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration)
1499 int const r = LZ4_compress_destSize_extState_internal((LZ4_stream_t*)state, src, dst, srcSizePtr, targetDstSize, acceleration);
1500 /* clean the state on exit */
1501 LZ4_initStream(state, sizeof (LZ4_stream_t));
1502 return r;
1506 int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
1508 #if (LZ4_HEAPMODE)
1509 LZ4_stream_t* const ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */
1510 if (ctx == NULL) return 0;
1511 #else
1512 LZ4_stream_t ctxBody;
1513 LZ4_stream_t* const ctx = &ctxBody;
1514 #endif
1516 int result = LZ4_compress_destSize_extState_internal(ctx, src, dst, srcSizePtr, targetDstSize, 1);
1518 #if (LZ4_HEAPMODE)
1519 FREEMEM(ctx);
1520 #endif
1521 return result;
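/* Illustrative caller-side sketch for LZ4_compress_destSize() above : fill a
 * fixed-size destination and report how much of the source actually fit.
 * example_compress_into_budget and its parameter names are hypothetical. */
#include "lz4.h"

/* Returns the compressed size written into dst (<= dstBudget);
 * stores the number of source bytes consumed in *consumedPtr. */
static int example_compress_into_budget(const char* src, int srcSize,
                                        char* dst, int dstBudget,
                                        int* consumedPtr)
{
    int srcConsumed = srcSize;   /* in : bytes available ; out : bytes consumed */
    int const cSize = LZ4_compress_destSize(src, dst, &srcConsumed, dstBudget);
    *consumedPtr = srcConsumed;
    return cSize;                /* 0 means nothing could be written */
}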
1526 /*-******************************
1527 * Streaming functions
1528 ********************************/
1530 #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
1531 LZ4_stream_t* LZ4_createStream(void)
1533 LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
1534 LZ4_STATIC_ASSERT(sizeof(LZ4_stream_t) >= sizeof(LZ4_stream_t_internal));
1535 DEBUGLOG(4, "LZ4_createStream %p", lz4s);
1536 if (lz4s == NULL) return NULL;
1537 LZ4_initStream(lz4s, sizeof(*lz4s));
1538 return lz4s;
1540 #endif
1542 static size_t LZ4_stream_t_alignment(void)
1544 #if LZ4_ALIGN_TEST
1545 typedef struct { char c; LZ4_stream_t t; } t_a;
1546 return sizeof(t_a) - sizeof(LZ4_stream_t);
1547 #else
1548 return 1; /* effectively disabled */
1549 #endif
1552 LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
1554 DEBUGLOG(5, "LZ4_initStream");
1555 if (buffer == NULL) { return NULL; }
1556 if (size < sizeof(LZ4_stream_t)) { return NULL; }
1557 if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL;
1558 MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal));
1559 return (LZ4_stream_t*)buffer;
1562 /* resetStream is now deprecated,
1563 * prefer initStream() which is more general */
1564 void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
1566 DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
1567 MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal));
1570 void LZ4_resetStream_fast(LZ4_stream_t* ctx) {
1571 LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32);
1574 #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
1575 int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
1577 if (!LZ4_stream) return 0; /* support free on NULL */
1578 DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream);
1579 FREEMEM(LZ4_stream);
1580 return (0);
1582 #endif
1585 typedef enum { _ld_fast, _ld_slow } LoadDict_mode_e;
1586 #define HASH_UNIT sizeof(reg_t)
1587 int LZ4_loadDict_internal(LZ4_stream_t* LZ4_dict,
1588 const char* dictionary, int dictSize,
1589 LoadDict_mode_e _ld)
1591 LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
1592 const tableType_t tableType = byU32;
1593 const BYTE* p = (const BYTE*)dictionary;
1594 const BYTE* const dictEnd = p + dictSize;
1595 U32 idx32;
1597 DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);
1599 /* It's necessary to reset the context,
1600 * and not just continue it with prepareTable()
1601 * to avoid any risk of generating overflowing matchIndex
1602 * when compressing using this dictionary */
1603 LZ4_resetStream(LZ4_dict);
1605 /* We always increment the offset by 64 KB, since, if the dict is longer,
1606 * we truncate it to the last 64k, and if it's shorter, we still want to
1607 * advance by a whole window length so we can provide the guarantee that
1608 * there are only valid offsets in the window, which allows an optimization
1609 * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
1610 * dictionary isn't a full 64k. */
1611 dict->currentOffset += 64 KB;
1613 if (dictSize < (int)HASH_UNIT) {
1614 return 0;
1617 if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
1618 dict->dictionary = p;
1619 dict->dictSize = (U32)(dictEnd - p);
1620 dict->tableType = (U32)tableType;
1621 idx32 = dict->currentOffset - dict->dictSize;
1623 while (p <= dictEnd-HASH_UNIT) {
1624 U32 const h = LZ4_hashPosition(p, tableType);
1625 /* Note: overwriting => favors positions at the end of the dictionary */
1626 LZ4_putIndexOnHash(idx32, h, dict->hashTable, tableType);
1627 p+=3; idx32+=3;
1630 if (_ld == _ld_slow) {
1631 /* Fill hash table with additional references, to improve compression capability */
1632 p = dict->dictionary;
1633 idx32 = dict->currentOffset - dict->dictSize;
1634 while (p <= dictEnd-HASH_UNIT) {
1635 U32 const h = LZ4_hashPosition(p, tableType);
1636 U32 const limit = dict->currentOffset - 64 KB;
1637 if (LZ4_getIndexOnHash(h, dict->hashTable, tableType) <= limit) {
1638 /* Note: not overwriting => favors positions near the beginning of the dictionary */
1639 LZ4_putIndexOnHash(idx32, h, dict->hashTable, tableType);
1641 p++; idx32++;
1645 return (int)dict->dictSize;
1648 int LZ4_loadDict(LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
1650 return LZ4_loadDict_internal(LZ4_dict, dictionary, dictSize, _ld_fast);
1653 int LZ4_loadDictSlow(LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
1655 return LZ4_loadDict_internal(LZ4_dict, dictionary, dictSize, _ld_slow);
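/* Illustrative caller-side sketch for the dictionary loaders above : load a
 * dictionary with LZ4_loadDict() (LZ4_loadDictSlow() would trade load time for
 * a denser hash table), then compress one block against it. The matching
 * decoder would pass the same dictionary to LZ4_decompress_safe_usingDict().
 * example_compress_with_dict is hypothetical; error handling abbreviated. */
#include "lz4.h"

static int example_compress_with_dict(const char* dict, int dictSize,
                                      const char* src, int srcSize,
                                      char* dst, int dstCapacity)
{
    LZ4_stream_t* const stream = LZ4_createStream();
    int cSize = 0;
    if (stream == NULL) return 0;
    (void)LZ4_loadDict(stream, dict, dictSize);    /* keeps at most the last 64 KB */
    cSize = LZ4_compress_fast_continue(stream, src, dst, srcSize, dstCapacity, 1);
    LZ4_freeStream(stream);
    return cSize;                                  /* 0 means error (e.g. dst too small) */
}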
1658 void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream)
1660 const LZ4_stream_t_internal* dictCtx = (dictionaryStream == NULL) ? NULL :
1661 &(dictionaryStream->internal_donotuse);
1663 DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)",
1664 workingStream, dictionaryStream,
1665 dictCtx != NULL ? dictCtx->dictSize : 0);
1667 if (dictCtx != NULL) {
1668 /* If the current offset is zero, we will never look in the
1669 * external dictionary context, since there is no value a table
1670 * entry can take that indicates a miss. In that case, we need
1671 * to bump the offset to something non-zero.
1673 if (workingStream->internal_donotuse.currentOffset == 0) {
1674 workingStream->internal_donotuse.currentOffset = 64 KB;
1677 /* Don't actually attach an empty dictionary.
1679 if (dictCtx->dictSize == 0) {
1680 dictCtx = NULL;
1683 workingStream->internal_donotuse.dictCtx = dictCtx;
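/* Illustrative caller-side sketch for LZ4_attach_dictionary() above : pay the
 * LZ4_loadDict() cost once, then attach the prepared dictionary stream to a
 * cheaply-reset working stream for each block. Depending on the lz4 version,
 * this entry point may sit behind LZ4_STATIC_LINKING_ONLY.
 * example_attach_and_compress is hypothetical. */
#include "lz4.h"

static int example_attach_and_compress(const LZ4_stream_t* loadedDict,  /* prepared once via LZ4_loadDict() */
                                       LZ4_stream_t* workStream,        /* reused for every block */
                                       const char* src, int srcSize,
                                       char* dst, int dstCapacity)
{
    LZ4_resetStream_fast(workStream);               /* cheap reset between blocks */
    LZ4_attach_dictionary(workStream, loadedDict);  /* references the dict's tables, no copy here */
    return LZ4_compress_fast_continue(workStream, src, dst, srcSize, dstCapacity, 1);
}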
1687 static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
1689 assert(nextSize >= 0);
1690 if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) { /* potential ptrdiff_t overflow (32-bits mode) */
1691 /* rescale hash table */
1692 U32 const delta = LZ4_dict->currentOffset - 64 KB;
1693 const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
1694 int i;
1695 DEBUGLOG(4, "LZ4_renormDictT");
1696 for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
1697 if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
1698 else LZ4_dict->hashTable[i] -= delta;
1700 LZ4_dict->currentOffset = 64 KB;
1701 if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
1702 LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
1707 int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
1708 const char* source, char* dest,
1709 int inputSize, int maxOutputSize,
1710 int acceleration)
1712 const tableType_t tableType = byU32;
1713 LZ4_stream_t_internal* const streamPtr = &LZ4_stream->internal_donotuse;
1714 const char* dictEnd = streamPtr->dictSize ? (const char*)streamPtr->dictionary + streamPtr->dictSize : NULL;
1716 DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i, dictSize=%u)", inputSize, streamPtr->dictSize);
1718 LZ4_renormDictT(streamPtr, inputSize); /* fix index overflow */
1719 if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
1720 if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
1722 /* invalidate tiny dictionaries */
1723 if ( (streamPtr->dictSize < 4) /* tiny dictionary : not enough for a hash */
1724 && (dictEnd != source) /* prefix mode */
1725 && (inputSize > 0) /* tolerance : don't lose history, in case next invocation would use prefix mode */
1726 && (streamPtr->dictCtx == NULL) /* usingDictCtx */
1728 DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
1729 /* remove dictionary existence from history, to employ faster prefix mode */
1730 streamPtr->dictSize = 0;
1731 streamPtr->dictionary = (const BYTE*)source;
1732 dictEnd = source;
1735 /* Check overlapping input/dictionary space */
1736 { const char* const sourceEnd = source + inputSize;
1737 if ((sourceEnd > (const char*)streamPtr->dictionary) && (sourceEnd < dictEnd)) {
1738 streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
1739 if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
1740 if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
1741 streamPtr->dictionary = (const BYTE*)dictEnd - streamPtr->dictSize;
1745 /* prefix mode : source data follows dictionary */
1746 if (dictEnd == source) {
1747 if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
1748 return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration);
1749 else
1750 return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration);
1753 /* external dictionary mode */
1754 { int result;
1755 if (streamPtr->dictCtx) {
1756 /* We depend here on the fact that dictCtx'es (produced by
1757 * LZ4_loadDict) guarantee that their tables contain no references
1758 * to offsets between dictCtx->currentOffset - 64 KB and
1759 * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe
1760 * to use noDictIssue even when the dict isn't a full 64 KB.
1762 if (inputSize > 4 KB) {
1763 /* For compressing large blobs, it is faster to pay the setup
1764 * cost to copy the dictionary's tables into the active context,
1765 * so that the compression loop is only looking into one table.
1767 LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr));
1768 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
1769 } else {
1770 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
1772 } else { /* no dictCtx attached : compress against the stream's own dictionary */
1773 if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
1774 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration);
1775 } else {
1776 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
1779 streamPtr->dictionary = (const BYTE*)source;
1780 streamPtr->dictSize = (U32)inputSize;
1781 return result;
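/* Illustrative caller-side sketch for LZ4_compress_fast_continue() above :
 * streaming compression with a double buffer, so the previous block always
 * remains addressable and serves as the dictionary for the next call.
 * EXAMPLE_BLOCK_SIZE, the framing (native-endian size prefix) and the function
 * name are hypothetical; error handling abbreviated. */
#include <stdio.h>
#include "lz4.h"

#define EXAMPLE_BLOCK_SIZE (64 * 1024)

static void example_stream_compress(FILE* fin, FILE* fout)
{
    static char inBuf[2][EXAMPLE_BLOCK_SIZE];                  /* double buffer : previous block stays valid */
    static char outBuf[LZ4_COMPRESSBOUND(EXAMPLE_BLOCK_SIZE)];
    LZ4_stream_t* const stream = LZ4_createStream();
    int idx = 0;
    if (stream == NULL) return;

    for (;;) {
        int const inSize = (int)fread(inBuf[idx], 1, EXAMPLE_BLOCK_SIZE, fin);
        int cSize;
        if (inSize == 0) break;
        /* inBuf[1-idx] (the previous block) is still in memory,
         * so the stream can keep referencing it as history */
        cSize = LZ4_compress_fast_continue(stream, inBuf[idx], outBuf, inSize, (int)sizeof(outBuf), 1);
        if (cSize <= 0) break;
        fwrite(&cSize, sizeof(cSize), 1, fout);                /* naive framing : size prefix */
        fwrite(outBuf, 1, (size_t)cSize, fout);
        idx = 1 - idx;
    }
    LZ4_freeStream(stream);
}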
1786 /* Hidden debug function, to force-test external dictionary mode */
1787 int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize)
1789 LZ4_stream_t_internal* const streamPtr = &LZ4_dict->internal_donotuse;
1790 int result;
1792 LZ4_renormDictT(streamPtr, srcSize);
1794 if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
1795 result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1);
1796 } else {
1797 result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
1800 streamPtr->dictionary = (const BYTE*)source;
1801 streamPtr->dictSize = (U32)srcSize;
1803 return result;
1807 /*! LZ4_saveDict() :
1808 * If the previously compressed data block is not guaranteed to remain available at its memory location,
1809 * save it into a safer place (char* safeBuffer).
1810 * Note : no need to call LZ4_loadDict() afterwards, dictionary is immediately usable,
1811 * one can therefore call LZ4_compress_fast_continue() right after.
1812 * @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
1814 int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
1816 LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
1818 DEBUGLOG(5, "LZ4_saveDict : dictSize=%i, safeBuffer=%p", dictSize, safeBuffer);
1820 if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */
1821 if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; }
1823 if (safeBuffer == NULL) assert(dictSize == 0);
1824 if (dictSize > 0) {
1825 const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize;
1826 assert(dict->dictionary);
1827 LZ4_memmove(safeBuffer, previousDictEnd - dictSize, (size_t)dictSize);
1830 dict->dictionary = (const BYTE*)safeBuffer;
1831 dict->dictSize = (U32)dictSize;
1833 return dictSize;
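/* Illustrative caller-side sketch for LZ4_saveDict() above : when a single
 * input buffer is reused for every block, move up to 64 KB of history into a
 * stable dictionary buffer right after compressing, so the next
 * LZ4_compress_fast_continue() call can still match against it.
 * example_compress_block_reusing_buffer and dictBuf are hypothetical. */
#include "lz4.h"

static int example_compress_block_reusing_buffer(LZ4_stream_t* stream,
                                                 const char* inBuf, int inSize,  /* overwritten before the next call */
                                                 char* dst, int dstCapacity,
                                                 char* dictBuf)                  /* stable storage, >= 64 KB */
{
    int const cSize = LZ4_compress_fast_continue(stream, inBuf, dst, inSize, dstCapacity, 1);
    /* inBuf is about to be recycled : relocate the history into dictBuf;
     * the stream now points at dictBuf instead of inBuf. */
    (void)LZ4_saveDict(stream, dictBuf, 64 * 1024);
    return cSize;
}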
1838 /*-*******************************
1839 * Decompression functions
1840 ********************************/
1842 typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
1844 #undef MIN
1845 #define MIN(a,b) ( (a) < (b) ? (a) : (b) )
1848 /* variant for decompress_unsafe()
1849 * does not know end of input
1850 * presumes input is well formed
1851 * note : will consume at least one byte */
1852 static size_t read_long_length_no_check(const BYTE** pp)
1854 size_t b, l = 0;
1855 do { b = **pp; (*pp)++; l += b; } while (b==255);
1856 DEBUGLOG(6, "read_long_length_no_check: +length=%zu using %zu input bytes", l, l/255 + 1)
1857 return l;
1860 /* core decoder variant for LZ4_decompress_fast*()
1861 * for legacy support only : these entry points are deprecated.
1862 * - Presumes input is correctly formed (no defense vs malformed inputs)
1863 * - Does not know input size (presume input buffer is "large enough")
1864 * - Decompress a full block (only)
1865 * @return : nb of bytes read from input.
1866 * Note : this variant is not optimized for speed, just for maintenance.
1867 * the goal is to remove support of decompress_fast*() variants by v2.0
1869 LZ4_FORCE_INLINE int
1870 LZ4_decompress_unsafe_generic(
1871 const BYTE* const istart,
1872 BYTE* const ostart,
1873 int decompressedSize,
1875 size_t prefixSize,
1876 const BYTE* const dictStart, /* only if dict==usingExtDict */
1877 const size_t dictSize /* note: =0 if dictStart==NULL */
1880 const BYTE* ip = istart;
1881 BYTE* op = (BYTE*)ostart;
1882 BYTE* const oend = ostart + decompressedSize;
1883 const BYTE* const prefixStart = ostart - prefixSize;
1885 DEBUGLOG(5, "LZ4_decompress_unsafe_generic");
1886 if (dictStart == NULL) assert(dictSize == 0);
1888 while (1) {
1889 /* start new sequence */
1890 unsigned token = *ip++;
1892 /* literals */
1893 { size_t ll = token >> ML_BITS;
1894 if (ll==15) {
1895 /* long literal length */
1896 ll += read_long_length_no_check(&ip);
1898 if ((size_t)(oend-op) < ll) return -1; /* output buffer overflow */
1899 LZ4_memmove(op, ip, ll); /* support in-place decompression */
1900 op += ll;
1901 ip += ll;
1902 if ((size_t)(oend-op) < MFLIMIT) {
1903 if (op==oend) break; /* end of block */
1904 DEBUGLOG(5, "invalid: literals end at distance %zi from end of block", oend-op);
1905 /* incorrect end of block :
1906 * last match must start at least MFLIMIT==12 bytes before end of output block */
1907 return -1;
1910 /* match */
1911 { size_t ml = token & 15;
1912 size_t const offset = LZ4_readLE16(ip);
1913 ip+=2;
1915 if (ml==15) {
1916 /* long match length */
1917 ml += read_long_length_no_check(&ip);
1919 ml += MINMATCH;
1921 if ((size_t)(oend-op) < ml) return -1; /* output buffer overflow */
1923 { const BYTE* match = op - offset;
1925 /* out of range */
1926 if (offset > (size_t)(op - prefixStart) + dictSize) {
1927 DEBUGLOG(6, "offset out of range");
1928 return -1;
1931 /* check special case : extDict */
1932 if (offset > (size_t)(op - prefixStart)) {
1933 /* extDict scenario */
1934 const BYTE* const dictEnd = dictStart + dictSize;
1935 const BYTE* extMatch = dictEnd - (offset - (size_t)(op-prefixStart));
1936 size_t const extml = (size_t)(dictEnd - extMatch);
1937 if (extml > ml) {
1938 /* match entirely within extDict */
1939 LZ4_memmove(op, extMatch, ml);
1940 op += ml;
1941 ml = 0;
1942 } else {
1943 /* match split between extDict & prefix */
1944 LZ4_memmove(op, extMatch, extml);
1945 op += extml;
1946 ml -= extml;
1948 match = prefixStart;
1951 /* match copy - slow variant, supporting overlap copy */
1952 { size_t u;
1953 for (u=0; u<ml; u++) {
1954 op[u] = match[u];
1955 } } }
1956 op += ml;
1957 if ((size_t)(oend-op) < LASTLITERALS) {
1958 DEBUGLOG(5, "invalid: match ends at distance %zi from end of block", oend-op);
1959 /* incorrect end of block :
1960 * last match must stop at least LASTLITERALS==5 bytes before end of output block */
1961 return -1;
1963 } /* match */
1964 } /* main loop */
1965 return (int)(ip - istart);
1969 /* Read the variable-length literal or match length.
1971 * @ip : input pointer (advanced past the bytes consumed)
1972 * @ilimit : position after which, if the length is not fully decoded, the input is necessarily corrupted.
1973 * @initial_check : if set, check ip >= ilimit before the first read; returns rvl_error if so.
1974 * @return : the decoded length, or rvl_error on truncated or corrupted input (including accumulator overflow in 32-bit mode).
1976 typedef size_t Rvl_t;
1977 static const Rvl_t rvl_error = (Rvl_t)(-1);
1978 LZ4_FORCE_INLINE Rvl_t
1979 read_variable_length(const BYTE** ip, const BYTE* ilimit,
1980 int initial_check)
1982 Rvl_t s, length = 0;
1983 assert(ip != NULL);
1984 assert(*ip != NULL);
1985 assert(ilimit != NULL);
1986 if (initial_check && unlikely((*ip) >= ilimit)) { /* read limit reached */
1987 return rvl_error;
1989 s = **ip;
1990 (*ip)++;
1991 length += s;
1992 if (unlikely((*ip) > ilimit)) { /* read limit reached */
1993 return rvl_error;
1995 /* accumulator overflow detection (32-bit mode only) */
1996 if ((sizeof(length) < 8) && unlikely(length > ((Rvl_t)(-1)/2)) ) {
1997 return rvl_error;
1999 if (likely(s != 255)) return length;
2000 do {
2001 s = **ip;
2002 (*ip)++;
2003 length += s;
2004 if (unlikely((*ip) > ilimit)) { /* read limit reached */
2005 return rvl_error;
2007 /* accumulator overflow detection (32-bit mode only) */
2008 if ((sizeof(length) < 8) && unlikely(length > ((Rvl_t)(-1)/2)) ) {
2009 return rvl_error;
2011 } while (s == 255);
2013 return length;
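/* Worked example of the length-extension scheme decoded above (illustrative,
 * not part of lz4.c) : when the 4-bit field in the token is 15, extra bytes
 * follow, each adding its value, until a byte below 255. A literal length of
 * 300 is therefore stored as 15 (token) + bytes { 255, 30 }, since
 * 15 + 255 + 30 == 300. example_decode_extra_length is hypothetical and
 * performs no bounds checking, unlike read_variable_length() above. */
#include <stddef.h>

static size_t example_decode_extra_length(const unsigned char* p, size_t* consumed)
{
    size_t len = 0, n = 0;
    unsigned b;
    do { b = p[n++]; len += b; } while (b == 255);
    *consumed = n;
    return len;   /* to be added to the 15 already read from the token */
}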
2016 /*! LZ4_decompress_generic() :
2017 * This generic decompression function covers all use cases.
2018 * It shall be instantiated several times, using different sets of directives.
2019 * Note that it is important for performance that this function really get inlined,
2020 * in order to remove useless branches during compilation optimization.
2022 LZ4_FORCE_INLINE int
2023 LZ4_decompress_generic(
2024 const char* const src,
2025 char* const dst,
2026 int srcSize,
2027 int outputSize, /* this is dstCapacity : the full size of the dst buffer */
2029 earlyEnd_directive partialDecoding, /* full, partial */
2030 dict_directive dict, /* noDict, withPrefix64k, usingExtDict */
2031 const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */
2032 const BYTE* const dictStart, /* only if dict==usingExtDict */
2033 const size_t dictSize /* note : = 0 if noDict */
2036 if ((src == NULL) || (outputSize < 0)) { return -1; }
2038 { const BYTE* ip = (const BYTE*) src;
2039 const BYTE* const iend = ip + srcSize;
2041 BYTE* op = (BYTE*) dst;
2042 BYTE* const oend = op + outputSize;
2043 BYTE* cpy;
2045 const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
2047 const int checkOffset = (dictSize < (int)(64 KB));
2050 /* Set up the "end" pointers for the shortcut. */
2051 const BYTE* const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/;
2052 const BYTE* const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/;
2054 const BYTE* match;
2055 size_t offset;
2056 unsigned token;
2057 size_t length;
2060 DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
2062 /* Special cases */
2063 assert(lowPrefix <= op);
2064 if (unlikely(outputSize==0)) {
2065 /* Empty output buffer */
2066 if (partialDecoding) return 0;
2067 return ((srcSize==1) && (*ip==0)) ? 0 : -1;
2069 if (unlikely(srcSize==0)) { return -1; }
2071 /* LZ4_FAST_DEC_LOOP:
2072 * designed for modern OoO performance cpus,
2073 * where copying reliably 32-bytes is preferable to an unpredictable branch.
2074 * note : fast loop may show a regression for some client arm chips. */
2075 #if LZ4_FAST_DEC_LOOP
2076 if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
2077 DEBUGLOG(6, "move to safe decode loop");
2078 goto safe_decode;
2081 /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */
2082 DEBUGLOG(6, "using fast decode loop");
2083 while (1) {
2084 /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
2085 assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
2086 assert(ip < iend);
2087 token = *ip++;
2088 length = token >> ML_BITS; /* literal length */
2089 DEBUGLOG(7, "blockPos%6u: litLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length);
2091 /* decode literal length */
2092 if (length == RUN_MASK) {
2093 size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1);
2094 if (addl == rvl_error) {
2095 DEBUGLOG(6, "error reading long literal length");
2096 goto _output_error;
2098 length += addl;
2099 if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
2100 if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
2102 /* copy literals */
2103 LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
2104 if ((op+length>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
2105 LZ4_wildCopy32(op, ip, op+length);
2106 ip += length; op += length;
2107 } else if (ip <= iend-(16 + 1/*max lit + offset + nextToken*/)) {
2108 /* We don't need to check oend, since we check it once for each loop below */
2109 DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
2110 /* Literals can only be <= 14, but hope compilers optimize better when copy by a register size */
2111 LZ4_memcpy(op, ip, 16);
2112 ip += length; op += length;
2113 } else {
2114 goto safe_literal_copy;
2117 /* get offset */
2118 offset = LZ4_readLE16(ip); ip+=2;
2119 DEBUGLOG(6, "blockPos%6u: offset = %u", (unsigned)(op-(BYTE*)dst), (unsigned)offset);
2120 match = op - offset;
2121 assert(match <= op); /* overflow check */
2123 /* get matchlength */
2124 length = token & ML_MASK;
2125 DEBUGLOG(7, " match length token = %u (len==%u)", (unsigned)length, (unsigned)length+MINMATCH);
2127 if (length == ML_MASK) {
2128 size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
2129 if (addl == rvl_error) {
2130 DEBUGLOG(5, "error reading long match length");
2131 goto _output_error;
2133 length += addl;
2134 length += MINMATCH;
2135 DEBUGLOG(7, " long match length == %u", (unsigned)length);
2136 if (unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
2137 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
2138 goto safe_match_copy;
2140 } else {
2141 length += MINMATCH;
2142 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
2143 DEBUGLOG(7, "moving to safe_match_copy (ml==%u)", (unsigned)length);
2144 goto safe_match_copy;
2147 /* Fastpath check: skip LZ4_wildCopy32 when true */
2148 if ((dict == withPrefix64k) || (match >= lowPrefix)) {
2149 if (offset >= 8) {
2150 assert(match >= lowPrefix);
2151 assert(match <= op);
2152 assert(op + 18 <= oend);
2154 LZ4_memcpy(op, match, 8);
2155 LZ4_memcpy(op+8, match+8, 8);
2156 LZ4_memcpy(op+16, match+16, 2);
2157 op += length;
2158 continue;
2159 } } }
2161 if ( checkOffset && (unlikely(match + dictSize < lowPrefix)) ) {
2162 DEBUGLOG(5, "Error : pos=%zi, offset=%zi => outside buffers", op-lowPrefix, op-match);
2163 goto _output_error;
2165 /* match starting within external dictionary */
2166 if ((dict==usingExtDict) && (match < lowPrefix)) {
2167 assert(dictEnd != NULL);
2168 if (unlikely(op+length > oend-LASTLITERALS)) {
2169 if (partialDecoding) {
2170 DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
2171 length = MIN(length, (size_t)(oend-op));
2172 } else {
2173 DEBUGLOG(6, "end-of-block condition violated")
2174 goto _output_error;
2177 if (length <= (size_t)(lowPrefix-match)) {
2178 /* match fits entirely within external dictionary : just copy */
2179 LZ4_memmove(op, dictEnd - (lowPrefix-match), length);
2180 op += length;
2181 } else {
2182 /* match stretches into both external dictionary and current block */
2183 size_t const copySize = (size_t)(lowPrefix - match);
2184 size_t const restSize = length - copySize;
2185 LZ4_memcpy(op, dictEnd - copySize, copySize);
2186 op += copySize;
2187 if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
2188 BYTE* const endOfMatch = op + restSize;
2189 const BYTE* copyFrom = lowPrefix;
2190 while (op < endOfMatch) { *op++ = *copyFrom++; }
2191 } else {
2192 LZ4_memcpy(op, lowPrefix, restSize);
2193 op += restSize;
2195 continue;
2198 /* copy match within block */
2199 cpy = op + length;
2201 assert((op <= oend) && (oend-op >= 32));
2202 if (unlikely(offset<16)) {
2203 LZ4_memcpy_using_offset(op, match, cpy, offset);
2204 } else {
2205 LZ4_wildCopy32(op, match, cpy);
2208 op = cpy; /* wildcopy correction */
2210 safe_decode:
2211 #endif
2213 /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
2214 DEBUGLOG(6, "using safe decode loop");
2215 while (1) {
2216 assert(ip < iend);
2217 token = *ip++;
2218 length = token >> ML_BITS; /* literal length */
2219 DEBUGLOG(7, "blockPos%6u: litLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length);
2221 /* A two-stage shortcut for the most common case:
2222 * 1) If the literal length is 0..14, and there is enough space,
2223 * enter the shortcut and copy 16 bytes on behalf of the literals
2224 * (in the fast mode, only 8 bytes can be safely copied this way).
2225 * 2) Further if the match length is 4..18, copy 18 bytes in a similar
2226 * manner; but we ensure that there's enough space in the output for
2227 * those 18 bytes earlier, upon entering the shortcut (in other words,
2228 * there is a combined check for both stages).
2230 if ( (length != RUN_MASK)
2231 /* strictly "less than" on input, to re-enter the loop with at least one byte */
2232 && likely((ip < shortiend) & (op <= shortoend)) ) {
2233 /* Copy the literals */
2234 LZ4_memcpy(op, ip, 16);
2235 op += length; ip += length;
2237 /* The second stage: prepare for match copying, decode full info.
2238 * If it doesn't work out, the info won't be wasted. */
2239 length = token & ML_MASK; /* match length */
2240 DEBUGLOG(7, "blockPos%6u: matchLength token = %u (len=%u)", (unsigned)(op-(BYTE*)dst), (unsigned)length, (unsigned)length + 4);
2241 offset = LZ4_readLE16(ip); ip += 2;
2242 match = op - offset;
2243 assert(match <= op); /* check overflow */
2245 /* Do not deal with overlapping matches. */
2246 if ( (length != ML_MASK)
2247 && (offset >= 8)
2248 && (dict==withPrefix64k || match >= lowPrefix) ) {
2249 /* Copy the match. */
2250 LZ4_memcpy(op + 0, match + 0, 8);
2251 LZ4_memcpy(op + 8, match + 8, 8);
2252 LZ4_memcpy(op +16, match +16, 2);
2253 op += length + MINMATCH;
2254 /* Both stages worked, load the next token. */
2255 continue;
2258 /* The second stage didn't work out, but the info is ready.
2259 * Propel it right to the point of match copying. */
2260 goto _copy_match;
2263 /* decode literal length */
2264 if (length == RUN_MASK) {
2265 size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1);
2266 if (addl == rvl_error) { goto _output_error; }
2267 length += addl;
2268 if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
2269 if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
2272 #if LZ4_FAST_DEC_LOOP
2273 safe_literal_copy:
2274 #endif
2275 /* copy literals */
2276 cpy = op+length;
2278 LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
2279 if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) {
2280 /* We've either hit the input parsing restriction or the output parsing restriction.
2281 * In the normal scenario, decoding a full block, it must be the last sequence,
2282 * otherwise it's an error (invalid input or dimensions).
2283 * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
2285 if (partialDecoding) {
2286 /* In partial decoding, we may have reached this block only because of the output size
2287 * restriction, which is not an error : the output buffer is allowed to be undersized.
2289 DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
2290 DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
2291 DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
2292 DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
2293 /* Finishing in the middle of a literals segment,
2294 * due to lack of input.
2296 if (ip+length > iend) {
2297 length = (size_t)(iend-ip);
2298 cpy = op + length;
2300 /* Finishing in the middle of a literals segment,
2301 * due to lack of output space.
2303 if (cpy > oend) {
2304 cpy = oend;
2305 assert(op<=oend);
2306 length = (size_t)(oend-op);
2308 } else {
2309 /* We must be on the last sequence (or invalid) because of the parsing limitations
2310 * so check that we exactly consume the input and don't overrun the output buffer.
2312 if ((ip+length != iend) || (cpy > oend)) {
2313 DEBUGLOG(5, "should have been last run of literals")
2314 DEBUGLOG(5, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
2315 DEBUGLOG(5, "or cpy(%p) > (oend-MFLIMIT)(%p)", cpy, oend-MFLIMIT);
2316 DEBUGLOG(5, "after writing %u bytes / %i bytes available", (unsigned)(op-(BYTE*)dst), outputSize);
2317 goto _output_error;
2320 LZ4_memmove(op, ip, length); /* supports overlapping memory regions, for in-place decompression scenarios */
2321 ip += length;
2322 op += length;
2323 /* Necessarily EOF when !partialDecoding.
2324 * When partialDecoding, it is EOF if we've either
2325 * filled the output buffer or
2326 * can't proceed with reading an offset for following match.
2328 if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
2329 break;
2331 } else {
2332 LZ4_wildCopy8(op, ip, cpy); /* can overwrite up to 8 bytes beyond cpy */
2333 ip += length; op = cpy;
2336 /* get offset */
2337 offset = LZ4_readLE16(ip); ip+=2;
2338 match = op - offset;
2340 /* get matchlength */
2341 length = token & ML_MASK;
2342 DEBUGLOG(7, "blockPos%6u: matchLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length);
2344 _copy_match:
2345 if (length == ML_MASK) {
2346 size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
2347 if (addl == rvl_error) { goto _output_error; }
2348 length += addl;
2349 if (unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */
2351 length += MINMATCH;
2353 #if LZ4_FAST_DEC_LOOP
2354 safe_match_copy:
2355 #endif
2356 if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */
2357 /* match starting within external dictionary */
2358 if ((dict==usingExtDict) && (match < lowPrefix)) {
2359 assert(dictEnd != NULL);
2360 if (unlikely(op+length > oend-LASTLITERALS)) {
2361 if (partialDecoding) length = MIN(length, (size_t)(oend-op));
2362 else goto _output_error; /* doesn't respect parsing restriction */
2365 if (length <= (size_t)(lowPrefix-match)) {
2366 /* match fits entirely within external dictionary : just copy */
2367 LZ4_memmove(op, dictEnd - (lowPrefix-match), length);
2368 op += length;
2369 } else {
2370 /* match stretches into both external dictionary and current block */
2371 size_t const copySize = (size_t)(lowPrefix - match);
2372 size_t const restSize = length - copySize;
2373 LZ4_memcpy(op, dictEnd - copySize, copySize);
2374 op += copySize;
2375 if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
2376 BYTE* const endOfMatch = op + restSize;
2377 const BYTE* copyFrom = lowPrefix;
2378 while (op < endOfMatch) *op++ = *copyFrom++;
2379 } else {
2380 LZ4_memcpy(op, lowPrefix, restSize);
2381 op += restSize;
2383 continue;
2385 assert(match >= lowPrefix);
2387 /* copy match within block */
2388 cpy = op + length;
2390 /* partialDecoding : may end anywhere within the block */
2391 assert(op<=oend);
2392 if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
2393 size_t const mlen = MIN(length, (size_t)(oend-op));
2394 const BYTE* const matchEnd = match + mlen;
2395 BYTE* const copyEnd = op + mlen;
2396 if (matchEnd > op) { /* overlap copy */
2397 while (op < copyEnd) { *op++ = *match++; }
2398 } else {
2399 LZ4_memcpy(op, match, mlen);
2401 op = copyEnd;
2402 if (op == oend) { break; }
2403 continue;
2406 if (unlikely(offset<8)) {
2407 LZ4_write32(op, 0); /* silence msan warning when offset==0 */
2408 op[0] = match[0];
2409 op[1] = match[1];
2410 op[2] = match[2];
2411 op[3] = match[3];
2412 match += inc32table[offset];
2413 LZ4_memcpy(op+4, match, 4);
2414 match -= dec64table[offset];
2415 } else {
2416 LZ4_memcpy(op, match, 8);
2417 match += 8;
2419 op += 8;
2421 if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
2422 BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
2423 if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
2424 if (op < oCopyLimit) {
2425 LZ4_wildCopy8(op, match, oCopyLimit);
2426 match += oCopyLimit - op;
2427 op = oCopyLimit;
2429 while (op < cpy) { *op++ = *match++; }
2430 } else {
2431 LZ4_memcpy(op, match, 8);
2432 if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); }
2434 op = cpy; /* wildcopy correction */
2437 /* end of decoding */
2438 DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
2439 return (int) (((char*)op)-dst); /* Nb of output bytes decoded */
2441 /* Overflow error detected */
2442 _output_error:
2443 return (int) (-(((const char*)ip)-src))-1;
2448 /*===== Instantiate the API decoding functions. =====*/
2450 LZ4_FORCE_O2
2451 int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
2453 return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
2454 decode_full_block, noDict,
2455 (BYTE*)dest, NULL, 0);
2458 LZ4_FORCE_O2
2459 int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
2461 dstCapacity = MIN(targetOutputSize, dstCapacity);
2462 return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity,
2463 partial_decode,
2464 noDict, (BYTE*)dst, NULL, 0);
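/* Illustrative caller-side sketch for LZ4_decompress_safe_partial() above :
 * decode only the first wantedBytes of a block, e.g. to inspect a header
 * without decompressing everything. example_peek_prefix is hypothetical. */
#include "lz4.h"

static int example_peek_prefix(const char* compressed, int compressedSize,
                               char* dst, int wantedBytes, int dstCapacity)
{
    /* Returns the number of bytes written into dst :
     * up to wantedBytes, possibly fewer if the block itself is shorter. */
    return LZ4_decompress_safe_partial(compressed, dst, compressedSize,
                                       wantedBytes, dstCapacity);
}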
2467 LZ4_FORCE_O2
2468 int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
2470 DEBUGLOG(5, "LZ4_decompress_fast");
2471 return LZ4_decompress_unsafe_generic(
2472 (const BYTE*)source, (BYTE*)dest, originalSize,
2473 0, NULL, 0);
2476 /*===== Instantiate a few more decoding cases, used more than once. =====*/
2478 LZ4_FORCE_O2 /* Exported, an obsolete API function. */
2479 int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
2481 return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
2482 decode_full_block, withPrefix64k,
2483 (BYTE*)dest - 64 KB, NULL, 0);
2486 LZ4_FORCE_O2
2487 static int LZ4_decompress_safe_partial_withPrefix64k(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity)
2489 dstCapacity = MIN(targetOutputSize, dstCapacity);
2490 return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
2491 partial_decode, withPrefix64k,
2492 (BYTE*)dest - 64 KB, NULL, 0);
2495 /* Another obsolete API function, paired with the previous one. */
2496 int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
2498 return LZ4_decompress_unsafe_generic(
2499 (const BYTE*)source, (BYTE*)dest, originalSize,
2500 64 KB, NULL, 0);
2503 LZ4_FORCE_O2
2504 static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
2505 size_t prefixSize)
2507 return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
2508 decode_full_block, noDict,
2509 (BYTE*)dest-prefixSize, NULL, 0);
2512 LZ4_FORCE_O2
2513 static int LZ4_decompress_safe_partial_withSmallPrefix(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity,
2514 size_t prefixSize)
2516 dstCapacity = MIN(targetOutputSize, dstCapacity);
2517 return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
2518 partial_decode, noDict,
2519 (BYTE*)dest-prefixSize, NULL, 0);
2522 LZ4_FORCE_O2
2523 int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
2524 int compressedSize, int maxOutputSize,
2525 const void* dictStart, size_t dictSize)
2527 DEBUGLOG(5, "LZ4_decompress_safe_forceExtDict");
2528 return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
2529 decode_full_block, usingExtDict,
2530 (BYTE*)dest, (const BYTE*)dictStart, dictSize);
2533 LZ4_FORCE_O2
2534 int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest,
2535 int compressedSize, int targetOutputSize, int dstCapacity,
2536 const void* dictStart, size_t dictSize)
2538 dstCapacity = MIN(targetOutputSize, dstCapacity);
2539 return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
2540 partial_decode, usingExtDict,
2541 (BYTE*)dest, (const BYTE*)dictStart, dictSize);
2544 LZ4_FORCE_O2
2545 static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
2546 const void* dictStart, size_t dictSize)
2548 return LZ4_decompress_unsafe_generic(
2549 (const BYTE*)source, (BYTE*)dest, originalSize,
2550 0, (const BYTE*)dictStart, dictSize);
2553 /* The "double dictionary" mode, for use with e.g. ring buffers: the first part
2554 * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
2555 * These routines are used only once, in LZ4_decompress_*_continue().
2557 LZ4_FORCE_INLINE
2558 int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
2559 size_t prefixSize, const void* dictStart, size_t dictSize)
2561 return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
2562 decode_full_block, usingExtDict,
2563 (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);
2566 /*===== streaming decompression functions =====*/
2568 #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
2569 LZ4_streamDecode_t* LZ4_createStreamDecode(void)
2571 LZ4_STATIC_ASSERT(sizeof(LZ4_streamDecode_t) >= sizeof(LZ4_streamDecode_t_internal));
2572 return (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
2575 int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
2577 if (LZ4_stream == NULL) { return 0; } /* support free on NULL */
2578 FREEMEM(LZ4_stream);
2579 return 0;
2581 #endif
2583 /*! LZ4_setStreamDecode() :
2584 * Use this function to instruct where to find the dictionary.
2585 * This function is not necessary if previous data is still available where it was decoded.
2586 * Loading a size of 0 is allowed (same effect as no dictionary).
2587 * @return : 1 if OK, 0 if error
2589 int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
2591 LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
2592 lz4sd->prefixSize = (size_t)dictSize;
2593 if (dictSize) {
2594 assert(dictionary != NULL);
2595 lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize;
2596 } else {
2597 lz4sd->prefixEnd = (const BYTE*) dictionary;
2599 lz4sd->externalDict = NULL;
2600 lz4sd->extDictSize = 0;
2601 return 1;
2604 /*! LZ4_decoderRingBufferSize() :
2605 * when setting a ring buffer for streaming decompression (optional scenario),
2606 * provides the minimum size of this ring buffer
2607 * to be compatible with any source respecting maxBlockSize condition.
2608 * Note : in a ring buffer scenario,
2609 * blocks are presumed decompressed next to each other.
2610 * When not enough space remains for next block (remainingSize < maxBlockSize),
2611 * decoding resumes from beginning of ring buffer.
2612 * @return : minimum ring buffer size,
2613 * or 0 if there is an error (invalid maxBlockSize).
2615 int LZ4_decoderRingBufferSize(int maxBlockSize)
2617 if (maxBlockSize < 0) return 0;
2618 if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0;
2619 if (maxBlockSize < 16) maxBlockSize = 16;
2620 return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize);
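/* Illustrative caller-side sketch for LZ4_decoderRingBufferSize() above :
 * allocate a decoding ring buffer for a known maximum block size. The macro
 * LZ4_DECODER_RING_BUFFER_SIZE() gives the same value at compile time.
 * example_alloc_decode_ring is hypothetical. */
#include <stdlib.h>
#include "lz4.h"

static char* example_alloc_decode_ring(int maxBlockSize, int* ringSizePtr)
{
    int const ringSize = LZ4_decoderRingBufferSize(maxBlockSize);
    *ringSizePtr = ringSize;
    if (ringSize == 0) return NULL;            /* invalid maxBlockSize */
    return (char*)malloc((size_t)ringSize);    /* caller frees */
}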
2624 *_continue() :
2625 These decoding functions allow decompression of multiple blocks in "streaming" mode.
2626 Previously decoded blocks must still be available at the memory position where they were decoded.
2627 If that's not possible, save the relevant part of the decoded data into a safe buffer,
2628 and indicate where it stands using LZ4_setStreamDecode()
2630 LZ4_FORCE_O2
2631 int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
2633 LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
2634 int result;
2636 if (lz4sd->prefixSize == 0) {
2637 /* The first call, no dictionary yet. */
2638 assert(lz4sd->extDictSize == 0);
2639 result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
2640 if (result <= 0) return result;
2641 lz4sd->prefixSize = (size_t)result;
2642 lz4sd->prefixEnd = (BYTE*)dest + result;
2643 } else if (lz4sd->prefixEnd == (BYTE*)dest) {
2644 /* They're rolling the current segment. */
2645 if (lz4sd->prefixSize >= 64 KB - 1)
2646 result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
2647 else if (lz4sd->extDictSize == 0)
2648 result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize,
2649 lz4sd->prefixSize);
2650 else
2651 result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
2652 lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
2653 if (result <= 0) return result;
2654 lz4sd->prefixSize += (size_t)result;
2655 lz4sd->prefixEnd += result;
2656 } else {
2657 /* The buffer wraps around, or they're switching to another buffer. */
2658 lz4sd->extDictSize = lz4sd->prefixSize;
2659 lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
2660 result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize,
2661 lz4sd->externalDict, lz4sd->extDictSize);
2662 if (result <= 0) return result;
2663 lz4sd->prefixSize = (size_t)result;
2664 lz4sd->prefixEnd = (BYTE*)dest + result;
2667 return result;
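/* Illustrative caller-side sketch for LZ4_decompress_safe_continue() above :
 * streaming decompression matching the double-buffer compressor sketched
 * earlier. Previously decoded blocks stay addressable in the other buffer
 * half, so no LZ4_setStreamDecode() call is needed between blocks. The
 * framing (native-endian size prefix), EXAMPLE_DEC_BLOCK_SIZE and the
 * function name are hypothetical; error handling abbreviated. */
#include <stdio.h>
#include "lz4.h"

#define EXAMPLE_DEC_BLOCK_SIZE (64 * 1024)

static void example_stream_decompress(FILE* fin, FILE* fout)
{
    static char decBuf[2][EXAMPLE_DEC_BLOCK_SIZE];
    static char cBuf[LZ4_COMPRESSBOUND(EXAMPLE_DEC_BLOCK_SIZE)];
    LZ4_streamDecode_t* const sd = LZ4_createStreamDecode();
    int idx = 0;
    if (sd == NULL) return;

    for (;;) {
        int cSize, dSize;
        if (fread(&cSize, sizeof(cSize), 1, fin) != 1) break;
        if (cSize <= 0 || cSize > (int)sizeof(cBuf)) break;
        if (fread(cBuf, 1, (size_t)cSize, fin) != (size_t)cSize) break;
        dSize = LZ4_decompress_safe_continue(sd, cBuf, decBuf[idx], cSize, EXAMPLE_DEC_BLOCK_SIZE);
        if (dSize < 0) break;
        fwrite(decBuf[idx], 1, (size_t)dSize, fout);
        idx = 1 - idx;   /* the block just decoded becomes the prefix/extDict for the next one */
    }
    LZ4_freeStreamDecode(sd);
}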
2670 LZ4_FORCE_O2 int
2671 LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode,
2672 const char* source, char* dest, int originalSize)
2674 LZ4_streamDecode_t_internal* const lz4sd =
2675 (assert(LZ4_streamDecode!=NULL), &LZ4_streamDecode->internal_donotuse);
2676 int result;
2678 DEBUGLOG(5, "LZ4_decompress_fast_continue (toDecodeSize=%i)", originalSize);
2679 assert(originalSize >= 0);
2681 if (lz4sd->prefixSize == 0) {
2682 DEBUGLOG(5, "first invocation : no prefix nor extDict");
2683 assert(lz4sd->extDictSize == 0);
2684 result = LZ4_decompress_fast(source, dest, originalSize);
2685 if (result <= 0) return result;
2686 lz4sd->prefixSize = (size_t)originalSize;
2687 lz4sd->prefixEnd = (BYTE*)dest + originalSize;
2688 } else if (lz4sd->prefixEnd == (BYTE*)dest) {
2689 DEBUGLOG(5, "continue using existing prefix");
2690 result = LZ4_decompress_unsafe_generic(
2691 (const BYTE*)source, (BYTE*)dest, originalSize,
2692 lz4sd->prefixSize,
2693 lz4sd->externalDict, lz4sd->extDictSize);
2694 if (result <= 0) return result;
2695 lz4sd->prefixSize += (size_t)originalSize;
2696 lz4sd->prefixEnd += originalSize;
2697 } else {
2698 DEBUGLOG(5, "prefix becomes extDict");
2699 lz4sd->extDictSize = lz4sd->prefixSize;
2700 lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
2701 result = LZ4_decompress_fast_extDict(source, dest, originalSize,
2702 lz4sd->externalDict, lz4sd->extDictSize);
2703 if (result <= 0) return result;
2704 lz4sd->prefixSize = (size_t)originalSize;
2705 lz4sd->prefixEnd = (BYTE*)dest + originalSize;
2708 return result;
2713 Advanced decoding functions :
2714 *_usingDict() :
2715 These decoding functions work the same as the "_continue" ones,
2716 except that the dictionary must be explicitly provided within the parameters
2719 int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
2721 if (dictSize==0)
2722 return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
2723 if (dictStart+dictSize == dest) {
2724 if (dictSize >= 64 KB - 1) {
2725 return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
2727 assert(dictSize >= 0);
2728 return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize);
2730 assert(dictSize >= 0);
2731 return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize);
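/* Illustrative caller-side sketch for LZ4_decompress_safe_usingDict() above :
 * decode a block that was compressed against an explicit dictionary (see the
 * LZ4_loadDict() sketch earlier), passing the same dictionary bytes.
 * example_decompress_with_dict is hypothetical. */
#include "lz4.h"

static int example_decompress_with_dict(const char* dict, int dictSize,
                                        const char* compressed, int compressedSize,
                                        char* dst, int dstCapacity)
{
    /* Returns the decompressed size, or a negative value on error. */
    return LZ4_decompress_safe_usingDict(compressed, dst, compressedSize,
                                         dstCapacity, dict, dictSize);
}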
2734 int LZ4_decompress_safe_partial_usingDict(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity, const char* dictStart, int dictSize)
2736 if (dictSize==0)
2737 return LZ4_decompress_safe_partial(source, dest, compressedSize, targetOutputSize, dstCapacity);
2738 if (dictStart+dictSize == dest) {
2739 if (dictSize >= 64 KB - 1) {
2740 return LZ4_decompress_safe_partial_withPrefix64k(source, dest, compressedSize, targetOutputSize, dstCapacity);
2742 assert(dictSize >= 0);
2743 return LZ4_decompress_safe_partial_withSmallPrefix(source, dest, compressedSize, targetOutputSize, dstCapacity, (size_t)dictSize);
2745 assert(dictSize >= 0);
2746 return LZ4_decompress_safe_partial_forceExtDict(source, dest, compressedSize, targetOutputSize, dstCapacity, dictStart, (size_t)dictSize);
2749 int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
2751 if (dictSize==0 || dictStart+dictSize == dest)
2752 return LZ4_decompress_unsafe_generic(
2753 (const BYTE*)source, (BYTE*)dest, originalSize,
2754 (size_t)dictSize, NULL, 0);
2755 assert(dictSize >= 0);
2756 return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize);
2760 /*=*************************************************
2761 * Obsolete Functions
2762 ***************************************************/
2763 /* obsolete compression functions */
2764 int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
2766 return LZ4_compress_default(source, dest, inputSize, maxOutputSize);
2768 int LZ4_compress(const char* src, char* dest, int srcSize)
2770 return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize));
2772 int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
2774 return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1);
2776 int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize)
2778 return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1);
2780 int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity)
2782 return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1);
2784 int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize)
2786 return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1);
2790 These decompression functions are deprecated and should no longer be used.
2791 They are only provided here for compatibility with older user programs.
2792 - LZ4_uncompress is totally equivalent to LZ4_decompress_fast
2793 - LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
2795 int LZ4_uncompress (const char* source, char* dest, int outputSize)
2797 return LZ4_decompress_fast(source, dest, outputSize);
2799 int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)
2801 return LZ4_decompress_safe(source, dest, isize, maxOutputSize);
2804 /* Obsolete Streaming functions */
2806 int LZ4_sizeofStreamState(void) { return sizeof(LZ4_stream_t); }
2808 int LZ4_resetStreamState(void* state, char* inputBuffer)
2810 (void)inputBuffer;
2811 LZ4_resetStream((LZ4_stream_t*)state);
2812 return 0;
2815 #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
2816 void* LZ4_create (char* inputBuffer)
2818 (void)inputBuffer;
2819 return LZ4_createStream();
2821 #endif
2823 char* LZ4_slideInputBuffer (void* state)
2825 /* avoid const char * -> char * conversion warning */
2826 return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary;
2829 #endif /* LZ4_COMMONDEFS_ONLY */