1 /*
2 LZ4 - Fast LZ compression algorithm
3 Copyright (C) 2011-2020, Yann Collet.
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 You can contact the author at :
31 - LZ4 homepage : http://www.lz4.org
32 - LZ4 source repository : https://github.com/lz4/lz4
35 /*-************************************
36 * Tuning parameters
37 **************************************/
39 * LZ4_HEAPMODE :
40 * Select how the default compression functions allocate memory for their hash table:
41 * on the memory stack (0: default, fastest), or on the memory heap (1: requires malloc()).
43 #ifndef LZ4_HEAPMODE
44 # define LZ4_HEAPMODE 0
45 #endif
48 * LZ4_ACCELERATION_DEFAULT :
49 * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
51 #define LZ4_ACCELERATION_DEFAULT 1
53 * LZ4_ACCELERATION_MAX :
54 * Any "acceleration" value higher than this threshold
55 * gets treated as LZ4_ACCELERATION_MAX instead (fix #876)
57 #define LZ4_ACCELERATION_MAX 65537
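/* Illustrative usage sketch (editorial addition, not part of upstream LZ4):
 * how the two acceleration bounds above surface through the public API.
 * A value <= 0 passed to LZ4_compress_fast() falls back to
 * LZ4_ACCELERATION_DEFAULT, and anything above LZ4_ACCELERATION_MAX is
 * clamped. Buffer names and sizes below are hypothetical. */
#if 0
static int example_compress_faster(const char* src, int srcSize,
                                   char* dst, int dstCapacity)
{
    /* acceleration = 1 is the default speed/ratio trade-off; larger values
     * trade compression ratio for additional speed. */
    return LZ4_compress_fast(src, dst, srcSize, dstCapacity, 8);
}
#endif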
60 /*-************************************
61 * CPU Feature Detection
62 **************************************/
63 /* LZ4_FORCE_MEMORY_ACCESS
64 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
65 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
66 * The switch below allows selecting a different access method for improved performance.
67 * Method 0 (default) : use `memcpy()`. Safe and portable.
68 * Method 1 : `__packed` statement. It relies on a compiler extension (i.e., not portable).
69 * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
70 * Method 2 : direct access. This method is portable but violates the C standard.
71 * It can generate buggy code on targets whose assembly generation depends on alignment.
72 * But in some circumstances, it's the only known way to get the best performance (i.e., GCC + ARMv6).
73 * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
74 * Prefer these methods in priority order (0 > 1 > 2)
76 #ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */
77 # if defined(__GNUC__) && \
78 ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
79 || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
80 # define LZ4_FORCE_MEMORY_ACCESS 2
81 # elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
82 # define LZ4_FORCE_MEMORY_ACCESS 1
83 # endif
84 #endif
87 * LZ4_FORCE_SW_BITCOUNT
88 * Define this parameter if your target system or compiler does not support hardware bit count
90 #if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */
91 # undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */
92 # define LZ4_FORCE_SW_BITCOUNT
93 #endif
97 /*-************************************
98 * Dependency
99 **************************************/
101 * LZ4_SRC_INCLUDED:
102 * Amalgamation flag, whether lz4.c is included
104 #ifndef LZ4_SRC_INCLUDED
105 # define LZ4_SRC_INCLUDED 1
106 #endif
108 #ifndef LZ4_STATIC_LINKING_ONLY
109 #define LZ4_STATIC_LINKING_ONLY
110 #endif
112 #ifndef LZ4_DISABLE_DEPRECATE_WARNINGS
113 #define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
114 #endif
116 #define LZ4_STATIC_LINKING_ONLY /* LZ4_DISTANCE_MAX */
117 #include "lz4.h"
118 /* see also "memory routines" below */
121 /*-************************************
122 * Compiler Options
123 **************************************/
124 #if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */
125 # include <intrin.h> /* only present in VS2005+ */
126 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
127 # pragma warning(disable : 6237) /* disable: C6237: conditional expression is always 0 */
128 #endif /* _MSC_VER */
130 #ifndef LZ4_FORCE_INLINE
131 # ifdef _MSC_VER /* Visual Studio */
132 # define LZ4_FORCE_INLINE static __forceinline
133 # else
134 # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
135 # ifdef __GNUC__
136 # define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
137 # else
138 # define LZ4_FORCE_INLINE static inline
139 # endif
140 # else
141 # define LZ4_FORCE_INLINE static
142 # endif /* __STDC_VERSION__ */
143 # endif /* _MSC_VER */
144 #endif /* LZ4_FORCE_INLINE */
146 /* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
147 * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
148 * together with a simple 8-byte copy loop as a fall-back path.
149 * However, this optimization hurts the decompression speed by >30%,
150 * because the execution does not go to the optimized loop
151 * for typical compressible data, and all of the preamble checks
152 * before going to the fall-back path become useless overhead.
153 * This optimization happens only with the -O3 flag, and -O2 generates
154 * a simple 8-byte copy loop.
155 * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
156 * functions are annotated with __attribute__((optimize("O2"))),
157 * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
158 * of LZ4_wildCopy8 does not affect the compression speed.
160 #if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
161 # define LZ4_FORCE_O2 __attribute__((optimize("O2")))
162 # undef LZ4_FORCE_INLINE
163 # define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline))
164 #else
165 # define LZ4_FORCE_O2
166 #endif
168 #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
169 # define expect(expr,value) (__builtin_expect ((expr),(value)) )
170 #else
171 # define expect(expr,value) (expr)
172 #endif
174 #ifndef likely
175 #define likely(expr) expect((expr) != 0, 1)
176 #endif
177 #ifndef unlikely
178 #define unlikely(expr) expect((expr) != 0, 0)
179 #endif
181 /* Should the alignment test prove unreliable, for some reason,
182 * it can be disabled by setting LZ4_ALIGN_TEST to 0 */
183 #ifndef LZ4_ALIGN_TEST /* can be externally provided */
184 # define LZ4_ALIGN_TEST 1
185 #endif
188 /*-************************************
189 * Memory routines
190 **************************************/
192 /*! LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION :
193 * Disable relatively high-level LZ4/HC functions that use dynamic memory
194 * allocation functions (malloc(), calloc(), free()).
196 * Note that this is a compile-time switch. And since it disables
197 * public/stable LZ4 v1 API functions, we don't recommend using this
198 * symbol to generate a library for distribution.
200 * The following public functions are removed when this symbol is defined.
201 * - lz4 : LZ4_createStream, LZ4_freeStream,
202 * LZ4_createStreamDecode, LZ4_freeStreamDecode, LZ4_create (deprecated)
203 * - lz4hc : LZ4_createStreamHC, LZ4_freeStreamHC,
204 * LZ4_createHC (deprecated), LZ4_freeHC (deprecated)
205 * - lz4frame, lz4file : All LZ4F_* functions
207 #if defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
208 # define ALLOC(s) lz4_error_memory_allocation_is_disabled
209 # define ALLOC_AND_ZERO(s) lz4_error_memory_allocation_is_disabled
210 # define FREEMEM(p) lz4_error_memory_allocation_is_disabled
211 #elif defined(LZ4_USER_MEMORY_FUNCTIONS)
212 /* Memory management functions can be customized by the user project.
213 * The functions below must be defined somewhere in the project
214 * and be available at link time (see the illustrative sketch after this block). */
215 void* LZ4_malloc(size_t s);
216 void* LZ4_calloc(size_t n, size_t s);
217 void LZ4_free(void* p);
218 # define ALLOC(s) LZ4_malloc(s)
219 # define ALLOC_AND_ZERO(s) LZ4_calloc(1,s)
220 # define FREEMEM(p) LZ4_free(p)
221 #else
222 # include <stdlib.h> /* malloc, calloc, free */
223 # define ALLOC(s) malloc(s)
224 # define ALLOC_AND_ZERO(s) calloc(1,s)
225 # define FREEMEM(p) free(p)
226 #endif
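/* Illustrative usage sketch (editorial addition, not part of upstream LZ4):
 * what a project that defines LZ4_USER_MEMORY_FUNCTIONS is expected to provide
 * at link time, matching the prototypes declared above. The bodies below merely
 * forward to the C runtime; a real integration would typically route them into
 * its own allocator. Guarded with "#if 0" so it does not affect this build. */
#if 0
#include <stdlib.h>

void* LZ4_malloc(size_t s)           { return malloc(s); }
void* LZ4_calloc(size_t n, size_t s) { return calloc(n, s); }
void  LZ4_free(void* p)              { free(p); }
#endif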
228 #if ! LZ4_FREESTANDING
229 # include <string.h> /* memset, memcpy */
230 #endif
231 #if !defined(LZ4_memset)
232 # define LZ4_memset(p,v,s) memset((p),(v),(s))
233 #endif
234 #define MEM_INIT(p,v,s) LZ4_memset((p),(v),(s))
237 /*-************************************
238 * Common Constants
239 **************************************/
240 #define MINMATCH 4
242 #define WILDCOPYLENGTH 8
243 #define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
244 #define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
245 #define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
246 #define FASTLOOP_SAFE_DISTANCE 64
247 static const int LZ4_minLength = (MFLIMIT+1);
249 #define KB *(1 <<10)
250 #define MB *(1 <<20)
251 #define GB *(1U<<30)
253 #define LZ4_DISTANCE_ABSOLUTE_MAX 65535
254 #if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */
255 # error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
256 #endif
258 #define ML_BITS 4
259 #define ML_MASK ((1U<<ML_BITS)-1)
260 #define RUN_BITS (8-ML_BITS)
261 #define RUN_MASK ((1U<<RUN_BITS)-1)
264 /*-************************************
265 * Error detection
266 **************************************/
267 #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
268 # include <assert.h>
269 #else
270 # ifndef assert
271 # define assert(condition) ((void)0)
272 # endif
273 #endif
275 #define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */
277 #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
278 # include <stdio.h>
279 static int g_debuglog_enable = 1;
280 # define DEBUGLOG(l, ...) { \
281 if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
282 fprintf(stderr, __FILE__ ": "); \
283 fprintf(stderr, __VA_ARGS__); \
284 fprintf(stderr, " \n"); \
286 #else
287 # define DEBUGLOG(l, ...) {} /* disabled */
288 #endif
290 static int LZ4_isAligned(const void* ptr, size_t alignment)
292 return ((size_t)ptr & (alignment -1)) == 0;
296 /*-************************************
297 * Types
298 **************************************/
299 #include <limits.h>
300 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
301 # include <stdint.h>
302 typedef uint8_t BYTE;
303 typedef uint16_t U16;
304 typedef uint32_t U32;
305 typedef int32_t S32;
306 typedef uint64_t U64;
307 typedef uintptr_t uptrval;
308 #else
309 # if UINT_MAX != 4294967295UL
310 # error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
311 # endif
312 typedef unsigned char BYTE;
313 typedef unsigned short U16;
314 typedef unsigned int U32;
315 typedef signed int S32;
316 typedef unsigned long long U64;
317 typedef size_t uptrval; /* generally true, except OpenVMS-64 */
318 #endif
320 #if defined(__x86_64__)
321 typedef U64 reg_t; /* 64-bits in x32 mode */
322 #else
323 typedef size_t reg_t; /* 32-bits in x32 mode */
324 #endif
326 typedef enum {
327 notLimited = 0,
328 limitedOutput = 1,
329 fillOutput = 2
330 } limitedOutput_directive;
333 /*-************************************
334 * Reading and writing into memory
335 **************************************/
338 * LZ4 relies on memcpy with a constant size being inlined. In freestanding
339 * environments, the compiler can't assume the implementation of memcpy() is
340 * standard compliant, so it can't apply its specialized memcpy() inlining
341 * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
342 * memcpy() as if it were standard compliant, so it can inline it in freestanding
343 * environments. This is needed when decompressing the Linux Kernel, for example.
345 #if !defined(LZ4_memcpy)
346 # if defined(__GNUC__) && (__GNUC__ >= 4)
347 # define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
348 # else
349 # define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
350 # endif
351 #endif
353 #if !defined(LZ4_memmove)
354 # if defined(__GNUC__) && (__GNUC__ >= 4)
355 # define LZ4_memmove __builtin_memmove
356 # else
357 # define LZ4_memmove memmove
358 # endif
359 #endif
361 static unsigned LZ4_isLittleEndian(void)
363 const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
364 return one.c[0];
368 #if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
369 /* lie to the compiler about data alignment; use with caution */
371 static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
372 static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
373 static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }
375 static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
376 static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
378 #elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
380 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
381 /* currently only defined for gcc and icc */
382 typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) LZ4_unalign;
384 static U16 LZ4_read16(const void* ptr) { return ((const LZ4_unalign*)ptr)->u16; }
385 static U32 LZ4_read32(const void* ptr) { return ((const LZ4_unalign*)ptr)->u32; }
386 static reg_t LZ4_read_ARCH(const void* ptr) { return ((const LZ4_unalign*)ptr)->uArch; }
388 static void LZ4_write16(void* memPtr, U16 value) { ((LZ4_unalign*)memPtr)->u16 = value; }
389 static void LZ4_write32(void* memPtr, U32 value) { ((LZ4_unalign*)memPtr)->u32 = value; }
391 #else /* safe and portable access using memcpy() */
393 static U16 LZ4_read16(const void* memPtr)
395 U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
398 static U32 LZ4_read32(const void* memPtr)
400 U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
403 static reg_t LZ4_read_ARCH(const void* memPtr)
405 reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
408 static void LZ4_write16(void* memPtr, U16 value)
410 LZ4_memcpy(memPtr, &value, sizeof(value));
413 static void LZ4_write32(void* memPtr, U32 value)
415 LZ4_memcpy(memPtr, &value, sizeof(value));
418 #endif /* LZ4_FORCE_MEMORY_ACCESS */
421 static U16 LZ4_readLE16(const void* memPtr)
423 if (LZ4_isLittleEndian()) {
424 return LZ4_read16(memPtr);
425 } else {
426 const BYTE* p = (const BYTE*)memPtr;
427 return (U16)((U16)p[0] + (p[1]<<8));
431 static void LZ4_writeLE16(void* memPtr, U16 value)
433 if (LZ4_isLittleEndian()) {
434 LZ4_write16(memPtr, value);
435 } else {
436 BYTE* p = (BYTE*)memPtr;
437 p[0] = (BYTE) value;
438 p[1] = (BYTE)(value>>8);
442 /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
443 LZ4_FORCE_INLINE
444 void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
446 BYTE* d = (BYTE*)dstPtr;
447 const BYTE* s = (const BYTE*)srcPtr;
448 BYTE* const e = (BYTE*)dstEnd;
450 do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
453 static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
454 static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
457 #ifndef LZ4_FAST_DEC_LOOP
458 # if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
459 # define LZ4_FAST_DEC_LOOP 1
460 # elif defined(__aarch64__) && defined(__APPLE__)
461 # define LZ4_FAST_DEC_LOOP 1
462 # elif defined(__aarch64__) && !defined(__clang__)
463 /* On non-Apple aarch64, we disable this optimization for clang because
464 * on certain mobile chipsets, performance is reduced with clang. For
465 * more information refer to https://github.com/lz4/lz4/pull/707 */
466 # define LZ4_FAST_DEC_LOOP 1
467 # else
468 # define LZ4_FAST_DEC_LOOP 0
469 # endif
470 #endif
472 #if LZ4_FAST_DEC_LOOP
474 LZ4_FORCE_INLINE void
475 LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
477 assert(srcPtr + offset == dstPtr);
478 if (offset < 8) {
479 LZ4_write32(dstPtr, 0); /* silence an msan warning when offset==0 */
480 dstPtr[0] = srcPtr[0];
481 dstPtr[1] = srcPtr[1];
482 dstPtr[2] = srcPtr[2];
483 dstPtr[3] = srcPtr[3];
484 srcPtr += inc32table[offset];
485 LZ4_memcpy(dstPtr+4, srcPtr, 4);
486 srcPtr -= dec64table[offset];
487 dstPtr += 8;
488 } else {
489 LZ4_memcpy(dstPtr, srcPtr, 8);
490 dstPtr += 8;
491 srcPtr += 8;
494 LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
497 /* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
498 * this version copies 16 bytes twice (instead of 32 bytes once)
499 * because it must be compatible with offsets >= 16. */
500 LZ4_FORCE_INLINE void
501 LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
503 BYTE* d = (BYTE*)dstPtr;
504 const BYTE* s = (const BYTE*)srcPtr;
505 BYTE* const e = (BYTE*)dstEnd;
507 do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
510 /* LZ4_memcpy_using_offset() presumes :
511 * - dstEnd >= dstPtr + MINMATCH
512 * - there are at least 8 bytes available to write after dstEnd */
513 LZ4_FORCE_INLINE void
514 LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
516 BYTE v[8];
518 assert(dstEnd >= dstPtr + MINMATCH);
520 switch(offset) {
521 case 1:
522 MEM_INIT(v, *srcPtr, 8);
523 break;
524 case 2:
525 LZ4_memcpy(v, srcPtr, 2);
526 LZ4_memcpy(&v[2], srcPtr, 2);
527 #if defined(_MSC_VER) && (_MSC_VER <= 1933) /* MSVC 2022 ver 17.3 or earlier */
528 # pragma warning(push)
529 # pragma warning(disable : 6385) /* warning C6385: Reading invalid data from 'v'. */
530 #endif
531 LZ4_memcpy(&v[4], v, 4);
532 #if defined(_MSC_VER) && (_MSC_VER <= 1933) /* MSVC 2022 ver 17.3 or earlier */
533 # pragma warning(pop)
534 #endif
535 break;
536 case 4:
537 LZ4_memcpy(v, srcPtr, 4);
538 LZ4_memcpy(&v[4], srcPtr, 4);
539 break;
540 default:
541 LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
542 return;
545 LZ4_memcpy(dstPtr, v, 8);
546 dstPtr += 8;
547 while (dstPtr < dstEnd) {
548 LZ4_memcpy(dstPtr, v, 8);
549 dstPtr += 8;
552 #endif
555 /*-************************************
556 * Common functions
557 **************************************/
558 static unsigned LZ4_NbCommonBytes (reg_t val)
560 assert(val != 0);
561 if (LZ4_isLittleEndian()) {
562 if (sizeof(val) == 8) {
563 # if defined(_MSC_VER) && (_MSC_VER >= 1800) && (defined(_M_AMD64) && !defined(_M_ARM64EC)) && !defined(LZ4_FORCE_SW_BITCOUNT)
564 /*-*************************************************************************************************
565 * ARM64EC is a Microsoft-designed ARM64 ABI compatible with AMD64 applications on ARM64 Windows 11.
566 * The ARM64EC ABI does not support AVX/AVX2/AVX512 instructions, nor their relevant intrinsics
567 * including _tzcnt_u64. Therefore, we need to neuter the _tzcnt_u64 code path for ARM64EC.
568 ****************************************************************************************************/
569 # if defined(__clang__) && (__clang_major__ < 10)
570 /* Avoid undefined clang-cl intrinsics issue.
571 * See https://github.com/lz4/lz4/pull/1017 for details. */
572 return (unsigned)__builtin_ia32_tzcnt_u64(val) >> 3;
573 # else
574 /* x64 CPUs without BMI support interpret `TZCNT` as `REP BSF` */
575 return (unsigned)_tzcnt_u64(val) >> 3;
576 # endif
577 # elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
578 unsigned long r = 0;
579 _BitScanForward64(&r, (U64)val);
580 return (unsigned)r >> 3;
581 # elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
582 ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
583 !defined(LZ4_FORCE_SW_BITCOUNT)
584 return (unsigned)__builtin_ctzll((U64)val) >> 3;
585 # else
586 const U64 m = 0x0101010101010101ULL;
587 val ^= val - 1;
588 return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56);
589 # endif
590 } else /* 32 bits */ {
591 # if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
592 unsigned long r;
593 _BitScanForward(&r, (U32)val);
594 return (unsigned)r >> 3;
595 # elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
596 ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
597 !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
598 return (unsigned)__builtin_ctz((U32)val) >> 3;
599 # else
600 const U32 m = 0x01010101;
601 return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
602 # endif
604 } else /* Big Endian CPU */ {
605 if (sizeof(val)==8) {
606 # if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
607 ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
608 !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
609 return (unsigned)__builtin_clzll((U64)val) >> 3;
610 # else
611 #if 1
612 /* this method is probably faster,
613 * but adds a 128 bytes lookup table */
614 static const unsigned char ctz7_tab[128] = {
615 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
616 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
617 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
618 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
619 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
620 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
621 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
622 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
624 U64 const mask = 0x0101010101010101ULL;
625 U64 const t = (((val >> 8) - mask) | val) & mask;
626 return ctz7_tab[(t * 0x0080402010080402ULL) >> 57];
627 #else
628 /* this method doesn't consume memory space like the previous one,
629 * but it contains several branches,
630 * that may end up slowing execution */
631 static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits.
632 Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
633 Note that this code path is never triggered in 32-bits mode. */
634 unsigned r;
635 if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
636 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
637 r += (!val);
638 return r;
639 #endif
640 # endif
641 } else /* 32 bits */ {
642 # if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
643 ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
644 !defined(LZ4_FORCE_SW_BITCOUNT)
645 return (unsigned)__builtin_clz((U32)val) >> 3;
646 # else
647 val >>= 8;
648 val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
649 (val + 0x00FF0000)) >> 24;
650 return (unsigned)val ^ 3;
651 # endif
657 #define STEPSIZE sizeof(reg_t)
658 LZ4_FORCE_INLINE
659 unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
661 const BYTE* const pStart = pIn;
663 if (likely(pIn < pInLimit-(STEPSIZE-1))) {
664 reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
665 if (!diff) {
666 pIn+=STEPSIZE; pMatch+=STEPSIZE;
667 } else {
668 return LZ4_NbCommonBytes(diff);
671 while (likely(pIn < pInLimit-(STEPSIZE-1))) {
672 reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
673 if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
674 pIn += LZ4_NbCommonBytes(diff);
675 return (unsigned)(pIn - pStart);
678 if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
679 if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
680 if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
681 return (unsigned)(pIn - pStart);
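/* Illustrative worked example (editorial addition, not part of upstream LZ4):
 * what LZ4_count() computes. Given two positions in the same buffer, it
 * returns the length of the run of identical bytes starting at each position,
 * never reading at or past pInLimit. The buffer below is hypothetical. */
#if 0
static void example_count(void)
{
    const BYTE buf[] = "abcdefgh--abcdeXYZ";
    /* "abcde" is common to both positions, then 'X' vs 'f' differ => 5 */
    unsigned const len = LZ4_count(buf + 10, buf, buf + sizeof(buf) - 1);
    assert(len == 5);
    (void)len;
}
#endif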
685 #ifndef LZ4_COMMONDEFS_ONLY
686 /*-************************************
687 * Local Constants
688 **************************************/
689 static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
690 static const U32 LZ4_skipTrigger = 6; /* Increase this value ==> compression runs slower on incompressible data */
693 /*-************************************
694 * Local Structures and types
695 **************************************/
696 typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
699 * This enum distinguishes several different modes of accessing previous
700 * content in the stream.
702 * - noDict : There is no preceding content.
703 * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
704 * being compressed are valid and refer to the preceding
705 * content (of length ctx->dictSize), which is available
706 * contiguously preceding in memory the content currently
707 * being compressed.
708 * - usingExtDict : Like withPrefix64k, but the preceding content is somewhere
709 * else in memory, starting at ctx->dictionary with length
710 * ctx->dictSize.
711 * - usingDictCtx : Everything concerning the preceding content is
712 * in a separate context, pointed to by ctx->dictCtx.
713 * ctx->dictionary, ctx->dictSize, and table entries
714 * in the current context that refer to positions
715 * preceding the beginning of the current compression are
716 * ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
717 * ->dictSize describe the location and size of the preceding
718 * content, and matches are found by looking in the ctx
719 * ->dictCtx->hashTable.
721 typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
722 typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
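/* Illustrative usage sketch (editorial addition, not part of upstream LZ4):
 * how the dict_directive modes documented above are reached from the public
 * API. noDict is the plain one-shot path (e.g. LZ4_compress_default);
 * withPrefix64k and usingExtDict arise when compressing successive dependent
 * blocks with LZ4_compress_fast_continue(); usingDictCtx is reached through
 * LZ4_attach_dictionary(), shown below. The dictionary stream is assumed to
 * have been prepared with LZ4_loadDict(); error handling is omitted. */
#if 0
static int example_compress_with_attached_dict(const LZ4_stream_t* dictStream,
                                               const char* src, int srcSize,
                                               char* dst, int dstCapacity)
{
    LZ4_stream_t working;
    LZ4_initStream(&working, sizeof(working));
    /* Reference the already-loaded dictionary stream without copying its
     * hash table: match candidates are then looked up in dictStream's table
     * (the usingDictCtx mode). */
    LZ4_attach_dictionary(&working, dictStream);
    return LZ4_compress_fast_continue(&working, src, dst, srcSize, dstCapacity, 1);
}
#endif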
725 /*-************************************
726 * Local Utils
727 **************************************/
728 int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
729 const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }
730 int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); }
731 int LZ4_sizeofState(void) { return sizeof(LZ4_stream_t); }
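/* Illustrative usage sketch (editorial addition, not part of upstream LZ4):
 * the usual way LZ4_compressBound() is used to size a destination buffer for
 * one-shot compression. srcSize is assumed to be within LZ4_MAX_INPUT_SIZE;
 * the helper name is hypothetical and error handling is minimal. */
#if 0
#include <stdlib.h>

static char* example_compress_alloc(const char* src, int srcSize, int* cSizePtr)
{
    int const bound = LZ4_compressBound(srcSize);    /* worst-case output size */
    char* const dst = (char*)malloc((size_t)bound);
    if (dst == NULL) return NULL;
    *cSizePtr = LZ4_compress_default(src, dst, srcSize, bound);
    if (*cSizePtr <= 0) { free(dst); return NULL; }  /* 0 means compression failed */
    return dst;
}
#endif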
734 /*-****************************************
735 * Internal Definitions, used only in Tests
736 *******************************************/
737 #if defined (__cplusplus)
738 extern "C" {
739 #endif
741 int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize);
743 int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
744 int compressedSize, int maxOutputSize,
745 const void* dictStart, size_t dictSize);
746 int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest,
747 int compressedSize, int targetOutputSize, int dstCapacity,
748 const void* dictStart, size_t dictSize);
749 #if defined (__cplusplus)
751 #endif
753 /*-******************************
754 * Compression functions
755 ********************************/
756 LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
758 if (tableType == byU16)
759 return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
760 else
761 return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
764 LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
766 const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
767 if (LZ4_isLittleEndian()) {
768 const U64 prime5bytes = 889523592379ULL;
769 return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
770 } else {
771 const U64 prime8bytes = 11400714785074694791ULL;
772 return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
776 LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
778 if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType);
779 return LZ4_hash4(LZ4_read32(p), tableType);
782 LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
784 switch (tableType)
786 default: /* fallthrough */
787 case clearedTable: { /* illegal! */ assert(0); return; }
788 case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; }
789 case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; }
790 case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; }
794 LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
796 switch (tableType)
798 default: /* fallthrough */
799 case clearedTable: /* fallthrough */
800 case byPtr: { /* illegal! */ assert(0); return; }
801 case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; }
802 case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; }
806 LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h,
807 void* tableBase, tableType_t const tableType,
808 const BYTE* srcBase)
810 switch (tableType)
812 case clearedTable: { /* illegal! */ assert(0); return; }
813 case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }
814 case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }
815 case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }
819 LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
821 U32 const h = LZ4_hashPosition(p, tableType);
822 LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
825 /* LZ4_getIndexOnHash() :
826 * Index of match position registered in hash table.
827 * hash position must be calculated by using base+index, or dictBase+index.
828 * Assumption 1 : only valid if tableType == byU32 or byU16.
829 * Assumption 2 : h is presumed valid (within limits of hash table)
831 LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
833 LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
834 if (tableType == byU32) {
835 const U32* const hashTable = (const U32*) tableBase;
836 assert(h < (1U << (LZ4_MEMORY_USAGE-2)));
837 return hashTable[h];
839 if (tableType == byU16) {
840 const U16* const hashTable = (const U16*) tableBase;
841 assert(h < (1U << (LZ4_MEMORY_USAGE-1)));
842 return hashTable[h];
844 assert(0); return 0; /* forbidden case */
847 static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase)
849 if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; }
850 if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; }
851 { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */
854 LZ4_FORCE_INLINE const BYTE*
855 LZ4_getPosition(const BYTE* p,
856 const void* tableBase, tableType_t tableType,
857 const BYTE* srcBase)
859 U32 const h = LZ4_hashPosition(p, tableType);
860 return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
863 LZ4_FORCE_INLINE void
864 LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
865 const int inputSize,
866 const tableType_t tableType) {
867 /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
868 * therefore safe to use no matter what mode we're in. Otherwise, we figure
869 * out if it's safe to leave as is or whether it needs to be reset.
871 if ((tableType_t)cctx->tableType != clearedTable) {
872 assert(inputSize >= 0);
873 if ((tableType_t)cctx->tableType != tableType
874 || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)
875 || ((tableType == byU32) && cctx->currentOffset > 1 GB)
876 || tableType == byPtr
877 || inputSize >= 4 KB)
879 DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
880 MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
881 cctx->currentOffset = 0;
882 cctx->tableType = (U32)clearedTable;
883 } else {
884 DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
888 /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back,
889 * is faster than compressing without a gap.
890 * However, compressing with currentOffset == 0 is faster still,
891 * so we preserve that case.
893 if (cctx->currentOffset != 0 && tableType == byU32) {
894 DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
895 cctx->currentOffset += 64 KB;
898 /* Finally, clear history */
899 cctx->dictCtx = NULL;
900 cctx->dictionary = NULL;
901 cctx->dictSize = 0;
904 /** LZ4_compress_generic() :
905 * inlined, to ensure branches are decided at compilation time.
906 * Presumed already validated at this stage:
907 * - source != NULL
908 * - inputSize > 0
910 LZ4_FORCE_INLINE int LZ4_compress_generic_validated(
911 LZ4_stream_t_internal* const cctx,
912 const char* const source,
913 char* const dest,
914 const int inputSize,
915 int* inputConsumed, /* only written when outputDirective == fillOutput */
916 const int maxOutputSize,
917 const limitedOutput_directive outputDirective,
918 const tableType_t tableType,
919 const dict_directive dictDirective,
920 const dictIssue_directive dictIssue,
921 const int acceleration)
923 int result;
924 const BYTE* ip = (const BYTE*) source;
926 U32 const startIndex = cctx->currentOffset;
927 const BYTE* base = (const BYTE*) source - startIndex;
928 const BYTE* lowLimit;
930 const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
931 const BYTE* const dictionary =
932 dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
933 const U32 dictSize =
934 dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
935 const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0; /* make indexes in dictCtx comparable with index in current context */
937 int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
938 U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */
939 const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary;
940 const BYTE* anchor = (const BYTE*) source;
941 const BYTE* const iend = ip + inputSize;
942 const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
943 const BYTE* const matchlimit = iend - LASTLITERALS;
945 /* the dictCtx currentOffset is indexed on the start of the dictionary,
946 * while a dictionary in the current context precedes the currentOffset */
947 const BYTE* dictBase = (dictionary == NULL) ? NULL :
948 (dictDirective == usingDictCtx) ?
949 dictionary + dictSize - dictCtx->currentOffset :
950 dictionary + dictSize - startIndex;
952 BYTE* op = (BYTE*) dest;
953 BYTE* const olimit = op + maxOutputSize;
955 U32 offset = 0;
956 U32 forwardH;
958 DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType);
959 assert(ip != NULL);
960 /* If init conditions are not met, we don't have to mark stream
961 * as having dirty context, since no action was taken yet */
962 if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
963 if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; } /* Size too large (not within 64K limit) */
964 if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */
965 assert(acceleration >= 1);
967 lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);
969 /* Update context state */
970 if (dictDirective == usingDictCtx) {
971 /* Subsequent linked blocks can't use the dictionary. */
972 /* Instead, they use the block we just compressed. */
973 cctx->dictCtx = NULL;
974 cctx->dictSize = (U32)inputSize;
975 } else {
976 cctx->dictSize += (U32)inputSize;
978 cctx->currentOffset += (U32)inputSize;
979 cctx->tableType = (U32)tableType;
981 if (inputSize<LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
983 /* First Byte */
984 LZ4_putPosition(ip, cctx->hashTable, tableType, base);
985 ip++; forwardH = LZ4_hashPosition(ip, tableType);
987 /* Main Loop */
988 for ( ; ; ) {
989 const BYTE* match;
990 BYTE* token;
991 const BYTE* filledIp;
993 /* Find a match */
994 if (tableType == byPtr) {
995 const BYTE* forwardIp = ip;
996 int step = 1;
997 int searchMatchNb = acceleration << LZ4_skipTrigger;
998 do {
999 U32 const h = forwardH;
1000 ip = forwardIp;
1001 forwardIp += step;
1002 step = (searchMatchNb++ >> LZ4_skipTrigger);
1004 if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
1005 assert(ip < mflimitPlusOne);
1007 match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
1008 forwardH = LZ4_hashPosition(forwardIp, tableType);
1009 LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);
1011 } while ( (match+LZ4_DISTANCE_MAX < ip)
1012 || (LZ4_read32(match) != LZ4_read32(ip)) );
1014 } else { /* byU32, byU16 */
1016 const BYTE* forwardIp = ip;
1017 int step = 1;
1018 int searchMatchNb = acceleration << LZ4_skipTrigger;
1019 do {
1020 U32 const h = forwardH;
1021 U32 const current = (U32)(forwardIp - base);
1022 U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
1023 assert(matchIndex <= current);
1024 assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
1025 ip = forwardIp;
1026 forwardIp += step;
1027 step = (searchMatchNb++ >> LZ4_skipTrigger);
1029 if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
1030 assert(ip < mflimitPlusOne);
1032 if (dictDirective == usingDictCtx) {
1033 if (matchIndex < startIndex) {
1034 /* there was no match, try the dictionary */
1035 assert(tableType == byU32);
1036 matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
1037 match = dictBase + matchIndex;
1038 matchIndex += dictDelta; /* make dictCtx index comparable with current context */
1039 lowLimit = dictionary;
1040 } else {
1041 match = base + matchIndex;
1042 lowLimit = (const BYTE*)source;
1044 } else if (dictDirective == usingExtDict) {
1045 if (matchIndex < startIndex) {
1046 DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex);
1047 assert(startIndex - matchIndex >= MINMATCH);
1048 assert(dictBase);
1049 match = dictBase + matchIndex;
1050 lowLimit = dictionary;
1051 } else {
1052 match = base + matchIndex;
1053 lowLimit = (const BYTE*)source;
1055 } else { /* single continuous memory segment */
1056 match = base + matchIndex;
1058 forwardH = LZ4_hashPosition(forwardIp, tableType);
1059 LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
1061 DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", matchIndex, current - matchIndex);
1062 if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */
1063 assert(matchIndex < current);
1064 if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
1065 && (matchIndex+LZ4_DISTANCE_MAX < current)) {
1066 continue;
1067 } /* too far */
1068 assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */
1070 if (LZ4_read32(match) == LZ4_read32(ip)) {
1071 if (maybe_extMem) offset = current - matchIndex;
1072 break; /* match found */
1075 } while(1);
1078 /* Catch up */
1079 filledIp = ip;
1080 while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
1082 /* Encode Literals */
1083 { unsigned const litLength = (unsigned)(ip - anchor);
1084 token = op++;
1085 if ((outputDirective == limitedOutput) && /* Check output buffer overflow */
1086 (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
1087 return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
1089 if ((outputDirective == fillOutput) &&
1090 (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
1091 op--;
1092 goto _last_literals;
1094 if (litLength >= RUN_MASK) {
1095 int len = (int)(litLength - RUN_MASK);
1096 *token = (RUN_MASK<<ML_BITS);
1097 for(; len >= 255 ; len-=255) *op++ = 255;
1098 *op++ = (BYTE)len;
1100 else *token = (BYTE)(litLength<<ML_BITS);
1102 /* Copy Literals */
1103 LZ4_wildCopy8(op, anchor, op+litLength);
1104 op+=litLength;
1105 DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
1106 (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
1109 _next_match:
1110 /* at this stage, the following variables must be correctly set :
1111 * - ip : at start of LZ operation
1112 * - match : at start of previous pattern occurrence; can be within current prefix, or within extDict
1113 * - offset : if maybe_ext_memSegment==1 (constant)
1114 * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
1115 * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
1118 if ((outputDirective == fillOutput) &&
1119 (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
1120 /* the match was too close to the end, rewind and go to last literals */
1121 op = token;
1122 goto _last_literals;
1125 /* Encode Offset */
1126 if (maybe_extMem) { /* static test */
1127 DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
1128 assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
1129 LZ4_writeLE16(op, (U16)offset); op+=2;
1130 } else {
1131 DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match));
1132 assert(ip-match <= LZ4_DISTANCE_MAX);
1133 LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
1136 /* Encode MatchLength */
1137 { unsigned matchCode;
1139 if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
1140 && (lowLimit==dictionary) /* match within extDict */ ) {
1141 const BYTE* limit = ip + (dictEnd-match);
1142 assert(dictEnd > match);
1143 if (limit > matchlimit) limit = matchlimit;
1144 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
1145 ip += (size_t)matchCode + MINMATCH;
1146 if (ip==limit) {
1147 unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
1148 matchCode += more;
1149 ip += more;
1151 DEBUGLOG(6, " with matchLength=%u starting in extDict", matchCode+MINMATCH);
1152 } else {
1153 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
1154 ip += (size_t)matchCode + MINMATCH;
1155 DEBUGLOG(6, " with matchLength=%u", matchCode+MINMATCH);
1158 if ((outputDirective) && /* Check output buffer overflow */
1159 (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
1160 if (outputDirective == fillOutput) {
1161 /* Match description too long : reduce it */
1162 U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
1163 ip -= matchCode - newMatchCode;
1164 assert(newMatchCode < matchCode);
1165 matchCode = newMatchCode;
1166 if (unlikely(ip <= filledIp)) {
1167 /* We have already filled up to filledIp so if ip ends up less than filledIp
1168 * we have positions in the hash table beyond the current position. This is
1169 * a problem if we reuse the hash table. So we have to remove these positions
1170 * from the hash table.
1172 const BYTE* ptr;
1173 DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
1174 for (ptr = ip; ptr <= filledIp; ++ptr) {
1175 U32 const h = LZ4_hashPosition(ptr, tableType);
1176 LZ4_clearHash(h, cctx->hashTable, tableType);
1179 } else {
1180 assert(outputDirective == limitedOutput);
1181 return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
1184 if (matchCode >= ML_MASK) {
1185 *token += ML_MASK;
1186 matchCode -= ML_MASK;
1187 LZ4_write32(op, 0xFFFFFFFF);
1188 while (matchCode >= 4*255) {
1189 op+=4;
1190 LZ4_write32(op, 0xFFFFFFFF);
1191 matchCode -= 4*255;
1193 op += matchCode / 255;
1194 *op++ = (BYTE)(matchCode % 255);
1195 } else
1196 *token += (BYTE)(matchCode);
1198 /* Ensure we have enough space for the last literals. */
1199 assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));
1201 anchor = ip;
1203 /* Test end of chunk */
1204 if (ip >= mflimitPlusOne) break;
1206 /* Fill table */
1207 LZ4_putPosition(ip-2, cctx->hashTable, tableType, base);
1209 /* Test next position */
1210 if (tableType == byPtr) {
1212 match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
1213 LZ4_putPosition(ip, cctx->hashTable, tableType, base);
1214 if ( (match+LZ4_DISTANCE_MAX >= ip)
1215 && (LZ4_read32(match) == LZ4_read32(ip)) )
1216 { token=op++; *token=0; goto _next_match; }
1218 } else { /* byU32, byU16 */
1220 U32 const h = LZ4_hashPosition(ip, tableType);
1221 U32 const current = (U32)(ip-base);
1222 U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
1223 assert(matchIndex < current);
1224 if (dictDirective == usingDictCtx) {
1225 if (matchIndex < startIndex) {
1226 /* there was no match, try the dictionary */
1227 matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
1228 match = dictBase + matchIndex;
1229 lowLimit = dictionary; /* required for match length counter */
1230 matchIndex += dictDelta;
1231 } else {
1232 match = base + matchIndex;
1233 lowLimit = (const BYTE*)source; /* required for match length counter */
1235 } else if (dictDirective==usingExtDict) {
1236 if (matchIndex < startIndex) {
1237 assert(dictBase);
1238 match = dictBase + matchIndex;
1239 lowLimit = dictionary; /* required for match length counter */
1240 } else {
1241 match = base + matchIndex;
1242 lowLimit = (const BYTE*)source; /* required for match length counter */
1244 } else { /* single memory segment */
1245 match = base + matchIndex;
1247 LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
1248 assert(matchIndex < current);
1249 if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
1250 && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
1251 && (LZ4_read32(match) == LZ4_read32(ip)) ) {
1252 token=op++;
1253 *token=0;
1254 if (maybe_extMem) offset = current - matchIndex;
1255 DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
1256 (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
1257 goto _next_match;
1261 /* Prepare next loop */
1262 forwardH = LZ4_hashPosition(++ip, tableType);
1266 _last_literals:
1267 /* Encode Last Literals */
1268 { size_t lastRun = (size_t)(iend - anchor);
1269 if ( (outputDirective) && /* Check output buffer overflow */
1270 (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
1271 if (outputDirective == fillOutput) {
1272 /* adapt lastRun to fill 'dst' */
1273 assert(olimit >= op);
1274 lastRun = (size_t)(olimit-op) - 1/*token*/;
1275 lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/
1276 } else {
1277 assert(outputDirective == limitedOutput);
1278 return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
1281 DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
1282 if (lastRun >= RUN_MASK) {
1283 size_t accumulator = lastRun - RUN_MASK;
1284 *op++ = RUN_MASK << ML_BITS;
1285 for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
1286 *op++ = (BYTE) accumulator;
1287 } else {
1288 *op++ = (BYTE)(lastRun<<ML_BITS);
1290 LZ4_memcpy(op, anchor, lastRun);
1291 ip = anchor + lastRun;
1292 op += lastRun;
1295 if (outputDirective == fillOutput) {
1296 *inputConsumed = (int) (((const char*)ip)-source);
1298 result = (int)(((char*)op) - dest);
1299 assert(result > 0);
1300 DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result);
1301 return result;
1304 /** LZ4_compress_generic() :
1305 * inlined, to ensure branches are decided at compilation time;
1306 * takes care of src == (NULL, 0)
1307 * and forward the rest to LZ4_compress_generic_validated */
1308 LZ4_FORCE_INLINE int LZ4_compress_generic(
1309 LZ4_stream_t_internal* const cctx,
1310 const char* const src,
1311 char* const dst,
1312 const int srcSize,
1313 int *inputConsumed, /* only written when outputDirective == fillOutput */
1314 const int dstCapacity,
1315 const limitedOutput_directive outputDirective,
1316 const tableType_t tableType,
1317 const dict_directive dictDirective,
1318 const dictIssue_directive dictIssue,
1319 const int acceleration)
1321 DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i",
1322 srcSize, dstCapacity);
1324 if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported srcSize, too large (or negative) */
1325 if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */
1326 if (outputDirective != notLimited && dstCapacity <= 0) return 0; /* no output, can't write anything */
1327 DEBUGLOG(5, "Generating an empty block");
1328 assert(outputDirective == notLimited || dstCapacity >= 1);
1329 assert(dst != NULL);
1330 dst[0] = 0;
1331 if (outputDirective == fillOutput) {
1332 assert (inputConsumed != NULL);
1333 *inputConsumed = 0;
1335 return 1;
1337 assert(src != NULL);
1339 return LZ4_compress_generic_validated(cctx, src, dst, srcSize,
1340 inputConsumed, /* only written into if outputDirective == fillOutput */
1341 dstCapacity, outputDirective,
1342 tableType, dictDirective, dictIssue, acceleration);
1346 int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
1348 LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
1349 assert(ctx != NULL);
1350 if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
1351 if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
1352 if (maxOutputSize >= LZ4_compressBound(inputSize)) {
1353 if (inputSize < LZ4_64Klimit) {
1354 return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
1355 } else {
1356 const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
1357 return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
1359 } else {
1360 if (inputSize < LZ4_64Klimit) {
1361 return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
1362 } else {
1363 const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
1364 return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
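/* Illustrative usage sketch (editorial addition, not part of upstream LZ4):
 * driving LZ4_compress_fast_extState() with caller-owned state, sized via
 * LZ4_sizeofState(). malloc() returns memory suitably aligned for
 * LZ4_stream_t; the helper name is hypothetical and error handling minimal. */
#if 0
#include <stdlib.h>

static int example_compress_with_ext_state(const char* src, int srcSize,
                                           char* dst, int dstCapacity)
{
    void* const state = malloc((size_t)LZ4_sizeofState());
    int cSize;
    if (state == NULL) return 0;
    cSize = LZ4_compress_fast_extState(state, src, dst, srcSize, dstCapacity, 1);
    free(state);
    return cSize;   /* 0 means the output did not fit within dstCapacity */
}
#endif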
1370 * LZ4_compress_fast_extState_fastReset() :
1371 * A variant of LZ4_compress_fast_extState().
1373 * Using this variant avoids an expensive initialization step. It is only safe
1374 * to call if the state buffer is known to be correctly initialized already
1375 * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of
1376 * "correctly initialized").
1378 int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
1380 LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
1381 if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
1382 if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
1384 if (dstCapacity >= LZ4_compressBound(srcSize)) {
1385 if (srcSize < LZ4_64Klimit) {
1386 const tableType_t tableType = byU16;
1387 LZ4_prepareTable(ctx, srcSize, tableType);
1388 if (ctx->currentOffset) {
1389 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration);
1390 } else {
1391 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
1393 } else {
1394 const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
1395 LZ4_prepareTable(ctx, srcSize, tableType);
1396 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
1398 } else {
1399 if (srcSize < LZ4_64Klimit) {
1400 const tableType_t tableType = byU16;
1401 LZ4_prepareTable(ctx, srcSize, tableType);
1402 if (ctx->currentOffset) {
1403 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration);
1404 } else {
1405 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
1407 } else {
1408 const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
1409 LZ4_prepareTable(ctx, srcSize, tableType);
1410 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
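/* Illustrative usage sketch (editorial addition, not part of upstream LZ4):
 * the reuse pattern the comment above describes. The state is fully
 * initialized once, then reused across many independent blocks without paying
 * the full reset cost each time. Inputs are hypothetical; error handling and
 * output management are omitted. */
#if 0
static void example_compress_many_blocks(const char* const* blocks,
                                         const int* blockSizes, int nbBlocks,
                                         char* dst, int dstCapacity)
{
    LZ4_stream_t state;
    int i;
    LZ4_initStream(&state, sizeof(state));   /* full initialization, once */
    for (i = 0; i < nbBlocks; i++) {
        /* The state is known to be correctly initialized, so the cheaper
         * entry point is safe; each block is compressed independently. */
        (void)LZ4_compress_fast_extState_fastReset(&state, blocks[i], dst,
                                                   blockSizes[i], dstCapacity, 1);
    }
}
#endif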
1416 int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
1418 int result;
1419 #if (LZ4_HEAPMODE)
1420 LZ4_stream_t* ctxPtr = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */
1421 if (ctxPtr == NULL) return 0;
1422 #else
1423 LZ4_stream_t ctx;
1424 LZ4_stream_t* const ctxPtr = &ctx;
1425 #endif
1426 result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);
1428 #if (LZ4_HEAPMODE)
1429 FREEMEM(ctxPtr);
1430 #endif
1431 return result;
1435 int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize)
1437 return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, 1);
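/* Illustrative usage sketch (editorial addition, not part of upstream LZ4):
 * a minimal round trip through the one-shot API defined above, paired with
 * LZ4_decompress_safe() from the decompression half of this file. The input
 * string and buffer sizes are example values. */
#if 0
#include <string.h>

static int example_round_trip(void)
{
    const char src[] = "LZ4 is a fast lossless compression algorithm.";
    char compressed[LZ4_COMPRESSBOUND(sizeof(src))];
    char restored[sizeof(src)];
    int const cSize = LZ4_compress_default(src, compressed, (int)sizeof(src),
                                           (int)sizeof(compressed));
    int const dSize = LZ4_decompress_safe(compressed, restored, cSize,
                                          (int)sizeof(restored));
    return (cSize > 0) && (dSize == (int)sizeof(src))
        && (memcmp(src, restored, sizeof(src)) == 0);
}
#endif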
1441 /* Note!: This function leaves the stream in an unclean/broken state!
1442 * It is not safe to subsequently use the same state with a _fastReset() or
1443 * _continue() call without resetting it. */
1444 static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
1446 void* const s = LZ4_initStream(state, sizeof (*state));
1447 assert(s != NULL); (void)s;
1449 if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) { /* compression success is guaranteed */
1450 return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
1451 } else {
1452 if (*srcSizePtr < LZ4_64Klimit) {
1453 return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1);
1454 } else {
1455 tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
1456 return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1);
1461 int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
1463 #if (LZ4_HEAPMODE)
1464 LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */
1465 if (ctx == NULL) return 0;
1466 #else
1467 LZ4_stream_t ctxBody;
1468 LZ4_stream_t* ctx = &ctxBody;
1469 #endif
1471 int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize);
1473 #if (LZ4_HEAPMODE)
1474 FREEMEM(ctx);
1475 #endif
1476 return result;
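/* Illustrative usage sketch (editorial addition, not part of upstream LZ4):
 * using LZ4_compress_destSize() to fill a fixed output budget. On return,
 * *srcConsumedPtr tells how much of the input was actually packed into the
 * 512-byte packet. Names and the packet size are hypothetical. */
#if 0
static int example_fill_packet(const char* src, int srcSize,
                               char packet[512], int* srcConsumedPtr)
{
    int srcConsumed = srcSize;   /* in: available input; out: input consumed */
    int const cSize = LZ4_compress_destSize(src, packet, &srcConsumed, 512);
    *srcConsumedPtr = srcConsumed;
    return cSize;                /* number of bytes written into packet */
}
#endif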
1481 /*-******************************
1482 * Streaming functions
1483 ********************************/
1485 #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
1486 LZ4_stream_t* LZ4_createStream(void)
1488 LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
1489 LZ4_STATIC_ASSERT(sizeof(LZ4_stream_t) >= sizeof(LZ4_stream_t_internal));
1490 DEBUGLOG(4, "LZ4_createStream %p", lz4s);
1491 if (lz4s == NULL) return NULL;
1492 LZ4_initStream(lz4s, sizeof(*lz4s));
1493 return lz4s;
1495 #endif
1497 static size_t LZ4_stream_t_alignment(void)
1499 #if LZ4_ALIGN_TEST
1500 typedef struct { char c; LZ4_stream_t t; } t_a;
1501 return sizeof(t_a) - sizeof(LZ4_stream_t);
1502 #else
1503 return 1; /* effectively disabled */
1504 #endif
1507 LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
1509 DEBUGLOG(5, "LZ4_initStream");
1510 if (buffer == NULL) { return NULL; }
1511 if (size < sizeof(LZ4_stream_t)) { return NULL; }
1512 if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL;
1513 MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal));
1514 return (LZ4_stream_t*)buffer;
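/* Illustrative sketch (hypothetical guard and name) : LZ4_initStream() lets the caller
 * provide the LZ4_stream_t storage ; LZ4_createStream()/LZ4_freeStream() are the
 * heap-allocating alternative. */
#if defined(LZ4_USAGE_EXAMPLES)
static int LZ4_example_initStream(void)
{
    LZ4_stream_t onStack;   /* a declared LZ4_stream_t is suitably aligned */
    return LZ4_initStream(&onStack, sizeof(onStack)) != NULL;
}
#endif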
1517 /* resetStream is now deprecated,
1518 * prefer initStream() which is more general */
1519 void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
1521 DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
1522 MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal));
1525 void LZ4_resetStream_fast(LZ4_stream_t* ctx) {
1526 LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32);
1529 #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
1530 int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
1532 if (!LZ4_stream) return 0; /* support free on NULL */
1533 DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream);
1534 FREEMEM(LZ4_stream);
1535 return (0);
1537 #endif
1540 #define HASH_UNIT sizeof(reg_t)
1541 int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
1543 LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse;
1544 const tableType_t tableType = byU32;
1545 const BYTE* p = (const BYTE*)dictionary;
1546 const BYTE* const dictEnd = p + dictSize;
1547 const BYTE* base;
1549 DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);
1551 /* It's necessary to reset the context,
1552 * and not just continue it with prepareTable()
1553 * to avoid any risk of generating an overflowing matchIndex
1554 * when compressing using this dictionary */
1555 LZ4_resetStream(LZ4_dict);
1557 /* We always increment the offset by 64 KB, since, if the dict is longer,
1558 * we truncate it to the last 64k, and if it's shorter, we still want to
1559 * advance by a whole window length so we can provide the guarantee that
1560 * there are only valid offsets in the window, which allows an optimization
1561 * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
1562 * dictionary isn't a full 64k. */
1563 dict->currentOffset += 64 KB;
1565 if (dictSize < (int)HASH_UNIT) {
1566 return 0;
1569 if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
1570 base = dictEnd - dict->currentOffset;
1571 dict->dictionary = p;
1572 dict->dictSize = (U32)(dictEnd - p);
1573 dict->tableType = (U32)tableType;
1575 while (p <= dictEnd-HASH_UNIT) {
1576 LZ4_putPosition(p, dict->hashTable, tableType, base);
1577 p+=3;
1580 return (int)dict->dictSize;
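/* Illustrative sketch (hypothetical guard and name) : compress with a preset dictionary.
 * The dictionary must remain valid while compressing, and the decoder side would pass
 * the same bytes to LZ4_decompress_safe_usingDict(). */
#if defined(LZ4_USAGE_EXAMPLES)
static int LZ4_example_compressWithDict(const char* dict, int dictSize,
                                        const char* src, int srcSize,
                                        char* dst, int dstCapacity)
{
    LZ4_stream_t stream;
    LZ4_initStream(&stream, sizeof(stream));
    LZ4_loadDict(&stream, dict, dictSize);
    return LZ4_compress_fast_continue(&stream, src, dst, srcSize, dstCapacity, 1);
}
#endif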
1583 void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream)
1585 const LZ4_stream_t_internal* dictCtx = (dictionaryStream == NULL) ? NULL :
1586 &(dictionaryStream->internal_donotuse);
1588 DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)",
1589 workingStream, dictionaryStream,
1590 dictCtx != NULL ? dictCtx->dictSize : 0);
1592 if (dictCtx != NULL) {
1593 /* If the current offset is zero, we will never look in the
1594 * external dictionary context, since there is no value a table
1595 * entry can take that indicates a miss. In that case, we need
1596 * to bump the offset to something non-zero.
1598 if (workingStream->internal_donotuse.currentOffset == 0) {
1599 workingStream->internal_donotuse.currentOffset = 64 KB;
1602 /* Don't actually attach an empty dictionary.
1604 if (dictCtx->dictSize == 0) {
1605 dictCtx = NULL;
1608 workingStream->internal_donotuse.dictCtx = dictCtx;
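/* Illustrative sketch (hypothetical guard and name) : a dictionary stream prepared once
 * with LZ4_loadDict() can be attached cheaply to many short-lived working streams. */
#if defined(LZ4_USAGE_EXAMPLES)
static int LZ4_example_attachDictionary(const LZ4_stream_t* dictStream,
                                        const char* src, int srcSize,
                                        char* dst, int dstCapacity)
{
    LZ4_stream_t working;
    LZ4_initStream(&working, sizeof(working));
    LZ4_attach_dictionary(&working, dictStream);   /* only a reference to dictStream's tables is taken */
    return LZ4_compress_fast_continue(&working, src, dst, srcSize, dstCapacity, 1);
}
#endif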
1612 static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
1614 assert(nextSize >= 0);
1615 if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) { /* potential ptrdiff_t overflow (32-bits mode) */
1616 /* rescale hash table */
1617 U32 const delta = LZ4_dict->currentOffset - 64 KB;
1618 const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
1619 int i;
1620 DEBUGLOG(4, "LZ4_renormDictT");
1621 for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
1622 if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
1623 else LZ4_dict->hashTable[i] -= delta;
1625 LZ4_dict->currentOffset = 64 KB;
1626 if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
1627 LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
1632 int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
1633 const char* source, char* dest,
1634 int inputSize, int maxOutputSize,
1635 int acceleration)
1637 const tableType_t tableType = byU32;
1638 LZ4_stream_t_internal* const streamPtr = &LZ4_stream->internal_donotuse;
1639 const char* dictEnd = streamPtr->dictSize ? (const char*)streamPtr->dictionary + streamPtr->dictSize : NULL;
1641 DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i, dictSize=%u)", inputSize, streamPtr->dictSize);
1643 LZ4_renormDictT(streamPtr, inputSize); /* fix index overflow */
1644 if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
1645 if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
1647 /* invalidate tiny dictionaries */
1648 if ( (streamPtr->dictSize < 4) /* tiny dictionary : not enough for a hash */
1649 && (dictEnd != source) /* not prefix mode */
1650 && (inputSize > 0) /* tolerance : don't lose history, in case next invocation would use prefix mode */
1651 && (streamPtr->dictCtx == NULL) /* not using an external dictCtx */
1653 DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
1654 /* remove dictionary existence from history, to employ faster prefix mode */
1655 streamPtr->dictSize = 0;
1656 streamPtr->dictionary = (const BYTE*)source;
1657 dictEnd = source;
1660 /* Check overlapping input/dictionary space */
1661 { const char* const sourceEnd = source + inputSize;
1662 if ((sourceEnd > (const char*)streamPtr->dictionary) && (sourceEnd < dictEnd)) {
1663 streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
1664 if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
1665 if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
1666 streamPtr->dictionary = (const BYTE*)dictEnd - streamPtr->dictSize;
1670 /* prefix mode : source data follows dictionary */
1671 if (dictEnd == source) {
1672 if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
1673 return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration);
1674 else
1675 return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration);
1678 /* external dictionary mode */
1679 { int result;
1680 if (streamPtr->dictCtx) {
1681 /* We depend here on the fact that dictCtx'es (produced by
1682 * LZ4_loadDict) guarantee that their tables contain no references
1683 * to offsets between dictCtx->currentOffset - 64 KB and
1684 * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe
1685 * to use noDictIssue even when the dict isn't a full 64 KB.
1687 if (inputSize > 4 KB) {
1688 /* For compressing large blobs, it is faster to pay the setup
1689 * cost to copy the dictionary's tables into the active context,
1690 * so that the compression loop is only looking into one table.
1692 LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr));
1693 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
1694 } else {
1695 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
1697 } else { /* small data <= 4 KB */
1698 if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
1699 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration);
1700 } else {
1701 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
1704 streamPtr->dictionary = (const BYTE*)source;
1705 streamPtr->dictSize = (U32)inputSize;
1706 return result;
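/* Illustrative sketch (hypothetical guard and name) : compress a contiguous buffer as
 * a chain of blocks, so that each block can reference the previous ones (prefix mode). */
#if defined(LZ4_USAGE_EXAMPLES)
static int LZ4_example_streamCompress(const char* buffer, int totalSize,
                                      char* dst, int dstCapacity, int blockSize)
{
    LZ4_stream_t stream;
    int inPos = 0, outPos = 0;
    LZ4_initStream(&stream, sizeof(stream));
    while (inPos < totalSize) {
        int const thisBlock = (totalSize - inPos < blockSize) ? (totalSize - inPos) : blockSize;
        int const cSize = LZ4_compress_fast_continue(&stream, buffer + inPos, dst + outPos,
                                                     thisBlock, dstCapacity - outPos, 1);
        if (cSize <= 0) return 0;   /* dst exhausted */
        inPos  += thisBlock;        /* blocks stay contiguous in `buffer`, enabling prefix mode */
        outPos += cSize;            /* a real container would also record each block's compressed size */
    }
    return outPos;
}
#endif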
1711 /* Hidden debug function, to force-test external dictionary mode */
1712 int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize)
1714 LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse;
1715 int result;
1717 LZ4_renormDictT(streamPtr, srcSize);
1719 if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
1720 result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1);
1721 } else {
1722 result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
1725 streamPtr->dictionary = (const BYTE*)source;
1726 streamPtr->dictSize = (U32)srcSize;
1728 return result;
1732 /*! LZ4_saveDict() :
1733 * If the previously compressed data block is not guaranteed to remain available at its memory location,
1734 * save it into a safer place (char* safeBuffer).
1735 * Note : there is no need to call LZ4_loadDict() afterwards, the dictionary is immediately usable,
1736 * so one can call LZ4_compress_fast_continue() right away.
1737 * @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
1739 int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
1741 LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
1743 DEBUGLOG(5, "LZ4_saveDict : dictSize=%i, safeBuffer=%p", dictSize, safeBuffer);
1745 if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */
1746 if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; }
1748 if (safeBuffer == NULL) assert(dictSize == 0);
1749 if (dictSize > 0) {
1750 const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize;
1751 assert(dict->dictionary);
1752 LZ4_memmove(safeBuffer, previousDictEnd - dictSize, (size_t)dictSize);
1755 dict->dictionary = (const BYTE*)safeBuffer;
1756 dict->dictSize = (U32)dictSize;
1758 return dictSize;
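/* Illustrative sketch (hypothetical guard and name) : before the previously compressed
 * data moves or gets overwritten, preserve up to 64 KB of history so that the next
 * LZ4_compress_fast_continue() call can still reference it. */
#if defined(LZ4_USAGE_EXAMPLES)
static int LZ4_example_saveDict(LZ4_stream_t* stream, char* dictBuffer, int dictBufferSize)
{
    return LZ4_saveDict(stream, dictBuffer, dictBufferSize);   /* returns the nb of bytes preserved (<= 64 KB) */
}
#endif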
1763 /*-*******************************
1764 * Decompression functions
1765 ********************************/
1767 typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
1769 #undef MIN
1770 #define MIN(a,b) ( (a) < (b) ? (a) : (b) )
1773 /* variant for decompress_unsafe()
1774 * does not know end of input
1775 * presumes input is well formed
1776 * note : will consume at least one byte */
1777 size_t read_long_length_no_check(const BYTE** pp)
1779 size_t b, l = 0;
1780 do { b = **pp; (*pp)++; l += b; } while (b==255);
1781 DEBUGLOG(6, "read_long_length_no_check: +length=%zu using %zu input bytes", l, l/255 + 1)
1782 return l;
1785 /* core decoder variant for LZ4_decompress_fast*()
1786 * for legacy support only : these entry points are deprecated.
1787 * - Presumes input is correctly formed (no defense vs malformed inputs)
1788 * - Does not know input size (presume input buffer is "large enough")
1789 * - Decompress a full block (only)
1790 * @return : nb of bytes read from input.
1791 * Note : this variant is not optimized for speed, just for maintenance.
1792 * the goal is to remove support of decompress_fast*() variants by v2.0
1794 LZ4_FORCE_INLINE int
1795 LZ4_decompress_unsafe_generic(
1796 const BYTE* const istart,
1797 BYTE* const ostart,
1798 int decompressedSize,
1800 size_t prefixSize,
1801 const BYTE* const dictStart, /* only if dict==usingExtDict */
1802 const size_t dictSize /* note: =0 if dictStart==NULL */
1805 const BYTE* ip = istart;
1806 BYTE* op = (BYTE*)ostart;
1807 BYTE* const oend = ostart + decompressedSize;
1808 const BYTE* const prefixStart = ostart - prefixSize;
1810 DEBUGLOG(5, "LZ4_decompress_unsafe_generic");
1811 if (dictStart == NULL) assert(dictSize == 0);
1813 while (1) {
1814 /* start new sequence */
1815 unsigned token = *ip++;
1817 /* literals */
1818 { size_t ll = token >> ML_BITS;
1819 if (ll==15) {
1820 /* long literal length */
1821 ll += read_long_length_no_check(&ip);
1823 if ((size_t)(oend-op) < ll) return -1; /* output buffer overflow */
1824 LZ4_memmove(op, ip, ll); /* support in-place decompression */
1825 op += ll;
1826 ip += ll;
1827 if ((size_t)(oend-op) < MFLIMIT) {
1828 if (op==oend) break; /* end of block */
1829 DEBUGLOG(5, "invalid: literals end at distance %zi from end of block", oend-op);
1830 /* incorrect end of block :
1831 * last match must start at least MFLIMIT==12 bytes before end of output block */
1832 return -1;
1835 /* match */
1836 { size_t ml = token & 15;
1837 size_t const offset = LZ4_readLE16(ip);
1838 ip+=2;
1840 if (ml==15) {
1841 /* long match length */
1842 ml += read_long_length_no_check(&ip);
1844 ml += MINMATCH;
1846 if ((size_t)(oend-op) < ml) return -1; /* output buffer overflow */
1848 { const BYTE* match = op - offset;
1850 /* out of range */
1851 if (offset > (size_t)(op - prefixStart) + dictSize) {
1852 DEBUGLOG(6, "offset out of range");
1853 return -1;
1856 /* check special case : extDict */
1857 if (offset > (size_t)(op - prefixStart)) {
1858 /* extDict scenario */
1859 const BYTE* const dictEnd = dictStart + dictSize;
1860 const BYTE* extMatch = dictEnd - (offset - (size_t)(op-prefixStart));
1861 size_t const extml = (size_t)(dictEnd - extMatch);
1862 if (extml > ml) {
1863 /* match entirely within extDict */
1864 LZ4_memmove(op, extMatch, ml);
1865 op += ml;
1866 ml = 0;
1867 } else {
1868 /* match split between extDict & prefix */
1869 LZ4_memmove(op, extMatch, extml);
1870 op += extml;
1871 ml -= extml;
1873 match = prefixStart;
1876 /* match copy - slow variant, supporting overlap copy */
1877 { size_t u;
1878 for (u=0; u<ml; u++) {
1879 op[u] = match[u];
1880 } } }
1881 op += ml;
1882 if ((size_t)(oend-op) < LASTLITERALS) {
1883 DEBUGLOG(5, "invalid: match ends at distance %zi from end of block", oend-op);
1884 /* incorrect end of block :
1885 * last match must stop at least LASTLITERALS==5 bytes before end of output block */
1886 return -1;
1888 } /* match */
1889 } /* main loop */
1890 return (int)(ip - istart);
1894 /* Read the variable-length literal or match length.
1896 * @ip : input pointer, updated to point just past the consumed length bytes
1897 * @ilimit : position after which, if the length is still not fully decoded, the input is necessarily corrupted.
1898 * @initial_check : when non-zero, check ip >= ilimit before the first read, and fail in that case.
1899 * @return : the decoded length, or rvl_error on failure (read limit reached, or accumulator overflow in 32-bit mode).
1901 typedef size_t Rvl_t;
1902 static const Rvl_t rvl_error = (Rvl_t)(-1);
1903 LZ4_FORCE_INLINE Rvl_t
1904 read_variable_length(const BYTE** ip, const BYTE* ilimit,
1905 int initial_check)
1907 Rvl_t s, length = 0;
1908 assert(ip != NULL);
1909 assert(*ip != NULL);
1910 assert(ilimit != NULL);
1911 if (initial_check && unlikely((*ip) >= ilimit)) { /* read limit reached */
1912 return rvl_error;
1914 do {
1915 s = **ip;
1916 (*ip)++;
1917 length += s;
1918 if (unlikely((*ip) > ilimit)) { /* read limit reached */
1919 return rvl_error;
1921 /* accumulator overflow detection (32-bit mode only) */
1922 if ((sizeof(length)<8) && unlikely(length > ((Rvl_t)(-1)/2)) ) {
1923 return rvl_error;
1925 } while (s==255);
1927 return length;
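/* Worked example of the length-extension encoding decoded above (illustrative sketch,
 * hypothetical guard and name) : a literal length of 300 is stored as the token nibble 15
 * plus extension bytes 255 and 30, since 15 + 255 + 30 == 300. */
#if defined(LZ4_USAGE_EXAMPLES)
static int LZ4_example_readVariableLength(void)
{
    BYTE const ext[2] = { 255, 30 };
    const BYTE* p = ext;
    Rvl_t const extra = read_variable_length(&p, ext + sizeof(ext), 0);
    return (extra == 285) && (p == ext + 2);   /* 285 added to the token's 15 gives 300 */
}
#endif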
1930 /*! LZ4_decompress_generic() :
1931 * This generic decompression function covers all use cases.
1932 * It shall be instantiated several times, using different sets of directives.
1933 * Note that it is important for performance that this function really get inlined,
1934 * in order to remove useless branches during compilation optimization.
1936 LZ4_FORCE_INLINE int
1937 LZ4_decompress_generic(
1938 const char* const src,
1939 char* const dst,
1940 int srcSize,
1941 int outputSize, /* full capacity of the output buffer (`dstCapacity`) */
1943 earlyEnd_directive partialDecoding, /* full, partial */
1944 dict_directive dict, /* noDict, withPrefix64k, usingExtDict */
1945 const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */
1946 const BYTE* const dictStart, /* only if dict==usingExtDict */
1947 const size_t dictSize /* note : = 0 if noDict */
1950 if ((src == NULL) || (outputSize < 0)) { return -1; }
1952 { const BYTE* ip = (const BYTE*) src;
1953 const BYTE* const iend = ip + srcSize;
1955 BYTE* op = (BYTE*) dst;
1956 BYTE* const oend = op + outputSize;
1957 BYTE* cpy;
1959 const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
1961 const int checkOffset = (dictSize < (int)(64 KB));
1964 /* Set up the "end" pointers for the shortcut. */
1965 const BYTE* const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/;
1966 const BYTE* const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/;
1968 const BYTE* match;
1969 size_t offset;
1970 unsigned token;
1971 size_t length;
1974 DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
1976 /* Special cases */
1977 assert(lowPrefix <= op);
1978 if (unlikely(outputSize==0)) {
1979 /* Empty output buffer */
1980 if (partialDecoding) return 0;
1981 return ((srcSize==1) && (*ip==0)) ? 0 : -1;
1983 if (unlikely(srcSize==0)) { return -1; }
1985 /* LZ4_FAST_DEC_LOOP:
1986 * designed for modern out-of-order performance CPUs,
1987 * where unconditionally copying 32 bytes is preferable to an unpredictable branch.
1988 * note : the fast loop may show a regression on some client ARM chips. */
1989 #if LZ4_FAST_DEC_LOOP
1990 if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
1991 DEBUGLOG(6, "skip fast decode loop");
1992 goto safe_decode;
1995 /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */
1996 while (1) {
1997 /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
1998 assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
1999 assert(ip < iend);
2000 token = *ip++;
2001 length = token >> ML_BITS; /* literal length */
2003 /* decode literal length */
2004 if (length == RUN_MASK) {
2005 size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1);
2006 if (addl == rvl_error) { goto _output_error; }
2007 length += addl;
2008 if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
2009 if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
2011 /* copy literals */
2012 cpy = op+length;
2013 LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
2014 if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
2015 LZ4_wildCopy32(op, ip, cpy);
2016 ip += length; op = cpy;
2017 } else {
2018 cpy = op+length;
2019 DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
2020 /* We don't need to check oend, since we check it once for each loop below */
2021 if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
2022 /* Literals can only be <= 14, but we expect compilers to optimize a fixed 16-byte copy better than a variable-length one */
2023 LZ4_memcpy(op, ip, 16);
2024 ip += length; op = cpy;
2027 /* get offset */
2028 offset = LZ4_readLE16(ip); ip+=2;
2029 match = op - offset;
2030 assert(match <= op); /* overflow check */
2032 /* get matchlength */
2033 length = token & ML_MASK;
2035 if (length == ML_MASK) {
2036 size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
2037 if (addl == rvl_error) { goto _output_error; }
2038 length += addl;
2039 length += MINMATCH;
2040 if (unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
2041 if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
2042 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
2043 goto safe_match_copy;
2045 } else {
2046 length += MINMATCH;
2047 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
2048 goto safe_match_copy;
2051 /* Fastpath check: skip LZ4_wildCopy32 when true */
2052 if ((dict == withPrefix64k) || (match >= lowPrefix)) {
2053 if (offset >= 8) {
2054 assert(match >= lowPrefix);
2055 assert(match <= op);
2056 assert(op + 18 <= oend);
2058 LZ4_memcpy(op, match, 8);
2059 LZ4_memcpy(op+8, match+8, 8);
2060 LZ4_memcpy(op+16, match+16, 2);
2061 op += length;
2062 continue;
2063 } } }
2065 if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
2066 /* match starting within external dictionary */
2067 if ((dict==usingExtDict) && (match < lowPrefix)) {
2068 assert(dictEnd != NULL);
2069 if (unlikely(op+length > oend-LASTLITERALS)) {
2070 if (partialDecoding) {
2071 DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
2072 length = MIN(length, (size_t)(oend-op));
2073 } else {
2074 goto _output_error; /* end-of-block condition violated */
2077 if (length <= (size_t)(lowPrefix-match)) {
2078 /* match fits entirely within external dictionary : just copy */
2079 LZ4_memmove(op, dictEnd - (lowPrefix-match), length);
2080 op += length;
2081 } else {
2082 /* match stretches into both external dictionary and current block */
2083 size_t const copySize = (size_t)(lowPrefix - match);
2084 size_t const restSize = length - copySize;
2085 LZ4_memcpy(op, dictEnd - copySize, copySize);
2086 op += copySize;
2087 if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
2088 BYTE* const endOfMatch = op + restSize;
2089 const BYTE* copyFrom = lowPrefix;
2090 while (op < endOfMatch) { *op++ = *copyFrom++; }
2091 } else {
2092 LZ4_memcpy(op, lowPrefix, restSize);
2093 op += restSize;
2095 continue;
2098 /* copy match within block */
2099 cpy = op + length;
2101 assert((op <= oend) && (oend-op >= 32));
2102 if (unlikely(offset<16)) {
2103 LZ4_memcpy_using_offset(op, match, cpy, offset);
2104 } else {
2105 LZ4_wildCopy32(op, match, cpy);
2108 op = cpy; /* wildcopy correction */
2110 safe_decode:
2111 #endif
2113 /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
2114 while (1) {
2115 assert(ip < iend);
2116 token = *ip++;
2117 length = token >> ML_BITS; /* literal length */
2119 /* A two-stage shortcut for the most common case:
2120 * 1) If the literal length is 0..14, and there is enough space,
2121 * enter the shortcut and copy 16 bytes on behalf of the literals
2122 * (in the fast mode, only 8 bytes can be safely copied this way).
2123 * 2) Further if the match length is 4..18, copy 18 bytes in a similar
2124 * manner; but we ensure that there's enough space in the output for
2125 * those 18 bytes earlier, upon entering the shortcut (in other words,
2126 * there is a combined check for both stages).
2128 if ( (length != RUN_MASK)
2129 /* strictly "less than" on input, to re-enter the loop with at least one byte */
2130 && likely((ip < shortiend) & (op <= shortoend)) ) {
2131 /* Copy the literals */
2132 LZ4_memcpy(op, ip, 16);
2133 op += length; ip += length;
2135 /* The second stage: prepare for match copying, decode full info.
2136 * If it doesn't work out, the info won't be wasted. */
2137 length = token & ML_MASK; /* match length */
2138 offset = LZ4_readLE16(ip); ip += 2;
2139 match = op - offset;
2140 assert(match <= op); /* check overflow */
2142 /* Do not deal with overlapping matches. */
2143 if ( (length != ML_MASK)
2144 && (offset >= 8)
2145 && (dict==withPrefix64k || match >= lowPrefix) ) {
2146 /* Copy the match. */
2147 LZ4_memcpy(op + 0, match + 0, 8);
2148 LZ4_memcpy(op + 8, match + 8, 8);
2149 LZ4_memcpy(op +16, match +16, 2);
2150 op += length + MINMATCH;
2151 /* Both stages worked, load the next token. */
2152 continue;
2155 /* The second stage didn't work out, but the info is ready.
2156 * Propel it right to the point of match copying. */
2157 goto _copy_match;
2160 /* decode literal length */
2161 if (length == RUN_MASK) {
2162 size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1);
2163 if (addl == rvl_error) { goto _output_error; }
2164 length += addl;
2165 if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
2166 if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
2169 /* copy literals */
2170 cpy = op+length;
2171 #if LZ4_FAST_DEC_LOOP
2172 safe_literal_copy:
2173 #endif
2174 LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
2175 if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) {
2176 /* We've either hit the input parsing restriction or the output parsing restriction.
2177 * In the normal scenario, decoding a full block, it must be the last sequence,
2178 * otherwise it's an error (invalid input or dimensions).
2179 * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
2181 if (partialDecoding) {
2182 /* Since we are doing partial decoding, we may be in this block because of the output size
2183 * restriction, which does not apply here since the output buffer is allowed to be undersized.
2185 DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
2186 DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
2187 DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
2188 DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
2189 /* Finishing in the middle of a literals segment,
2190 * due to lack of input.
2192 if (ip+length > iend) {
2193 length = (size_t)(iend-ip);
2194 cpy = op + length;
2196 /* Finishing in the middle of a literals segment,
2197 * due to lack of output space.
2199 if (cpy > oend) {
2200 cpy = oend;
2201 assert(op<=oend);
2202 length = (size_t)(oend-op);
2204 } else {
2205 /* We must be on the last sequence (or invalid) because of the parsing limitations
2206 * so check that we exactly consume the input and don't overrun the output buffer.
2208 if ((ip+length != iend) || (cpy > oend)) {
2209 DEBUGLOG(6, "should have been last run of literals")
2210 DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
2211 DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
2212 goto _output_error;
2215 LZ4_memmove(op, ip, length); /* supports overlapping memory regions, for in-place decompression scenarios */
2216 ip += length;
2217 op += length;
2218 /* Necessarily EOF when !partialDecoding.
2219 * When partialDecoding, it is EOF if we've either
2220 * filled the output buffer or
2221 * can't proceed with reading an offset for following match.
2223 if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
2224 break;
2226 } else {
2227 LZ4_wildCopy8(op, ip, cpy); /* can overwrite up to 8 bytes beyond cpy */
2228 ip += length; op = cpy;
2231 /* get offset */
2232 offset = LZ4_readLE16(ip); ip+=2;
2233 match = op - offset;
2235 /* get matchlength */
2236 length = token & ML_MASK;
2238 _copy_match:
2239 if (length == ML_MASK) {
2240 size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
2241 if (addl == rvl_error) { goto _output_error; }
2242 length += addl;
2243 if (unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */
2245 length += MINMATCH;
2247 #if LZ4_FAST_DEC_LOOP
2248 safe_match_copy:
2249 #endif
2250 if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */
2251 /* match starting within external dictionary */
2252 if ((dict==usingExtDict) && (match < lowPrefix)) {
2253 assert(dictEnd != NULL);
2254 if (unlikely(op+length > oend-LASTLITERALS)) {
2255 if (partialDecoding) length = MIN(length, (size_t)(oend-op));
2256 else goto _output_error; /* doesn't respect parsing restriction */
2259 if (length <= (size_t)(lowPrefix-match)) {
2260 /* match fits entirely within external dictionary : just copy */
2261 LZ4_memmove(op, dictEnd - (lowPrefix-match), length);
2262 op += length;
2263 } else {
2264 /* match stretches into both external dictionary and current block */
2265 size_t const copySize = (size_t)(lowPrefix - match);
2266 size_t const restSize = length - copySize;
2267 LZ4_memcpy(op, dictEnd - copySize, copySize);
2268 op += copySize;
2269 if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
2270 BYTE* const endOfMatch = op + restSize;
2271 const BYTE* copyFrom = lowPrefix;
2272 while (op < endOfMatch) *op++ = *copyFrom++;
2273 } else {
2274 LZ4_memcpy(op, lowPrefix, restSize);
2275 op += restSize;
2277 continue;
2279 assert(match >= lowPrefix);
2281 /* copy match within block */
2282 cpy = op + length;
2284 /* partialDecoding : may end anywhere within the block */
2285 assert(op<=oend);
2286 if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
2287 size_t const mlen = MIN(length, (size_t)(oend-op));
2288 const BYTE* const matchEnd = match + mlen;
2289 BYTE* const copyEnd = op + mlen;
2290 if (matchEnd > op) { /* overlap copy */
2291 while (op < copyEnd) { *op++ = *match++; }
2292 } else {
2293 LZ4_memcpy(op, match, mlen);
2295 op = copyEnd;
2296 if (op == oend) { break; }
2297 continue;
2300 if (unlikely(offset<8)) {
2301 LZ4_write32(op, 0); /* silence msan warning when offset==0 */
2302 op[0] = match[0];
2303 op[1] = match[1];
2304 op[2] = match[2];
2305 op[3] = match[3];
2306 match += inc32table[offset];
2307 LZ4_memcpy(op+4, match, 4);
2308 match -= dec64table[offset];
2309 } else {
2310 LZ4_memcpy(op, match, 8);
2311 match += 8;
2313 op += 8;
2315 if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
2316 BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
2317 if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
2318 if (op < oCopyLimit) {
2319 LZ4_wildCopy8(op, match, oCopyLimit);
2320 match += oCopyLimit - op;
2321 op = oCopyLimit;
2323 while (op < cpy) { *op++ = *match++; }
2324 } else {
2325 LZ4_memcpy(op, match, 8);
2326 if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); }
2328 op = cpy; /* wildcopy correction */
2331 /* end of decoding */
2332 DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
2333 return (int) (((char*)op)-dst); /* Nb of output bytes decoded */
2335 /* Overflow error detected */
2336 _output_error:
2337 return (int) (-(((const char*)ip)-src))-1;
2342 /*===== Instantiate the API decoding functions. =====*/
2344 LZ4_FORCE_O2
2345 int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
2347 return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
2348 decode_full_block, noDict,
2349 (BYTE*)dest, NULL, 0);
2352 LZ4_FORCE_O2
2353 int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
2355 dstCapacity = MIN(targetOutputSize, dstCapacity);
2356 return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity,
2357 partial_decode,
2358 noDict, (BYTE*)dst, NULL, 0);
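/* Illustrative sketch (hypothetical guard and name) : decode only the beginning of a block.
 * The return value is the nb of bytes actually written into dst, which can be smaller
 * than wantedBytes when the block itself is shorter. */
#if defined(LZ4_USAGE_EXAMPLES)
static int LZ4_example_partialDecode(const char* compressed, int cSize,
                                     char* dst, int wantedBytes, int dstCapacity)
{
    return LZ4_decompress_safe_partial(compressed, dst, cSize, wantedBytes, dstCapacity);
}
#endif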
2361 LZ4_FORCE_O2
2362 int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
2364 DEBUGLOG(5, "LZ4_decompress_fast");
2365 return LZ4_decompress_unsafe_generic(
2366 (const BYTE*)source, (BYTE*)dest, originalSize,
2367 0, NULL, 0);
2370 /*===== Instantiate a few more decoding cases, used more than once. =====*/
2372 LZ4_FORCE_O2 /* Exported, an obsolete API function. */
2373 int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
2375 return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
2376 decode_full_block, withPrefix64k,
2377 (BYTE*)dest - 64 KB, NULL, 0);
2380 LZ4_FORCE_O2
2381 static int LZ4_decompress_safe_partial_withPrefix64k(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity)
2383 dstCapacity = MIN(targetOutputSize, dstCapacity);
2384 return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
2385 partial_decode, withPrefix64k,
2386 (BYTE*)dest - 64 KB, NULL, 0);
2389 /* Another obsolete API function, paired with the previous one. */
2390 int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
2392 return LZ4_decompress_unsafe_generic(
2393 (const BYTE*)source, (BYTE*)dest, originalSize,
2394 64 KB, NULL, 0);
2397 LZ4_FORCE_O2
2398 static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
2399 size_t prefixSize)
2401 return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
2402 decode_full_block, noDict,
2403 (BYTE*)dest-prefixSize, NULL, 0);
2406 LZ4_FORCE_O2
2407 static int LZ4_decompress_safe_partial_withSmallPrefix(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity,
2408 size_t prefixSize)
2410 dstCapacity = MIN(targetOutputSize, dstCapacity);
2411 return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
2412 partial_decode, noDict,
2413 (BYTE*)dest-prefixSize, NULL, 0);
2416 LZ4_FORCE_O2
2417 int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
2418 int compressedSize, int maxOutputSize,
2419 const void* dictStart, size_t dictSize)
2421 return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
2422 decode_full_block, usingExtDict,
2423 (BYTE*)dest, (const BYTE*)dictStart, dictSize);
2426 LZ4_FORCE_O2
2427 int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest,
2428 int compressedSize, int targetOutputSize, int dstCapacity,
2429 const void* dictStart, size_t dictSize)
2431 dstCapacity = MIN(targetOutputSize, dstCapacity);
2432 return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
2433 partial_decode, usingExtDict,
2434 (BYTE*)dest, (const BYTE*)dictStart, dictSize);
2437 LZ4_FORCE_O2
2438 static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
2439 const void* dictStart, size_t dictSize)
2441 return LZ4_decompress_unsafe_generic(
2442 (const BYTE*)source, (BYTE*)dest, originalSize,
2443 0, (const BYTE*)dictStart, dictSize);
2446 /* The "double dictionary" mode, for use with e.g. ring buffers: the first part
2447 * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
2448 * These routines are used only once, in LZ4_decompress_*_continue().
2450 LZ4_FORCE_INLINE
2451 int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
2452 size_t prefixSize, const void* dictStart, size_t dictSize)
2454 return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
2455 decode_full_block, usingExtDict,
2456 (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);
2459 /*===== streaming decompression functions =====*/
2461 #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
2462 LZ4_streamDecode_t* LZ4_createStreamDecode(void)
2464 LZ4_STATIC_ASSERT(sizeof(LZ4_streamDecode_t) >= sizeof(LZ4_streamDecode_t_internal));
2465 return (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
2468 int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
2470 if (LZ4_stream == NULL) { return 0; } /* support free on NULL */
2471 FREEMEM(LZ4_stream);
2472 return 0;
2474 #endif
2476 /*! LZ4_setStreamDecode() :
2477 * Use this function to indicate where the dictionary is located.
2478 * This function is not necessary if previous data is still available where it was decoded.
2479 * Loading a size of 0 is allowed (same effect as no dictionary).
2480 * @return : 1 if OK, 0 if error
2482 int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
2484 LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
2485 lz4sd->prefixSize = (size_t)dictSize;
2486 if (dictSize) {
2487 assert(dictionary != NULL);
2488 lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize;
2489 } else {
2490 lz4sd->prefixEnd = (const BYTE*) dictionary;
2492 lz4sd->externalDict = NULL;
2493 lz4sd->extDictSize = 0;
2494 return 1;
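/* Illustrative sketch (hypothetical guard and name) : previously decoded data was moved
 * into a separate buffer ; point the decoder at it before decoding the next block. */
#if defined(LZ4_USAGE_EXAMPLES)
static int LZ4_example_setStreamDecode(LZ4_streamDecode_t* state,
                                       const char* movedHistory, int historySize,
                                       const char* nextBlock, int nextBlockSize,
                                       char* dst, int dstCapacity)
{
    if (!LZ4_setStreamDecode(state, movedHistory, historySize)) return -1;
    return LZ4_decompress_safe_continue(state, nextBlock, dst, nextBlockSize, dstCapacity);
}
#endif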
2497 /*! LZ4_decoderRingBufferSize() :
2498 * when setting a ring buffer for streaming decompression (optional scenario),
2499 * provides the minimum size of this ring buffer
2500 * to be compatible with any source respecting the maxBlockSize condition.
2501 * Note : in a ring buffer scenario,
2502 * blocks are presumed decompressed next to each other.
2503 * When not enough space remains for next block (remainingSize < maxBlockSize),
2504 * decoding resumes from beginning of ring buffer.
2505 * @return : minimum ring buffer size,
2506 * or 0 if there is an error (invalid maxBlockSize).
2508 int LZ4_decoderRingBufferSize(int maxBlockSize)
2510 if (maxBlockSize < 0) return 0;
2511 if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0;
2512 if (maxBlockSize < 16) maxBlockSize = 16;
2513 return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize);
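/* Illustrative sketch (hypothetical guard and names) : size a decoding ring buffer either
 * statically with the LZ4_DECODER_RING_BUFFER_SIZE() macro or at runtime with
 * LZ4_decoderRingBufferSize() ; both answers agree for a valid maxBlockSize. */
#if defined(LZ4_USAGE_EXAMPLES)
enum { LZ4_EXAMPLE_MAX_BLOCK = 4096 };
static char LZ4_example_ring[LZ4_DECODER_RING_BUFFER_SIZE(LZ4_EXAMPLE_MAX_BLOCK)];
static int LZ4_example_ringBufferSize(void)
{
    return LZ4_decoderRingBufferSize(LZ4_EXAMPLE_MAX_BLOCK) == (int)sizeof(LZ4_example_ring);
}
#endif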
2517 *_continue() :
2518 These decoding functions allow decompression of multiple blocks in "streaming" mode.
2519 Previously decoded blocks must still be available at the memory position where they were decoded.
2520 If that's not possible, save the relevant part of the decoded data into a safe buffer,
2521 and indicate where it is located using LZ4_setStreamDecode()
2523 LZ4_FORCE_O2
2524 int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
2526 LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
2527 int result;
2529 if (lz4sd->prefixSize == 0) {
2530 /* The first call, no dictionary yet. */
2531 assert(lz4sd->extDictSize == 0);
2532 result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
2533 if (result <= 0) return result;
2534 lz4sd->prefixSize = (size_t)result;
2535 lz4sd->prefixEnd = (BYTE*)dest + result;
2536 } else if (lz4sd->prefixEnd == (BYTE*)dest) {
2537 /* They're rolling the current segment. */
2538 if (lz4sd->prefixSize >= 64 KB - 1)
2539 result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
2540 else if (lz4sd->extDictSize == 0)
2541 result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize,
2542 lz4sd->prefixSize);
2543 else
2544 result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
2545 lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
2546 if (result <= 0) return result;
2547 lz4sd->prefixSize += (size_t)result;
2548 lz4sd->prefixEnd += result;
2549 } else {
2550 /* The buffer wraps around, or they're switching to another buffer. */
2551 lz4sd->extDictSize = lz4sd->prefixSize;
2552 lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
2553 result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize,
2554 lz4sd->externalDict, lz4sd->extDictSize);
2555 if (result <= 0) return result;
2556 lz4sd->prefixSize = (size_t)result;
2557 lz4sd->prefixEnd = (BYTE*)dest + result;
2560 return result;
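/* Illustrative sketch (hypothetical guard and name) : decode a chain of blocks produced by
 * LZ4_compress_fast_continue(), keeping decoded blocks contiguous so later blocks can
 * reference earlier ones. blockSizes[]/nbBlocks stand for container metadata that LZ4
 * itself does not store. */
#if defined(LZ4_USAGE_EXAMPLES)
static int LZ4_example_streamDecompress(const char* src, const int* blockSizes, int nbBlocks,
                                        char* dst, int dstCapacity)
{
    LZ4_streamDecode_t state;
    int inPos = 0, outPos = 0, i;
    LZ4_setStreamDecode(&state, NULL, 0);   /* fresh decoding stream, no starting dictionary */
    for (i = 0; i < nbBlocks; i++) {
        int const dSize = LZ4_decompress_safe_continue(&state, src + inPos, dst + outPos,
                                                       blockSizes[i], dstCapacity - outPos);
        if (dSize < 0) return -1;   /* malformed block */
        inPos  += blockSizes[i];
        outPos += dSize;            /* decoded blocks stay contiguous, serving as prefix history */
    }
    return outPos;
}
#endif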
2563 LZ4_FORCE_O2 int
2564 LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode,
2565 const char* source, char* dest, int originalSize)
2567 LZ4_streamDecode_t_internal* const lz4sd =
2568 (assert(LZ4_streamDecode!=NULL), &LZ4_streamDecode->internal_donotuse);
2569 int result;
2571 DEBUGLOG(5, "LZ4_decompress_fast_continue (toDecodeSize=%i)", originalSize);
2572 assert(originalSize >= 0);
2574 if (lz4sd->prefixSize == 0) {
2575 DEBUGLOG(5, "first invocation : no prefix nor extDict");
2576 assert(lz4sd->extDictSize == 0);
2577 result = LZ4_decompress_fast(source, dest, originalSize);
2578 if (result <= 0) return result;
2579 lz4sd->prefixSize = (size_t)originalSize;
2580 lz4sd->prefixEnd = (BYTE*)dest + originalSize;
2581 } else if (lz4sd->prefixEnd == (BYTE*)dest) {
2582 DEBUGLOG(5, "continue using existing prefix");
2583 result = LZ4_decompress_unsafe_generic(
2584 (const BYTE*)source, (BYTE*)dest, originalSize,
2585 lz4sd->prefixSize,
2586 lz4sd->externalDict, lz4sd->extDictSize);
2587 if (result <= 0) return result;
2588 lz4sd->prefixSize += (size_t)originalSize;
2589 lz4sd->prefixEnd += originalSize;
2590 } else {
2591 DEBUGLOG(5, "prefix becomes extDict");
2592 lz4sd->extDictSize = lz4sd->prefixSize;
2593 lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
2594 result = LZ4_decompress_fast_extDict(source, dest, originalSize,
2595 lz4sd->externalDict, lz4sd->extDictSize);
2596 if (result <= 0) return result;
2597 lz4sd->prefixSize = (size_t)originalSize;
2598 lz4sd->prefixEnd = (BYTE*)dest + originalSize;
2601 return result;
2606 Advanced decoding functions :
2607 *_usingDict() :
2608 These decoding functions work the same as the "_continue" ones,
2609 except that the dictionary must be provided explicitly as a parameter
2612 int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
2614 if (dictSize==0)
2615 return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
2616 if (dictStart+dictSize == dest) {
2617 if (dictSize >= 64 KB - 1) {
2618 return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
2620 assert(dictSize >= 0);
2621 return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize);
2623 assert(dictSize >= 0);
2624 return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize);
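/* Illustrative sketch (hypothetical guard and name) : decode a block that was compressed
 * after LZ4_loadDict() ; the decoder must receive exactly the same dictionary bytes. */
#if defined(LZ4_USAGE_EXAMPLES)
static int LZ4_example_decompressUsingDict(const char* dict, int dictSize,
                                           const char* compressed, int cSize,
                                           char* dst, int dstCapacity)
{
    return LZ4_decompress_safe_usingDict(compressed, dst, cSize, dstCapacity, dict, dictSize);
}
#endif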
2627 int LZ4_decompress_safe_partial_usingDict(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity, const char* dictStart, int dictSize)
2629 if (dictSize==0)
2630 return LZ4_decompress_safe_partial(source, dest, compressedSize, targetOutputSize, dstCapacity);
2631 if (dictStart+dictSize == dest) {
2632 if (dictSize >= 64 KB - 1) {
2633 return LZ4_decompress_safe_partial_withPrefix64k(source, dest, compressedSize, targetOutputSize, dstCapacity);
2635 assert(dictSize >= 0);
2636 return LZ4_decompress_safe_partial_withSmallPrefix(source, dest, compressedSize, targetOutputSize, dstCapacity, (size_t)dictSize);
2638 assert(dictSize >= 0);
2639 return LZ4_decompress_safe_partial_forceExtDict(source, dest, compressedSize, targetOutputSize, dstCapacity, dictStart, (size_t)dictSize);
2642 int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
2644 if (dictSize==0 || dictStart+dictSize == dest)
2645 return LZ4_decompress_unsafe_generic(
2646 (const BYTE*)source, (BYTE*)dest, originalSize,
2647 (size_t)dictSize, NULL, 0);
2648 assert(dictSize >= 0);
2649 return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize);
2653 /*=*************************************************
2654 * Obsolete Functions
2655 ***************************************************/
2656 /* obsolete compression functions */
2657 int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
2659 return LZ4_compress_default(source, dest, inputSize, maxOutputSize);
2661 int LZ4_compress(const char* src, char* dest, int srcSize)
2663 return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize));
2665 int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
2667 return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1);
2669 int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize)
2671 return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1);
2673 int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity)
2675 return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1);
2677 int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize)
2679 return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1);
2683 These decompression functions are deprecated and should no longer be used.
2684 They are only provided here for compatibility with older user programs.
2685 - LZ4_uncompress is totally equivalent to LZ4_decompress_fast
2686 - LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
2688 int LZ4_uncompress (const char* source, char* dest, int outputSize)
2690 return LZ4_decompress_fast(source, dest, outputSize);
2692 int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)
2694 return LZ4_decompress_safe(source, dest, isize, maxOutputSize);
2697 /* Obsolete Streaming functions */
2699 int LZ4_sizeofStreamState(void) { return sizeof(LZ4_stream_t); }
2701 int LZ4_resetStreamState(void* state, char* inputBuffer)
2703 (void)inputBuffer;
2704 LZ4_resetStream((LZ4_stream_t*)state);
2705 return 0;
2708 #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
2709 void* LZ4_create (char* inputBuffer)
2711 (void)inputBuffer;
2712 return LZ4_createStream();
2714 #endif
2716 char* LZ4_slideInputBuffer (void* state)
2718 /* avoid const char * -> char * conversion warning */
2719 return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary;
2722 #endif /* LZ4_COMMONDEFS_ONLY */