1 /* LzFind.c -- Match finder for LZ algorithms
2 2008-10-04 : Igor Pavlov : Public domain */
/* Value stored in hash/son slots that hold no position: MatchFinder_Init fills
   the hash table with it and the tree walkers write it to terminate a branch. */
9 #define kEmptyHashValue 0
/* MatchFinder_CheckLimits normalizes the positions once p->pos reaches this
   value; MatchFinder_SetLimits derives its distance-to-normalization limit from it. */
10 #define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
11 #define kNormalizeStepMin (1 << 10) /* it must be a power of 2 */
/* Clears the bits below kNormalizeStepMin; MatchFinder_GetSubValue ANDs its
   result with this mask. */
12 #define kNormalizeMask (~(kNormalizeStepMin - 1))
/* Upper bound (3 << 30) that MatchFinder_Create compares historySize against. */
13 #define kMaxHistorySize ((UInt32)3 << 30)
/* NOTE(review): no use of this constant is visible in this excerpt. */
15 #define kStartMaxLen 3
17 static void LzInWindow_Free(CMatchFinder
*p
, ISzAlloc
*alloc
)
21 alloc
->Free(alloc
, p
->bufferBase
);
26 /* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G */
28 static int LzInWindow_Create(CMatchFinder
*p
, UInt32 keepSizeReserv
, ISzAlloc
*alloc
)
30 UInt32 blockSize
= p
->keepSizeBefore
+ p
->keepSizeAfter
+ keepSizeReserv
;
33 p
->blockSize
= blockSize
;
36 if (p
->bufferBase
== 0 || p
->blockSize
!= blockSize
)
38 LzInWindow_Free(p
, alloc
);
39 p
->blockSize
= blockSize
;
40 p
->bufferBase
= (Byte
*)alloc
->Alloc(alloc
, (size_t)blockSize
);
42 return (p
->bufferBase
!= 0);
45 Byte
*MatchFinder_GetPointerToCurrentPos(CMatchFinder
*p
) { return p
->buffer
; }
46 Byte
MatchFinder_GetIndexByte(CMatchFinder
*p
, Int32 index
) { return p
->buffer
[index
]; }
48 UInt32
MatchFinder_GetNumAvailableBytes(CMatchFinder
*p
) { return p
->streamPos
- p
->pos
; }
50 void MatchFinder_ReduceOffsets(CMatchFinder
*p
, UInt32 subValue
)
52 p
->posLimit
-= subValue
;
54 p
->streamPos
-= subValue
;
57 static void MatchFinder_ReadBlock(CMatchFinder
*p
)
59 if (p
->streamEndWasReached
|| p
->result
!= SZ_OK
)
63 Byte
*dest
= p
->buffer
+ (p
->streamPos
- p
->pos
);
64 size_t size
= (p
->bufferBase
+ p
->blockSize
- dest
);
67 p
->result
= p
->stream
->Read(p
->stream
, dest
, &size
);
68 if (p
->result
!= SZ_OK
)
72 p
->streamEndWasReached
= 1;
75 p
->streamPos
+= (UInt32
)size
;
76 if (p
->streamPos
- p
->pos
> p
->keepSizeAfter
)
81 void MatchFinder_MoveBlock(CMatchFinder
*p
)
83 memmove(p
->bufferBase
,
84 p
->buffer
- p
->keepSizeBefore
,
85 (size_t)(p
->streamPos
- p
->pos
+ p
->keepSizeBefore
));
86 p
->buffer
= p
->bufferBase
+ p
->keepSizeBefore
;
89 int MatchFinder_NeedMove(CMatchFinder
*p
)
91 /* if (p->streamEndWasReached) return 0; */
92 return ((size_t)(p
->bufferBase
+ p
->blockSize
- p
->buffer
) <= p
->keepSizeAfter
);
95 void MatchFinder_ReadIfRequired(CMatchFinder
*p
)
97 if (p
->streamEndWasReached
)
99 if (p
->keepSizeAfter
>= p
->streamPos
- p
->pos
)
100 MatchFinder_ReadBlock(p
);
103 static void MatchFinder_CheckAndMoveAndRead(CMatchFinder
*p
)
105 if (MatchFinder_NeedMove(p
))
106 MatchFinder_MoveBlock(p
);
107 MatchFinder_ReadBlock(p
);
110 static void MatchFinder_SetDefaultSettings(CMatchFinder
*p
)
115 /* p->skipModeBits = 0; */
/* Constant XORed into r, when r's low bit is set, on each of the 8 inner
   steps of the 256-iteration loop in MatchFinder_Construct. */
120 #define kCrcPoly 0xEDB88320
122 void MatchFinder_Construct(CMatchFinder
*p
)
128 MatchFinder_SetDefaultSettings(p
);
130 for (i
= 0; i
< 256; i
++)
134 for (j
= 0; j
< 8; j
++)
135 r
= (r
>> 1) ^ (kCrcPoly
& ~((r
& 1) - 1));
140 static void MatchFinder_FreeThisClassMemory(CMatchFinder
*p
, ISzAlloc
*alloc
)
142 alloc
->Free(alloc
, p
->hash
);
146 void MatchFinder_Free(CMatchFinder
*p
, ISzAlloc
*alloc
)
148 MatchFinder_FreeThisClassMemory(p
, alloc
);
149 LzInWindow_Free(p
, alloc
);
152 static CLzRef
* AllocRefs(UInt32 num
, ISzAlloc
*alloc
)
154 size_t sizeInBytes
= (size_t)num
* sizeof(CLzRef
);
155 if (sizeInBytes
/ sizeof(CLzRef
) != num
)
157 return (CLzRef
*)alloc
->Alloc(alloc
, sizeInBytes
);
160 int MatchFinder_Create(CMatchFinder
*p
, UInt32 historySize
,
161 UInt32 keepAddBufferBefore
, UInt32 matchMaxLen
, UInt32 keepAddBufferAfter
,
165 if (historySize
> kMaxHistorySize
)
167 MatchFinder_Free(p
, alloc
);
170 sizeReserv
= historySize
>> 1;
171 if (historySize
> ((UInt32
)2 << 30))
172 sizeReserv
= historySize
>> 2;
173 sizeReserv
+= (keepAddBufferBefore
+ matchMaxLen
+ keepAddBufferAfter
) / 2 + (1 << 19);
175 p
->keepSizeBefore
= historySize
+ keepAddBufferBefore
+ 1;
176 p
->keepSizeAfter
= matchMaxLen
+ keepAddBufferAfter
;
177 /* we need one additional byte, since MoveBlock is used after pos++ and before the dictionary is used */
178 if (LzInWindow_Create(p
, sizeReserv
, alloc
))
180 UInt32 newCyclicBufferSize
= (historySize
/* >> p->skipModeBits */) + 1;
182 p
->matchMaxLen
= matchMaxLen
;
184 p
->fixedHashSize
= 0;
185 if (p
->numHashBytes
== 2)
189 hs
= historySize
- 1;
195 /* hs >>= p->skipModeBits; */
196 hs
|= 0xFFFF; /* don't change it! It's required for Deflate */
199 if (p
->numHashBytes
== 3)
207 if (p
->numHashBytes
> 2) p
->fixedHashSize
+= kHash2Size
;
208 if (p
->numHashBytes
> 3) p
->fixedHashSize
+= kHash3Size
;
209 if (p
->numHashBytes
> 4) p
->fixedHashSize
+= kHash4Size
;
210 hs
+= p
->fixedHashSize
;
214 UInt32 prevSize
= p
->hashSizeSum
+ p
->numSons
;
216 p
->historySize
= historySize
;
218 p
->cyclicBufferSize
= newCyclicBufferSize
;
219 p
->numSons
= (p
->btMode
? newCyclicBufferSize
* 2 : newCyclicBufferSize
);
220 newSize
= p
->hashSizeSum
+ p
->numSons
;
221 if (p
->hash
!= 0 && prevSize
== newSize
)
223 MatchFinder_FreeThisClassMemory(p
, alloc
);
224 p
->hash
= AllocRefs(newSize
, alloc
);
227 p
->son
= p
->hash
+ p
->hashSizeSum
;
232 MatchFinder_Free(p
, alloc
);
236 static void MatchFinder_SetLimits(CMatchFinder
*p
)
238 UInt32 limit
= kMaxValForNormalize
- p
->pos
;
239 UInt32 limit2
= p
->cyclicBufferSize
- p
->cyclicBufferPos
;
242 limit2
= p
->streamPos
- p
->pos
;
243 if (limit2
<= p
->keepSizeAfter
)
249 limit2
-= p
->keepSizeAfter
;
253 UInt32 lenLimit
= p
->streamPos
- p
->pos
;
254 if (lenLimit
> p
->matchMaxLen
)
255 lenLimit
= p
->matchMaxLen
;
256 p
->lenLimit
= lenLimit
;
258 p
->posLimit
= p
->pos
+ limit
;
261 void MatchFinder_Init(CMatchFinder
*p
)
264 for (i
= 0; i
< p
->hashSizeSum
; i
++)
265 p
->hash
[i
] = kEmptyHashValue
;
266 p
->cyclicBufferPos
= 0;
267 p
->buffer
= p
->bufferBase
;
268 p
->pos
= p
->streamPos
= p
->cyclicBufferSize
;
270 p
->streamEndWasReached
= 0;
271 MatchFinder_ReadBlock(p
);
272 MatchFinder_SetLimits(p
);
275 static UInt32
MatchFinder_GetSubValue(CMatchFinder
*p
)
277 return (p
->pos
- p
->historySize
- 1) & kNormalizeMask
;
280 void MatchFinder_Normalize3(UInt32 subValue
, CLzRef
*items
, UInt32 numItems
)
283 for (i
= 0; i
< numItems
; i
++)
285 UInt32 value
= items
[i
];
286 if (value
<= subValue
)
287 value
= kEmptyHashValue
;
294 static void MatchFinder_Normalize(CMatchFinder
*p
)
296 UInt32 subValue
= MatchFinder_GetSubValue(p
);
297 MatchFinder_Normalize3(subValue
, p
->hash
, p
->hashSizeSum
+ p
->numSons
);
298 MatchFinder_ReduceOffsets(p
, subValue
);
301 static void MatchFinder_CheckLimits(CMatchFinder
*p
)
303 if (p
->pos
== kMaxValForNormalize
)
304 MatchFinder_Normalize(p
);
305 if (!p
->streamEndWasReached
&& p
->keepSizeAfter
== p
->streamPos
- p
->pos
)
306 MatchFinder_CheckAndMoveAndRead(p
);
307 if (p
->cyclicBufferPos
== p
->cyclicBufferSize
)
308 p
->cyclicBufferPos
= 0;
309 MatchFinder_SetLimits(p
);
312 static UInt32
* Hc_GetMatchesSpec(UInt32 lenLimit
, UInt32 curMatch
, UInt32 pos
, const Byte
*cur
, CLzRef
*son
,
313 UInt32 _cyclicBufferPos
, UInt32 _cyclicBufferSize
, UInt32 cutValue
,
314 UInt32
*distances
, UInt32 maxLen
)
316 son
[_cyclicBufferPos
] = curMatch
;
319 UInt32 delta
= pos
- curMatch
;
320 if (cutValue
-- == 0 || delta
>= _cyclicBufferSize
)
323 const Byte
*pb
= cur
- delta
;
324 curMatch
= son
[_cyclicBufferPos
- delta
+ ((delta
> _cyclicBufferPos
) ? _cyclicBufferSize
: 0)];
325 if (pb
[maxLen
] == cur
[maxLen
] && *pb
== *cur
)
328 while (++len
!= lenLimit
)
329 if (pb
[len
] != cur
[len
])
333 *distances
++ = maxLen
= len
;
334 *distances
++ = delta
- 1;
343 UInt32
* GetMatchesSpec1(UInt32 lenLimit
, UInt32 curMatch
, UInt32 pos
, const Byte
*cur
, CLzRef
*son
,
344 UInt32 _cyclicBufferPos
, UInt32 _cyclicBufferSize
, UInt32 cutValue
,
345 UInt32
*distances
, UInt32 maxLen
)
347 CLzRef
*ptr0
= son
+ (_cyclicBufferPos
<< 1) + 1;
348 CLzRef
*ptr1
= son
+ (_cyclicBufferPos
<< 1);
349 UInt32 len0
= 0, len1
= 0;
352 UInt32 delta
= pos
- curMatch
;
353 if (cutValue
-- == 0 || delta
>= _cyclicBufferSize
)
355 *ptr0
= *ptr1
= kEmptyHashValue
;
359 CLzRef
*pair
= son
+ ((_cyclicBufferPos
- delta
+ ((delta
> _cyclicBufferPos
) ? _cyclicBufferSize
: 0)) << 1);
360 const Byte
*pb
= cur
- delta
;
361 UInt32 len
= (len0
< len1
? len0
: len1
);
362 if (pb
[len
] == cur
[len
])
364 if (++len
!= lenLimit
&& pb
[len
] == cur
[len
])
365 while (++len
!= lenLimit
)
366 if (pb
[len
] != cur
[len
])
370 *distances
++ = maxLen
= len
;
371 *distances
++ = delta
- 1;
380 if (pb
[len
] < cur
[len
])
398 static void SkipMatchesSpec(UInt32 lenLimit
, UInt32 curMatch
, UInt32 pos
, const Byte
*cur
, CLzRef
*son
,
399 UInt32 _cyclicBufferPos
, UInt32 _cyclicBufferSize
, UInt32 cutValue
)
401 CLzRef
*ptr0
= son
+ (_cyclicBufferPos
<< 1) + 1;
402 CLzRef
*ptr1
= son
+ (_cyclicBufferPos
<< 1);
403 UInt32 len0
= 0, len1
= 0;
406 UInt32 delta
= pos
- curMatch
;
407 if (cutValue
-- == 0 || delta
>= _cyclicBufferSize
)
409 *ptr0
= *ptr1
= kEmptyHashValue
;
413 CLzRef
*pair
= son
+ ((_cyclicBufferPos
- delta
+ ((delta
> _cyclicBufferPos
) ? _cyclicBufferSize
: 0)) << 1);
414 const Byte
*pb
= cur
- delta
;
415 UInt32 len
= (len0
< len1
? len0
: len1
);
416 if (pb
[len
] == cur
[len
])
418 while (++len
!= lenLimit
)
419 if (pb
[len
] != cur
[len
])
430 if (pb
[len
] < cur
[len
])
449 ++p->cyclicBufferPos; \
451 if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
/* Expands MOVE_POS and then returns the variable named `offset`, which must
   exist in the function where the macro is expanded. */
453 #define MOVE_POS_RET MOVE_POS return offset;
455 static void MatchFinder_MovePos(CMatchFinder
*p
) { MOVE_POS
; }
457 #define GET_MATCHES_HEADER2(minLen, ret_op) \
458 UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \
459 lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
/* GetMatches prologue: GET_MATCHES_HEADER2 with `return 0` as the action taken
   (after MatchFinder_MovePos) when p->lenLimit is below minLen. */
462 #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
/* Skip prologue: same as above but the action is `continue`, so it must be
   expanded inside a loop. */
463 #define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
/* Expands to the six match-finder fields (pos, buffer, son, cyclicBufferPos,
   cyclicBufferSize, cutValue) as a comma-separated argument list. */
465 #define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
/* GetMatches epilogue: calls GetMatchesSpec1 writing at distances + offset,
   stores the distance from `distances` to the returned pointer (as UInt32) in
   `offset`, then expands MOVE_POS_RET. Uses lenLimit, curMatch, p and
   distances from the function where it is expanded. */
467 #define GET_MATCHES_FOOTER(offset, maxLen) \
468 offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \
469 distances + offset, maxLen) - distances); MOVE_POS_RET;
/* Skip epilogue: calls SkipMatchesSpec for the current position and then
   expands MOVE_POS. Uses lenLimit, curMatch and p from the function where it
   is expanded. */
471 #define SKIP_FOOTER \
472 SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
474 static UInt32
Bt2_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
477 GET_MATCHES_HEADER(2)
479 curMatch
= p
->hash
[hashValue
];
480 p
->hash
[hashValue
] = p
->pos
;
482 GET_MATCHES_FOOTER(offset
, 1)
485 UInt32
Bt3Zip_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
488 GET_MATCHES_HEADER(3)
490 curMatch
= p
->hash
[hashValue
];
491 p
->hash
[hashValue
] = p
->pos
;
493 GET_MATCHES_FOOTER(offset
, 2)
496 static UInt32
Bt3_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
498 UInt32 hash2Value
, delta2
, maxLen
, offset
;
499 GET_MATCHES_HEADER(3)
503 delta2
= p
->pos
- p
->hash
[hash2Value
];
504 curMatch
= p
->hash
[kFix3HashSize
+ hashValue
];
506 p
->hash
[hash2Value
] =
507 p
->hash
[kFix3HashSize
+ hashValue
] = p
->pos
;
512 if (delta2
< p
->cyclicBufferSize
&& *(cur
- delta2
) == *cur
)
514 for (; maxLen
!= lenLimit
; maxLen
++)
515 if (cur
[(ptrdiff_t)maxLen
- delta2
] != cur
[maxLen
])
517 distances
[0] = maxLen
;
518 distances
[1] = delta2
- 1;
520 if (maxLen
== lenLimit
)
522 SkipMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
));
526 GET_MATCHES_FOOTER(offset
, maxLen
)
529 static UInt32
Bt4_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
531 UInt32 hash2Value
, hash3Value
, delta2
, delta3
, maxLen
, offset
;
532 GET_MATCHES_HEADER(4)
536 delta2
= p
->pos
- p
->hash
[ hash2Value
];
537 delta3
= p
->pos
- p
->hash
[kFix3HashSize
+ hash3Value
];
538 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
540 p
->hash
[ hash2Value
] =
541 p
->hash
[kFix3HashSize
+ hash3Value
] =
542 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
546 if (delta2
< p
->cyclicBufferSize
&& *(cur
- delta2
) == *cur
)
548 distances
[0] = maxLen
= 2;
549 distances
[1] = delta2
- 1;
552 if (delta2
!= delta3
&& delta3
< p
->cyclicBufferSize
&& *(cur
- delta3
) == *cur
)
555 distances
[offset
+ 1] = delta3
- 1;
561 for (; maxLen
!= lenLimit
; maxLen
++)
562 if (cur
[(ptrdiff_t)maxLen
- delta2
] != cur
[maxLen
])
564 distances
[offset
- 2] = maxLen
;
565 if (maxLen
== lenLimit
)
567 SkipMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
));
573 GET_MATCHES_FOOTER(offset
, maxLen
)
576 static UInt32
Hc4_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
578 UInt32 hash2Value
, hash3Value
, delta2
, delta3
, maxLen
, offset
;
579 GET_MATCHES_HEADER(4)
583 delta2
= p
->pos
- p
->hash
[ hash2Value
];
584 delta3
= p
->pos
- p
->hash
[kFix3HashSize
+ hash3Value
];
585 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
587 p
->hash
[ hash2Value
] =
588 p
->hash
[kFix3HashSize
+ hash3Value
] =
589 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
593 if (delta2
< p
->cyclicBufferSize
&& *(cur
- delta2
) == *cur
)
595 distances
[0] = maxLen
= 2;
596 distances
[1] = delta2
- 1;
599 if (delta2
!= delta3
&& delta3
< p
->cyclicBufferSize
&& *(cur
- delta3
) == *cur
)
602 distances
[offset
+ 1] = delta3
- 1;
608 for (; maxLen
!= lenLimit
; maxLen
++)
609 if (cur
[(ptrdiff_t)maxLen
- delta2
] != cur
[maxLen
])
611 distances
[offset
- 2] = maxLen
;
612 if (maxLen
== lenLimit
)
614 p
->son
[p
->cyclicBufferPos
] = curMatch
;
620 offset
= (UInt32
)(Hc_GetMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
),
621 distances
+ offset
, maxLen
) - (distances
));
625 UInt32
Hc3Zip_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
628 GET_MATCHES_HEADER(3)
630 curMatch
= p
->hash
[hashValue
];
631 p
->hash
[hashValue
] = p
->pos
;
632 offset
= (UInt32
)(Hc_GetMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
),
633 distances
, 2) - (distances
));
637 static void Bt2_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
643 curMatch
= p
->hash
[hashValue
];
644 p
->hash
[hashValue
] = p
->pos
;
650 void Bt3Zip_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
656 curMatch
= p
->hash
[hashValue
];
657 p
->hash
[hashValue
] = p
->pos
;
663 static void Bt3_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
670 curMatch
= p
->hash
[kFix3HashSize
+ hashValue
];
671 p
->hash
[hash2Value
] =
672 p
->hash
[kFix3HashSize
+ hashValue
] = p
->pos
;
678 static void Bt4_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
682 UInt32 hash2Value
, hash3Value
;
685 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
686 p
->hash
[ hash2Value
] =
687 p
->hash
[kFix3HashSize
+ hash3Value
] = p
->pos
;
688 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
694 static void Hc4_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
698 UInt32 hash2Value
, hash3Value
;
701 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
702 p
->hash
[ hash2Value
] =
703 p
->hash
[kFix3HashSize
+ hash3Value
] =
704 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
705 p
->son
[p
->cyclicBufferPos
] = curMatch
;
711 void Hc3Zip_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
717 curMatch
= p
->hash
[hashValue
];
718 p
->hash
[hashValue
] = p
->pos
;
719 p
->son
[p
->cyclicBufferPos
] = curMatch
;
725 void MatchFinder_CreateVTable(CMatchFinder
*p
, IMatchFinder
*vTable
)
727 vTable
->Init
= (Mf_Init_Func
)MatchFinder_Init
;
728 vTable
->GetIndexByte
= (Mf_GetIndexByte_Func
)MatchFinder_GetIndexByte
;
729 vTable
->GetNumAvailableBytes
= (Mf_GetNumAvailableBytes_Func
)MatchFinder_GetNumAvailableBytes
;
730 vTable
->GetPointerToCurrentPos
= (Mf_GetPointerToCurrentPos_Func
)MatchFinder_GetPointerToCurrentPos
;
733 vTable
->GetMatches
= (Mf_GetMatches_Func
)Hc4_MatchFinder_GetMatches
;
734 vTable
->Skip
= (Mf_Skip_Func
)Hc4_MatchFinder_Skip
;
736 else if (p
->numHashBytes
== 2)
738 vTable
->GetMatches
= (Mf_GetMatches_Func
)Bt2_MatchFinder_GetMatches
;
739 vTable
->Skip
= (Mf_Skip_Func
)Bt2_MatchFinder_Skip
;
741 else if (p
->numHashBytes
== 3)
743 vTable
->GetMatches
= (Mf_GetMatches_Func
)Bt3_MatchFinder_GetMatches
;
744 vTable
->Skip
= (Mf_Skip_Func
)Bt3_MatchFinder_Skip
;
748 vTable
->GetMatches
= (Mf_GetMatches_Func
)Bt4_MatchFinder_GetMatches
;
749 vTable
->Skip
= (Mf_Skip_Func
)Bt4_MatchFinder_Skip
;