1 /* LzFind.c -- Match finder for LZ algorithms
2 2009-04-22 : Igor Pavlov : Public domain */
9 #define kEmptyHashValue 0
10 #define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
11 #define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
12 #define kNormalizeMask (~(kNormalizeStepMin - 1))
13 #define kMaxHistorySize ((UInt32)3 << 30)
15 #define kStartMaxLen 3
17 static void LzInWindow_Free(CMatchFinder
*p
, ISzAlloc
*alloc
)
21 alloc
->Free(alloc
, p
->bufferBase
);
26 /* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */
28 static int LzInWindow_Create(CMatchFinder
*p
, UInt32 keepSizeReserv
, ISzAlloc
*alloc
)
30 UInt32 blockSize
= p
->keepSizeBefore
+ p
->keepSizeAfter
+ keepSizeReserv
;
33 p
->blockSize
= blockSize
;
36 if (p
->bufferBase
== 0 || p
->blockSize
!= blockSize
)
38 LzInWindow_Free(p
, alloc
);
39 p
->blockSize
= blockSize
;
40 p
->bufferBase
= (Byte
*)alloc
->Alloc(alloc
, (size_t)blockSize
);
42 return (p
->bufferBase
!= 0);
45 Byte
*MatchFinder_GetPointerToCurrentPos(CMatchFinder
*p
) { return p
->buffer
; }
46 Byte
MatchFinder_GetIndexByte(CMatchFinder
*p
, Int32 index
) { return p
->buffer
[index
]; }
48 UInt32
MatchFinder_GetNumAvailableBytes(CMatchFinder
*p
) { return p
->streamPos
- p
->pos
; }
50 void MatchFinder_ReduceOffsets(CMatchFinder
*p
, UInt32 subValue
)
52 p
->posLimit
-= subValue
;
54 p
->streamPos
-= subValue
;
57 static void MatchFinder_ReadBlock(CMatchFinder
*p
)
59 if (p
->streamEndWasReached
|| p
->result
!= SZ_OK
)
63 UInt32 curSize
= 0xFFFFFFFF - p
->streamPos
;
64 if (curSize
> p
->directInputRem
)
65 curSize
= (UInt32
)p
->directInputRem
;
66 p
->directInputRem
-= curSize
;
67 p
->streamPos
+= curSize
;
68 if (p
->directInputRem
== 0)
69 p
->streamEndWasReached
= 1;
74 Byte
*dest
= p
->buffer
+ (p
->streamPos
- p
->pos
);
75 size_t size
= (p
->bufferBase
+ p
->blockSize
- dest
);
78 p
->result
= p
->stream
->Read(p
->stream
, dest
, &size
);
79 if (p
->result
!= SZ_OK
)
83 p
->streamEndWasReached
= 1;
86 p
->streamPos
+= (UInt32
)size
;
87 if (p
->streamPos
- p
->pos
> p
->keepSizeAfter
)
92 void MatchFinder_MoveBlock(CMatchFinder
*p
)
94 memmove(p
->bufferBase
,
95 p
->buffer
- p
->keepSizeBefore
,
96 (size_t)(p
->streamPos
- p
->pos
+ p
->keepSizeBefore
));
97 p
->buffer
= p
->bufferBase
+ p
->keepSizeBefore
;
100 int MatchFinder_NeedMove(CMatchFinder
*p
)
104 /* if (p->streamEndWasReached) return 0; */
105 return ((size_t)(p
->bufferBase
+ p
->blockSize
- p
->buffer
) <= p
->keepSizeAfter
);
108 void MatchFinder_ReadIfRequired(CMatchFinder
*p
)
110 if (p
->streamEndWasReached
)
112 if (p
->keepSizeAfter
>= p
->streamPos
- p
->pos
)
113 MatchFinder_ReadBlock(p
);
116 static void MatchFinder_CheckAndMoveAndRead(CMatchFinder
*p
)
118 if (MatchFinder_NeedMove(p
))
119 MatchFinder_MoveBlock(p
);
120 MatchFinder_ReadBlock(p
);
123 static void MatchFinder_SetDefaultSettings(CMatchFinder
*p
)
131 #define kCrcPoly 0xEDB88320
133 void MatchFinder_Construct(CMatchFinder
*p
)
139 MatchFinder_SetDefaultSettings(p
);
141 for (i
= 0; i
< 256; i
++)
145 for (j
= 0; j
< 8; j
++)
146 r
= (r
>> 1) ^ (kCrcPoly
& ~((r
& 1) - 1));
151 static void MatchFinder_FreeThisClassMemory(CMatchFinder
*p
, ISzAlloc
*alloc
)
153 alloc
->Free(alloc
, p
->hash
);
157 void MatchFinder_Free(CMatchFinder
*p
, ISzAlloc
*alloc
)
159 MatchFinder_FreeThisClassMemory(p
, alloc
);
160 LzInWindow_Free(p
, alloc
);
163 static CLzRef
* AllocRefs(UInt32 num
, ISzAlloc
*alloc
)
165 size_t sizeInBytes
= (size_t)num
* sizeof(CLzRef
);
166 if (sizeInBytes
/ sizeof(CLzRef
) != num
)
168 return (CLzRef
*)alloc
->Alloc(alloc
, sizeInBytes
);
171 int MatchFinder_Create(CMatchFinder
*p
, UInt32 historySize
,
172 UInt32 keepAddBufferBefore
, UInt32 matchMaxLen
, UInt32 keepAddBufferAfter
,
176 if (historySize
> kMaxHistorySize
)
178 MatchFinder_Free(p
, alloc
);
181 sizeReserv
= historySize
>> 1;
182 if (historySize
> ((UInt32
)2 << 30))
183 sizeReserv
= historySize
>> 2;
184 sizeReserv
+= (keepAddBufferBefore
+ matchMaxLen
+ keepAddBufferAfter
) / 2 + (1 << 19);
186 p
->keepSizeBefore
= historySize
+ keepAddBufferBefore
+ 1;
187 p
->keepSizeAfter
= matchMaxLen
+ keepAddBufferAfter
;
188 /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
189 if (LzInWindow_Create(p
, sizeReserv
, alloc
))
191 UInt32 newCyclicBufferSize
= historySize
+ 1;
193 p
->matchMaxLen
= matchMaxLen
;
195 p
->fixedHashSize
= 0;
196 if (p
->numHashBytes
== 2)
200 hs
= historySize
- 1;
206 hs
|= 0xFFFF; /* don't change it! It's required for Deflate */
209 if (p
->numHashBytes
== 3)
217 if (p
->numHashBytes
> 2) p
->fixedHashSize
+= kHash2Size
;
218 if (p
->numHashBytes
> 3) p
->fixedHashSize
+= kHash3Size
;
219 if (p
->numHashBytes
> 4) p
->fixedHashSize
+= kHash4Size
;
220 hs
+= p
->fixedHashSize
;
224 UInt32 prevSize
= p
->hashSizeSum
+ p
->numSons
;
226 p
->historySize
= historySize
;
228 p
->cyclicBufferSize
= newCyclicBufferSize
;
229 p
->numSons
= (p
->btMode
? newCyclicBufferSize
* 2 : newCyclicBufferSize
);
230 newSize
= p
->hashSizeSum
+ p
->numSons
;
231 if (p
->hash
!= 0 && prevSize
== newSize
)
233 MatchFinder_FreeThisClassMemory(p
, alloc
);
234 p
->hash
= AllocRefs(newSize
, alloc
);
237 p
->son
= p
->hash
+ p
->hashSizeSum
;
242 MatchFinder_Free(p
, alloc
);
246 static void MatchFinder_SetLimits(CMatchFinder
*p
)
248 UInt32 limit
= kMaxValForNormalize
- p
->pos
;
249 UInt32 limit2
= p
->cyclicBufferSize
- p
->cyclicBufferPos
;
252 limit2
= p
->streamPos
- p
->pos
;
253 if (limit2
<= p
->keepSizeAfter
)
259 limit2
-= p
->keepSizeAfter
;
263 UInt32 lenLimit
= p
->streamPos
- p
->pos
;
264 if (lenLimit
> p
->matchMaxLen
)
265 lenLimit
= p
->matchMaxLen
;
266 p
->lenLimit
= lenLimit
;
268 p
->posLimit
= p
->pos
+ limit
;
271 void MatchFinder_Init(CMatchFinder
*p
)
274 for (i
= 0; i
< p
->hashSizeSum
; i
++)
275 p
->hash
[i
] = kEmptyHashValue
;
276 p
->cyclicBufferPos
= 0;
277 p
->buffer
= p
->bufferBase
;
278 p
->pos
= p
->streamPos
= p
->cyclicBufferSize
;
280 p
->streamEndWasReached
= 0;
281 MatchFinder_ReadBlock(p
);
282 MatchFinder_SetLimits(p
);
285 static UInt32
MatchFinder_GetSubValue(CMatchFinder
*p
)
287 return (p
->pos
- p
->historySize
- 1) & kNormalizeMask
;
290 void MatchFinder_Normalize3(UInt32 subValue
, CLzRef
*items
, UInt32 numItems
)
293 for (i
= 0; i
< numItems
; i
++)
295 UInt32 value
= items
[i
];
296 if (value
<= subValue
)
297 value
= kEmptyHashValue
;
304 static void MatchFinder_Normalize(CMatchFinder
*p
)
306 UInt32 subValue
= MatchFinder_GetSubValue(p
);
307 MatchFinder_Normalize3(subValue
, p
->hash
, p
->hashSizeSum
+ p
->numSons
);
308 MatchFinder_ReduceOffsets(p
, subValue
);
311 static void MatchFinder_CheckLimits(CMatchFinder
*p
)
313 if (p
->pos
== kMaxValForNormalize
)
314 MatchFinder_Normalize(p
);
315 if (!p
->streamEndWasReached
&& p
->keepSizeAfter
== p
->streamPos
- p
->pos
)
316 MatchFinder_CheckAndMoveAndRead(p
);
317 if (p
->cyclicBufferPos
== p
->cyclicBufferSize
)
318 p
->cyclicBufferPos
= 0;
319 MatchFinder_SetLimits(p
);
322 static UInt32
* Hc_GetMatchesSpec(UInt32 lenLimit
, UInt32 curMatch
, UInt32 pos
, const Byte
*cur
, CLzRef
*son
,
323 UInt32 _cyclicBufferPos
, UInt32 _cyclicBufferSize
, UInt32 cutValue
,
324 UInt32
*distances
, UInt32 maxLen
)
326 son
[_cyclicBufferPos
] = curMatch
;
329 UInt32 delta
= pos
- curMatch
;
330 if (cutValue
-- == 0 || delta
>= _cyclicBufferSize
)
333 const Byte
*pb
= cur
- delta
;
334 curMatch
= son
[_cyclicBufferPos
- delta
+ ((delta
> _cyclicBufferPos
) ? _cyclicBufferSize
: 0)];
335 if (pb
[maxLen
] == cur
[maxLen
] && *pb
== *cur
)
338 while (++len
!= lenLimit
)
339 if (pb
[len
] != cur
[len
])
343 *distances
++ = maxLen
= len
;
344 *distances
++ = delta
- 1;
353 UInt32
* GetMatchesSpec1(UInt32 lenLimit
, UInt32 curMatch
, UInt32 pos
, const Byte
*cur
, CLzRef
*son
,
354 UInt32 _cyclicBufferPos
, UInt32 _cyclicBufferSize
, UInt32 cutValue
,
355 UInt32
*distances
, UInt32 maxLen
)
357 CLzRef
*ptr0
= son
+ (_cyclicBufferPos
<< 1) + 1;
358 CLzRef
*ptr1
= son
+ (_cyclicBufferPos
<< 1);
359 UInt32 len0
= 0, len1
= 0;
362 UInt32 delta
= pos
- curMatch
;
363 if (cutValue
-- == 0 || delta
>= _cyclicBufferSize
)
365 *ptr0
= *ptr1
= kEmptyHashValue
;
369 CLzRef
*pair
= son
+ ((_cyclicBufferPos
- delta
+ ((delta
> _cyclicBufferPos
) ? _cyclicBufferSize
: 0)) << 1);
370 const Byte
*pb
= cur
- delta
;
371 UInt32 len
= (len0
< len1
? len0
: len1
);
372 if (pb
[len
] == cur
[len
])
374 if (++len
!= lenLimit
&& pb
[len
] == cur
[len
])
375 while (++len
!= lenLimit
)
376 if (pb
[len
] != cur
[len
])
380 *distances
++ = maxLen
= len
;
381 *distances
++ = delta
- 1;
390 if (pb
[len
] < cur
[len
])
408 static void SkipMatchesSpec(UInt32 lenLimit
, UInt32 curMatch
, UInt32 pos
, const Byte
*cur
, CLzRef
*son
,
409 UInt32 _cyclicBufferPos
, UInt32 _cyclicBufferSize
, UInt32 cutValue
)
411 CLzRef
*ptr0
= son
+ (_cyclicBufferPos
<< 1) + 1;
412 CLzRef
*ptr1
= son
+ (_cyclicBufferPos
<< 1);
413 UInt32 len0
= 0, len1
= 0;
416 UInt32 delta
= pos
- curMatch
;
417 if (cutValue
-- == 0 || delta
>= _cyclicBufferSize
)
419 *ptr0
= *ptr1
= kEmptyHashValue
;
423 CLzRef
*pair
= son
+ ((_cyclicBufferPos
- delta
+ ((delta
> _cyclicBufferPos
) ? _cyclicBufferSize
: 0)) << 1);
424 const Byte
*pb
= cur
- delta
;
425 UInt32 len
= (len0
< len1
? len0
: len1
);
426 if (pb
[len
] == cur
[len
])
428 while (++len
!= lenLimit
)
429 if (pb
[len
] != cur
[len
])
440 if (pb
[len
] < cur
[len
])
459 ++p->cyclicBufferPos; \
461 if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
463 #define MOVE_POS_RET MOVE_POS return offset;
465 static void MatchFinder_MovePos(CMatchFinder
*p
) { MOVE_POS
; }
467 #define GET_MATCHES_HEADER2(minLen, ret_op) \
468 UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \
469 lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
472 #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
473 #define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
475 #define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
477 #define GET_MATCHES_FOOTER(offset, maxLen) \
478 offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \
479 distances + offset, maxLen) - distances); MOVE_POS_RET;
481 #define SKIP_FOOTER \
482 SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
484 static UInt32
Bt2_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
487 GET_MATCHES_HEADER(2)
489 curMatch
= p
->hash
[hashValue
];
490 p
->hash
[hashValue
] = p
->pos
;
492 GET_MATCHES_FOOTER(offset
, 1)
495 UInt32
Bt3Zip_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
498 GET_MATCHES_HEADER(3)
500 curMatch
= p
->hash
[hashValue
];
501 p
->hash
[hashValue
] = p
->pos
;
503 GET_MATCHES_FOOTER(offset
, 2)
506 static UInt32
Bt3_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
508 UInt32 hash2Value
, delta2
, maxLen
, offset
;
509 GET_MATCHES_HEADER(3)
513 delta2
= p
->pos
- p
->hash
[hash2Value
];
514 curMatch
= p
->hash
[kFix3HashSize
+ hashValue
];
516 p
->hash
[hash2Value
] =
517 p
->hash
[kFix3HashSize
+ hashValue
] = p
->pos
;
522 if (delta2
< p
->cyclicBufferSize
&& *(cur
- delta2
) == *cur
)
524 for (; maxLen
!= lenLimit
; maxLen
++)
525 if (cur
[(ptrdiff_t)maxLen
- delta2
] != cur
[maxLen
])
527 distances
[0] = maxLen
;
528 distances
[1] = delta2
- 1;
530 if (maxLen
== lenLimit
)
532 SkipMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
));
536 GET_MATCHES_FOOTER(offset
, maxLen
)
539 static UInt32
Bt4_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
541 UInt32 hash2Value
, hash3Value
, delta2
, delta3
, maxLen
, offset
;
542 GET_MATCHES_HEADER(4)
546 delta2
= p
->pos
- p
->hash
[ hash2Value
];
547 delta3
= p
->pos
- p
->hash
[kFix3HashSize
+ hash3Value
];
548 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
550 p
->hash
[ hash2Value
] =
551 p
->hash
[kFix3HashSize
+ hash3Value
] =
552 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
556 if (delta2
< p
->cyclicBufferSize
&& *(cur
- delta2
) == *cur
)
558 distances
[0] = maxLen
= 2;
559 distances
[1] = delta2
- 1;
562 if (delta2
!= delta3
&& delta3
< p
->cyclicBufferSize
&& *(cur
- delta3
) == *cur
)
565 distances
[offset
+ 1] = delta3
- 1;
571 for (; maxLen
!= lenLimit
; maxLen
++)
572 if (cur
[(ptrdiff_t)maxLen
- delta2
] != cur
[maxLen
])
574 distances
[offset
- 2] = maxLen
;
575 if (maxLen
== lenLimit
)
577 SkipMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
));
583 GET_MATCHES_FOOTER(offset
, maxLen
)
586 static UInt32
Hc4_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
588 UInt32 hash2Value
, hash3Value
, delta2
, delta3
, maxLen
, offset
;
589 GET_MATCHES_HEADER(4)
593 delta2
= p
->pos
- p
->hash
[ hash2Value
];
594 delta3
= p
->pos
- p
->hash
[kFix3HashSize
+ hash3Value
];
595 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
597 p
->hash
[ hash2Value
] =
598 p
->hash
[kFix3HashSize
+ hash3Value
] =
599 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
603 if (delta2
< p
->cyclicBufferSize
&& *(cur
- delta2
) == *cur
)
605 distances
[0] = maxLen
= 2;
606 distances
[1] = delta2
- 1;
609 if (delta2
!= delta3
&& delta3
< p
->cyclicBufferSize
&& *(cur
- delta3
) == *cur
)
612 distances
[offset
+ 1] = delta3
- 1;
618 for (; maxLen
!= lenLimit
; maxLen
++)
619 if (cur
[(ptrdiff_t)maxLen
- delta2
] != cur
[maxLen
])
621 distances
[offset
- 2] = maxLen
;
622 if (maxLen
== lenLimit
)
624 p
->son
[p
->cyclicBufferPos
] = curMatch
;
630 offset
= (UInt32
)(Hc_GetMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
),
631 distances
+ offset
, maxLen
) - (distances
));
635 UInt32
Hc3Zip_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
638 GET_MATCHES_HEADER(3)
640 curMatch
= p
->hash
[hashValue
];
641 p
->hash
[hashValue
] = p
->pos
;
642 offset
= (UInt32
)(Hc_GetMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
),
643 distances
, 2) - (distances
));
647 static void Bt2_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
653 curMatch
= p
->hash
[hashValue
];
654 p
->hash
[hashValue
] = p
->pos
;
660 void Bt3Zip_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
666 curMatch
= p
->hash
[hashValue
];
667 p
->hash
[hashValue
] = p
->pos
;
673 static void Bt3_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
680 curMatch
= p
->hash
[kFix3HashSize
+ hashValue
];
681 p
->hash
[hash2Value
] =
682 p
->hash
[kFix3HashSize
+ hashValue
] = p
->pos
;
688 static void Bt4_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
692 UInt32 hash2Value
, hash3Value
;
695 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
696 p
->hash
[ hash2Value
] =
697 p
->hash
[kFix3HashSize
+ hash3Value
] = p
->pos
;
698 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
704 static void Hc4_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
708 UInt32 hash2Value
, hash3Value
;
711 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
712 p
->hash
[ hash2Value
] =
713 p
->hash
[kFix3HashSize
+ hash3Value
] =
714 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
715 p
->son
[p
->cyclicBufferPos
] = curMatch
;
721 void Hc3Zip_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
727 curMatch
= p
->hash
[hashValue
];
728 p
->hash
[hashValue
] = p
->pos
;
729 p
->son
[p
->cyclicBufferPos
] = curMatch
;
735 void MatchFinder_CreateVTable(CMatchFinder
*p
, IMatchFinder
*vTable
)
737 vTable
->Init
= (Mf_Init_Func
)MatchFinder_Init
;
738 vTable
->GetIndexByte
= (Mf_GetIndexByte_Func
)MatchFinder_GetIndexByte
;
739 vTable
->GetNumAvailableBytes
= (Mf_GetNumAvailableBytes_Func
)MatchFinder_GetNumAvailableBytes
;
740 vTable
->GetPointerToCurrentPos
= (Mf_GetPointerToCurrentPos_Func
)MatchFinder_GetPointerToCurrentPos
;
743 vTable
->GetMatches
= (Mf_GetMatches_Func
)Hc4_MatchFinder_GetMatches
;
744 vTable
->Skip
= (Mf_Skip_Func
)Hc4_MatchFinder_Skip
;
746 else if (p
->numHashBytes
== 2)
748 vTable
->GetMatches
= (Mf_GetMatches_Func
)Bt2_MatchFinder_GetMatches
;
749 vTable
->Skip
= (Mf_Skip_Func
)Bt2_MatchFinder_Skip
;
751 else if (p
->numHashBytes
== 3)
753 vTable
->GetMatches
= (Mf_GetMatches_Func
)Bt3_MatchFinder_GetMatches
;
754 vTable
->Skip
= (Mf_Skip_Func
)Bt3_MatchFinder_Skip
;
758 vTable
->GetMatches
= (Mf_GetMatches_Func
)Bt4_MatchFinder_GetMatches
;
759 vTable
->Skip
= (Mf_Skip_Func
)Bt4_MatchFinder_Skip
;