2 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
3 * Use is subject to license terms.
6 /* LzFind.c -- Match finder for LZ algorithms
7 2008-10-04 : Igor Pavlov : Public domain */
14 #define kEmptyHashValue 0
15 #define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
16 #define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
17 #define kNormalizeMask (~(kNormalizeStepMin - 1))
18 #define kMaxHistorySize ((UInt32)3 << 30)
20 #define kStartMaxLen 3
22 static void LzInWindow_Free(CMatchFinder
*p
, ISzAlloc
*alloc
)
26 alloc
->Free(alloc
, p
->bufferBase
, 0);
31 /* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */
33 static int LzInWindow_Create(CMatchFinder
*p
, UInt32 keepSizeReserv
, ISzAlloc
*alloc
)
35 UInt32 blockSize
= p
->keepSizeBefore
+ p
->keepSizeAfter
+ keepSizeReserv
;
38 p
->blockSize
= blockSize
;
41 if (p
->bufferBase
== 0 || p
->blockSize
!= blockSize
)
43 LzInWindow_Free(p
, alloc
);
44 p
->blockSize
= blockSize
;
45 p
->bufferBase
= (Byte
*)alloc
->Alloc(alloc
, (size_t)blockSize
);
47 return (p
->bufferBase
!= 0);
50 Byte
*MatchFinder_GetPointerToCurrentPos(CMatchFinder
*p
) { return p
->buffer
; }
51 Byte
MatchFinder_GetIndexByte(CMatchFinder
*p
, Int32 index
) { return p
->buffer
[index
]; }
53 UInt32
MatchFinder_GetNumAvailableBytes(CMatchFinder
*p
) { return p
->streamPos
- p
->pos
; }
55 void MatchFinder_ReduceOffsets(CMatchFinder
*p
, UInt32 subValue
)
57 p
->posLimit
-= subValue
;
59 p
->streamPos
-= subValue
;
62 static void MatchFinder_ReadBlock(CMatchFinder
*p
)
64 if (p
->streamEndWasReached
|| p
->result
!= SZ_OK
)
68 Byte
*dest
= p
->buffer
+ (p
->streamPos
- p
->pos
);
69 size_t size
= (p
->bufferBase
+ p
->blockSize
- dest
);
72 p
->result
= p
->stream
->Read(p
->stream
, dest
, &size
);
73 if (p
->result
!= SZ_OK
)
77 p
->streamEndWasReached
= 1;
80 p
->streamPos
+= (UInt32
)size
;
81 if (p
->streamPos
- p
->pos
> p
->keepSizeAfter
)
86 void MatchFinder_MoveBlock(CMatchFinder
*p
)
88 memmove(p
->bufferBase
,
89 p
->buffer
- p
->keepSizeBefore
,
90 (size_t)(p
->streamPos
- p
->pos
+ p
->keepSizeBefore
));
91 p
->buffer
= p
->bufferBase
+ p
->keepSizeBefore
;
94 int MatchFinder_NeedMove(CMatchFinder
*p
)
96 /* if (p->streamEndWasReached) return 0; */
97 return ((size_t)(p
->bufferBase
+ p
->blockSize
- p
->buffer
) <= p
->keepSizeAfter
);
100 void MatchFinder_ReadIfRequired(CMatchFinder
*p
)
102 if (p
->streamEndWasReached
)
104 if (p
->keepSizeAfter
>= p
->streamPos
- p
->pos
)
105 MatchFinder_ReadBlock(p
);
108 static void MatchFinder_CheckAndMoveAndRead(CMatchFinder
*p
)
110 if (MatchFinder_NeedMove(p
))
111 MatchFinder_MoveBlock(p
);
112 MatchFinder_ReadBlock(p
);
115 static void MatchFinder_SetDefaultSettings(CMatchFinder
*p
)
120 /* p->skipModeBits = 0; */
125 #define kCrcPoly 0xEDB88320
127 void MatchFinder_Construct(CMatchFinder
*p
)
133 MatchFinder_SetDefaultSettings(p
);
135 for (i
= 0; i
< 256; i
++)
139 for (j
= 0; j
< 8; j
++)
140 r
= (r
>> 1) ^ (kCrcPoly
& ~((r
& 1) - 1));
145 static void MatchFinder_FreeThisClassMemory(CMatchFinder
*p
, ISzAlloc
*alloc
)
147 alloc
->Free(alloc
, p
->hash
, 0);
151 void MatchFinder_Free(CMatchFinder
*p
, ISzAlloc
*alloc
)
153 MatchFinder_FreeThisClassMemory(p
, alloc
);
154 LzInWindow_Free(p
, alloc
);
157 static CLzRef
* AllocRefs(UInt32 num
, ISzAlloc
*alloc
)
159 size_t sizeInBytes
= (size_t)num
* sizeof(CLzRef
);
160 if (sizeInBytes
/ sizeof(CLzRef
) != num
)
162 return (CLzRef
*)alloc
->Alloc(alloc
, sizeInBytes
);
165 int MatchFinder_Create(CMatchFinder
*p
, UInt32 historySize
,
166 UInt32 keepAddBufferBefore
, UInt32 matchMaxLen
, UInt32 keepAddBufferAfter
,
170 if (historySize
> kMaxHistorySize
)
172 MatchFinder_Free(p
, alloc
);
175 sizeReserv
= historySize
>> 1;
176 if (historySize
> ((UInt32
)2 << 30))
177 sizeReserv
= historySize
>> 2;
178 sizeReserv
+= (keepAddBufferBefore
+ matchMaxLen
+ keepAddBufferAfter
) / 2 + (1 << 19);
180 p
->keepSizeBefore
= historySize
+ keepAddBufferBefore
+ 1;
181 p
->keepSizeAfter
= matchMaxLen
+ keepAddBufferAfter
;
182 /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
183 if (LzInWindow_Create(p
, sizeReserv
, alloc
))
185 UInt32 newCyclicBufferSize
= (historySize
/* >> p->skipModeBits */) + 1;
187 p
->matchMaxLen
= matchMaxLen
;
189 p
->fixedHashSize
= 0;
190 if (p
->numHashBytes
== 2)
194 hs
= historySize
- 1;
200 /* hs >>= p->skipModeBits; */
201 hs
|= 0xFFFF; /* don't change it! It's required for Deflate */
204 if (p
->numHashBytes
== 3)
212 if (p
->numHashBytes
> 2) p
->fixedHashSize
+= kHash2Size
;
213 if (p
->numHashBytes
> 3) p
->fixedHashSize
+= kHash3Size
;
214 if (p
->numHashBytes
> 4) p
->fixedHashSize
+= kHash4Size
;
215 hs
+= p
->fixedHashSize
;
219 UInt32 prevSize
= p
->hashSizeSum
+ p
->numSons
;
221 p
->historySize
= historySize
;
223 p
->cyclicBufferSize
= newCyclicBufferSize
;
224 p
->numSons
= (p
->btMode
? newCyclicBufferSize
* 2 : newCyclicBufferSize
);
225 newSize
= p
->hashSizeSum
+ p
->numSons
;
226 if (p
->hash
!= 0 && prevSize
== newSize
)
228 MatchFinder_FreeThisClassMemory(p
, alloc
);
229 p
->hash
= AllocRefs(newSize
, alloc
);
232 p
->son
= p
->hash
+ p
->hashSizeSum
;
237 MatchFinder_Free(p
, alloc
);
241 static void MatchFinder_SetLimits(CMatchFinder
*p
)
243 UInt32 limit
= kMaxValForNormalize
- p
->pos
;
244 UInt32 limit2
= p
->cyclicBufferSize
- p
->cyclicBufferPos
;
247 limit2
= p
->streamPos
- p
->pos
;
248 if (limit2
<= p
->keepSizeAfter
)
254 limit2
-= p
->keepSizeAfter
;
258 UInt32 lenLimit
= p
->streamPos
- p
->pos
;
259 if (lenLimit
> p
->matchMaxLen
)
260 lenLimit
= p
->matchMaxLen
;
261 p
->lenLimit
= lenLimit
;
263 p
->posLimit
= p
->pos
+ limit
;
266 void MatchFinder_Init(CMatchFinder
*p
)
269 for (i
= 0; i
< p
->hashSizeSum
; i
++)
270 p
->hash
[i
] = kEmptyHashValue
;
271 p
->cyclicBufferPos
= 0;
272 p
->buffer
= p
->bufferBase
;
273 p
->pos
= p
->streamPos
= p
->cyclicBufferSize
;
275 p
->streamEndWasReached
= 0;
276 MatchFinder_ReadBlock(p
);
277 MatchFinder_SetLimits(p
);
280 static UInt32
MatchFinder_GetSubValue(CMatchFinder
*p
)
282 return (p
->pos
- p
->historySize
- 1) & kNormalizeMask
;
285 void MatchFinder_Normalize3(UInt32 subValue
, CLzRef
*items
, UInt32 numItems
)
288 for (i
= 0; i
< numItems
; i
++)
290 UInt32 value
= items
[i
];
291 if (value
<= subValue
)
292 value
= kEmptyHashValue
;
299 static void MatchFinder_Normalize(CMatchFinder
*p
)
301 UInt32 subValue
= MatchFinder_GetSubValue(p
);
302 MatchFinder_Normalize3(subValue
, p
->hash
, p
->hashSizeSum
+ p
->numSons
);
303 MatchFinder_ReduceOffsets(p
, subValue
);
306 static void MatchFinder_CheckLimits(CMatchFinder
*p
)
308 if (p
->pos
== kMaxValForNormalize
)
309 MatchFinder_Normalize(p
);
310 if (!p
->streamEndWasReached
&& p
->keepSizeAfter
== p
->streamPos
- p
->pos
)
311 MatchFinder_CheckAndMoveAndRead(p
);
312 if (p
->cyclicBufferPos
== p
->cyclicBufferSize
)
313 p
->cyclicBufferPos
= 0;
314 MatchFinder_SetLimits(p
);
317 static UInt32
* Hc_GetMatchesSpec(UInt32 lenLimit
, UInt32 curMatch
, UInt32 pos
, const Byte
*cur
, CLzRef
*son
,
318 UInt32 _cyclicBufferPos
, UInt32 _cyclicBufferSize
, UInt32 cutValue
,
319 UInt32
*distances
, UInt32 maxLen
)
321 son
[_cyclicBufferPos
] = curMatch
;
324 UInt32 delta
= pos
- curMatch
;
325 if (cutValue
-- == 0 || delta
>= _cyclicBufferSize
)
328 const Byte
*pb
= cur
- delta
;
329 curMatch
= son
[_cyclicBufferPos
- delta
+ ((delta
> _cyclicBufferPos
) ? _cyclicBufferSize
: 0)];
330 if (pb
[maxLen
] == cur
[maxLen
] && *pb
== *cur
)
333 while (++len
!= lenLimit
)
334 if (pb
[len
] != cur
[len
])
338 *distances
++ = maxLen
= len
;
339 *distances
++ = delta
- 1;
348 UInt32
* GetMatchesSpec1(UInt32 lenLimit
, UInt32 curMatch
, UInt32 pos
, const Byte
*cur
, CLzRef
*son
,
349 UInt32 _cyclicBufferPos
, UInt32 _cyclicBufferSize
, UInt32 cutValue
,
350 UInt32
*distances
, UInt32 maxLen
)
352 CLzRef
*ptr0
= son
+ (_cyclicBufferPos
<< 1) + 1;
353 CLzRef
*ptr1
= son
+ (_cyclicBufferPos
<< 1);
354 UInt32 len0
= 0, len1
= 0;
357 UInt32 delta
= pos
- curMatch
;
358 if (cutValue
-- == 0 || delta
>= _cyclicBufferSize
)
360 *ptr0
= *ptr1
= kEmptyHashValue
;
364 CLzRef
*pair
= son
+ ((_cyclicBufferPos
- delta
+ ((delta
> _cyclicBufferPos
) ? _cyclicBufferSize
: 0)) << 1);
365 const Byte
*pb
= cur
- delta
;
366 UInt32 len
= (len0
< len1
? len0
: len1
);
367 if (pb
[len
] == cur
[len
])
369 if (++len
!= lenLimit
&& pb
[len
] == cur
[len
])
370 while (++len
!= lenLimit
)
371 if (pb
[len
] != cur
[len
])
375 *distances
++ = maxLen
= len
;
376 *distances
++ = delta
- 1;
385 if (pb
[len
] < cur
[len
])
403 static void SkipMatchesSpec(UInt32 lenLimit
, UInt32 curMatch
, UInt32 pos
, const Byte
*cur
, CLzRef
*son
,
404 UInt32 _cyclicBufferPos
, UInt32 _cyclicBufferSize
, UInt32 cutValue
)
406 CLzRef
*ptr0
= son
+ (_cyclicBufferPos
<< 1) + 1;
407 CLzRef
*ptr1
= son
+ (_cyclicBufferPos
<< 1);
408 UInt32 len0
= 0, len1
= 0;
411 UInt32 delta
= pos
- curMatch
;
412 if (cutValue
-- == 0 || delta
>= _cyclicBufferSize
)
414 *ptr0
= *ptr1
= kEmptyHashValue
;
418 CLzRef
*pair
= son
+ ((_cyclicBufferPos
- delta
+ ((delta
> _cyclicBufferPos
) ? _cyclicBufferSize
: 0)) << 1);
419 const Byte
*pb
= cur
- delta
;
420 UInt32 len
= (len0
< len1
? len0
: len1
);
421 if (pb
[len
] == cur
[len
])
423 while (++len
!= lenLimit
)
424 if (pb
[len
] != cur
[len
])
435 if (pb
[len
] < cur
[len
])
454 ++p->cyclicBufferPos; \
456 if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
458 #define MOVE_POS_RET MOVE_POS return offset;
460 static void MatchFinder_MovePos(CMatchFinder
*p
) { MOVE_POS
; }
462 #define GET_MATCHES_HEADER2(minLen, ret_op) \
463 UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \
464 lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
467 #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
468 #define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue)
470 #define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
472 #define GET_MATCHES_FOOTER(offset, maxLen) \
473 offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \
474 distances + offset, maxLen) - distances); MOVE_POS_RET;
476 #define SKIP_FOOTER \
477 SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
479 static UInt32
Bt2_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
482 GET_MATCHES_HEADER(2)
484 curMatch
= p
->hash
[hashValue
];
485 p
->hash
[hashValue
] = p
->pos
;
487 GET_MATCHES_FOOTER(offset
, 1)
490 UInt32
Bt3Zip_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
493 GET_MATCHES_HEADER(3)
495 curMatch
= p
->hash
[hashValue
];
496 p
->hash
[hashValue
] = p
->pos
;
498 GET_MATCHES_FOOTER(offset
, 2)
501 static UInt32
Bt3_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
503 UInt32 hash2Value
, delta2
, maxLen
, offset
;
504 GET_MATCHES_HEADER(3)
508 delta2
= p
->pos
- p
->hash
[hash2Value
];
509 curMatch
= p
->hash
[kFix3HashSize
+ hashValue
];
511 p
->hash
[hash2Value
] =
512 p
->hash
[kFix3HashSize
+ hashValue
] = p
->pos
;
517 if (delta2
< p
->cyclicBufferSize
&& *(cur
- delta2
) == *cur
)
519 for (; maxLen
!= lenLimit
; maxLen
++)
520 if (cur
[(ptrdiff_t)maxLen
- delta2
] != cur
[maxLen
])
522 distances
[0] = maxLen
;
523 distances
[1] = delta2
- 1;
525 if (maxLen
== lenLimit
)
527 SkipMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
));
531 GET_MATCHES_FOOTER(offset
, maxLen
)
534 static UInt32
Bt4_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
536 UInt32 hash2Value
, hash3Value
, delta2
, delta3
, maxLen
, offset
;
537 GET_MATCHES_HEADER(4)
541 delta2
= p
->pos
- p
->hash
[ hash2Value
];
542 delta3
= p
->pos
- p
->hash
[kFix3HashSize
+ hash3Value
];
543 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
545 p
->hash
[ hash2Value
] =
546 p
->hash
[kFix3HashSize
+ hash3Value
] =
547 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
551 if (delta2
< p
->cyclicBufferSize
&& *(cur
- delta2
) == *cur
)
553 distances
[0] = maxLen
= 2;
554 distances
[1] = delta2
- 1;
557 if (delta2
!= delta3
&& delta3
< p
->cyclicBufferSize
&& *(cur
- delta3
) == *cur
)
560 distances
[offset
+ 1] = delta3
- 1;
566 for (; maxLen
!= lenLimit
; maxLen
++)
567 if (cur
[(ptrdiff_t)maxLen
- delta2
] != cur
[maxLen
])
569 distances
[offset
- 2] = maxLen
;
570 if (maxLen
== lenLimit
)
572 SkipMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
));
578 GET_MATCHES_FOOTER(offset
, maxLen
)
581 static UInt32
Hc4_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
583 UInt32 hash2Value
, hash3Value
, delta2
, delta3
, maxLen
, offset
;
584 GET_MATCHES_HEADER(4)
588 delta2
= p
->pos
- p
->hash
[ hash2Value
];
589 delta3
= p
->pos
- p
->hash
[kFix3HashSize
+ hash3Value
];
590 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
592 p
->hash
[ hash2Value
] =
593 p
->hash
[kFix3HashSize
+ hash3Value
] =
594 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
598 if (delta2
< p
->cyclicBufferSize
&& *(cur
- delta2
) == *cur
)
600 distances
[0] = maxLen
= 2;
601 distances
[1] = delta2
- 1;
604 if (delta2
!= delta3
&& delta3
< p
->cyclicBufferSize
&& *(cur
- delta3
) == *cur
)
607 distances
[offset
+ 1] = delta3
- 1;
613 for (; maxLen
!= lenLimit
; maxLen
++)
614 if (cur
[(ptrdiff_t)maxLen
- delta2
] != cur
[maxLen
])
616 distances
[offset
- 2] = maxLen
;
617 if (maxLen
== lenLimit
)
619 p
->son
[p
->cyclicBufferPos
] = curMatch
;
625 offset
= (UInt32
)(Hc_GetMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
),
626 distances
+ offset
, maxLen
) - (distances
));
630 UInt32
Hc3Zip_MatchFinder_GetMatches(CMatchFinder
*p
, UInt32
*distances
)
633 GET_MATCHES_HEADER(3)
635 curMatch
= p
->hash
[hashValue
];
636 p
->hash
[hashValue
] = p
->pos
;
637 offset
= (UInt32
)(Hc_GetMatchesSpec(lenLimit
, curMatch
, MF_PARAMS(p
),
638 distances
, 2) - (distances
));
642 static void Bt2_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
648 curMatch
= p
->hash
[hashValue
];
649 p
->hash
[hashValue
] = p
->pos
;
655 void Bt3Zip_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
661 curMatch
= p
->hash
[hashValue
];
662 p
->hash
[hashValue
] = p
->pos
;
668 static void Bt3_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
675 curMatch
= p
->hash
[kFix3HashSize
+ hashValue
];
676 p
->hash
[hash2Value
] =
677 p
->hash
[kFix3HashSize
+ hashValue
] = p
->pos
;
683 static void Bt4_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
687 UInt32 hash2Value
, hash3Value
;
690 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
691 p
->hash
[ hash2Value
] =
692 p
->hash
[kFix3HashSize
+ hash3Value
] = p
->pos
;
693 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
699 static void Hc4_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
703 UInt32 hash2Value
, hash3Value
;
706 curMatch
= p
->hash
[kFix4HashSize
+ hashValue
];
707 p
->hash
[ hash2Value
] =
708 p
->hash
[kFix3HashSize
+ hash3Value
] =
709 p
->hash
[kFix4HashSize
+ hashValue
] = p
->pos
;
710 p
->son
[p
->cyclicBufferPos
] = curMatch
;
716 void Hc3Zip_MatchFinder_Skip(CMatchFinder
*p
, UInt32 num
)
722 curMatch
= p
->hash
[hashValue
];
723 p
->hash
[hashValue
] = p
->pos
;
724 p
->son
[p
->cyclicBufferPos
] = curMatch
;
730 void MatchFinder_CreateVTable(CMatchFinder
*p
, IMatchFinder
*vTable
)
732 vTable
->Init
= (Mf_Init_Func
)MatchFinder_Init
;
733 vTable
->GetIndexByte
= (Mf_GetIndexByte_Func
)MatchFinder_GetIndexByte
;
734 vTable
->GetNumAvailableBytes
= (Mf_GetNumAvailableBytes_Func
)MatchFinder_GetNumAvailableBytes
;
735 vTable
->GetPointerToCurrentPos
= (Mf_GetPointerToCurrentPos_Func
)MatchFinder_GetPointerToCurrentPos
;
738 vTable
->GetMatches
= (Mf_GetMatches_Func
)Hc4_MatchFinder_GetMatches
;
739 vTable
->Skip
= (Mf_Skip_Func
)Hc4_MatchFinder_Skip
;
741 else if (p
->numHashBytes
== 2)
743 vTable
->GetMatches
= (Mf_GetMatches_Func
)Bt2_MatchFinder_GetMatches
;
744 vTable
->Skip
= (Mf_Skip_Func
)Bt2_MatchFinder_Skip
;
746 else if (p
->numHashBytes
== 3)
748 vTable
->GetMatches
= (Mf_GetMatches_Func
)Bt3_MatchFinder_GetMatches
;
749 vTable
->Skip
= (Mf_Skip_Func
)Bt3_MatchFinder_Skip
;
753 vTable
->GetMatches
= (Mf_GetMatches_Func
)Bt4_MatchFinder_GetMatches
;
754 vTable
->Skip
= (Mf_Skip_Func
)Bt4_MatchFinder_Skip
;