1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
7 // dictDecoder implements the LZ77 sliding dictionary as used in decompression.
8 // LZ77 decompresses data through sequences of two forms of commands:
10 // * Literal insertions: Runs of one or more symbols are inserted into the data
11 // stream as is. This is accomplished through the writeByte method for a
12 // single symbol, or combinations of writeSlice/writeMark for multiple symbols.
13 // Any valid stream must start with a literal insertion if no preset dictionary
16 // * Backward copies: Runs of one or more symbols are copied from previously
17 // emitted data. Backward copies come as the tuple (dist, length) where dist
18 // determines how far back in the stream to copy from and length determines how
19 // many bytes to copy. Note that it is valid for the length to be greater than
20 // the distance. Since LZ77 uses forward copies, that situation is used to
21 // perform a form of run-length encoding on repeated runs of symbols.
22 // The writeCopy and tryWriteCopy are used to implement this command.
24 // For performance reasons, this implementation performs little to no sanity
25 // checks about the arguments. As such, the invariants documented for each
26 // method call must be respected.
27 type dictDecoder
struct {
28 hist
[]byte // Sliding window history
30 // Invariant: 0 <= rdPos <= wrPos <= len(hist)
31 wrPos
int // Current output position in buffer
32 rdPos
int // Have emitted hist[:rdPos] already
33 full
bool // Has a full window length been written yet?
36 // init initializes dictDecoder to have a sliding window dictionary of the given
37 // size. If a preset dict is provided, it will initialize the dictionary with
38 // the contents of dict.
39 func (dd
*dictDecoder
) init(size
int, dict
[]byte) {
40 *dd
= dictDecoder
{hist
: dd
.hist
}
42 if cap(dd
.hist
) < size
{
43 dd
.hist
= make([]byte, size
)
45 dd
.hist
= dd
.hist
[:size
]
47 if len(dict
) > len(dd
.hist
) {
48 dict
= dict
[len(dict
)-len(dd
.hist
):]
50 dd
.wrPos
= copy(dd
.hist
, dict
)
51 if dd
.wrPos
== len(dd
.hist
) {
58 // histSize reports the total amount of historical data in the dictionary.
59 func (dd
*dictDecoder
) histSize() int {
66 // availRead reports the number of bytes that can be flushed by readFlush.
67 func (dd
*dictDecoder
) availRead() int {
68 return dd
.wrPos
- dd
.rdPos
71 // availWrite reports the available amount of output buffer space.
72 func (dd
*dictDecoder
) availWrite() int {
73 return len(dd
.hist
) - dd
.wrPos
76 // writeSlice returns a slice of the available buffer to write data to.
78 // This invariant will be kept: len(s) <= availWrite()
79 func (dd
*dictDecoder
) writeSlice() []byte {
80 return dd
.hist
[dd
.wrPos
:]
83 // writeMark advances the writer pointer by cnt.
85 // This invariant must be kept: 0 <= cnt <= availWrite()
86 func (dd
*dictDecoder
) writeMark(cnt
int) {
90 // writeByte writes a single byte to the dictionary.
92 // This invariant must be kept: 0 < availWrite()
93 func (dd
*dictDecoder
) writeByte(c
byte) {
98 // writeCopy copies a string at a given (dist, length) to the output.
99 // This returns the number of bytes copied and may be less than the requested
100 // length if the available space in the output buffer is too small.
102 // This invariant must be kept: 0 < dist <= histSize()
103 func (dd
*dictDecoder
) writeCopy(dist
, length
int) int {
106 srcPos
:= dstPos
- dist
107 endPos
:= dstPos
+ length
108 if endPos
> len(dd
.hist
) {
109 endPos
= len(dd
.hist
)
112 // Copy non-overlapping section after destination position.
114 // This section is non-overlapping in that the copy length for this section
115 // is always less than or equal to the backwards distance. This can occur
116 // if a distance refers to data that wraps-around in the buffer.
117 // Thus, a backwards copy is performed here; that is, the exact bytes in
118 // the source prior to the copy is placed in the destination.
120 srcPos
+= len(dd
.hist
)
121 dstPos
+= copy(dd
.hist
[dstPos
:endPos
], dd
.hist
[srcPos
:])
125 // Copy possibly overlapping section before destination position.
127 // This section can overlap if the copy length for this section is larger
128 // than the backwards distance. This is allowed by LZ77 so that repeated
129 // strings can be succinctly represented using (dist, length) pairs.
130 // Thus, a forwards copy is performed here; that is, the bytes copied is
131 // possibly dependent on the resulting bytes in the destination as the copy
132 // progresses along. This is functionally equivalent to the following:
134 // for i := 0; i < endPos-dstPos; i++ {
135 // dd.hist[dstPos+i] = dd.hist[srcPos+i]
139 for dstPos
< endPos
{
140 dstPos
+= copy(dd
.hist
[dstPos
:endPos
], dd
.hist
[srcPos
:dstPos
])
144 return dstPos
- dstBase
147 // tryWriteCopy tries to copy a string at a given (distance, length) to the
148 // output. This specialized version is optimized for short distances.
150 // This method is designed to be inlined for performance reasons.
152 // This invariant must be kept: 0 < dist <= histSize()
153 func (dd
*dictDecoder
) tryWriteCopy(dist
, length
int) int {
155 endPos
:= dstPos
+ length
156 if dstPos
< dist || endPos
> len(dd
.hist
) {
160 srcPos
:= dstPos
- dist
162 // Copy possibly overlapping section before destination position.
164 dstPos
+= copy(dd
.hist
[dstPos
:endPos
], dd
.hist
[srcPos
:dstPos
])
166 goto loop
// Avoid for-loop so that this function can be inlined
170 return dstPos
- dstBase
173 // readFlush returns a slice of the historical buffer that is ready to be
174 // emitted to the user. The data returned by readFlush must be fully consumed
175 // before calling any other dictDecoder methods.
176 func (dd
*dictDecoder
) readFlush() []byte {
177 toRead
:= dd
.hist
[dd
.rdPos
:dd
.wrPos
]
179 if dd
.wrPos
== len(dd
.hist
) {
180 dd
.wrPos
, dd
.rdPos
= 0, 0