1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
14 // A writer is a buffered, flushable writer.
15 type writer
interface {
21 // A code is a 12 bit value, stored as a uint32 when encoding to avoid
22 // type conversions when shifting bits.
24 invalidCode
= 1<<32 - 1
25 // There are 1<<12 possible codes, which is an upper bound on the number of
26 // valid hash table entries at any given point in time. tableSize is 4x that.
27 tableSize
= 4 * 1 << 12
28 tableMask
= tableSize
- 1
29 // A hash table entry is a uint32. Zero is an invalid entry since the
30 // lower 12 bits of a valid entry must be a non-literal code.
34 // Writer is an LZW compressor. It writes the compressed form of the data
35 // to an underlying writer (see NewWriter).
37 // w is the writer that compressed bytes are written to.
39 // order, write, bits, nBits and width are the state for
40 // converting a code stream into a byte stream.
42 write
func(*Writer
, uint32) error
46 // litWidth is the width in bits of literal codes.
48 // hi is the code implied by the next code emission.
49 // overflow is the code at which hi overflows the code width.
51 // savedCode is the accumulated code at the end of the most recent Write
52 // call. It is equal to invalidCode if there was no such call.
54 // err is the first error encountered during writing. Closing the writer
55 // will make any future Write calls return errClosed
57 // table is the hash table from 20-bit keys to 12-bit values. Each table
58 // entry contains key<<12|val and collisions resolve by linear probing.
59 // The keys consist of a 12-bit code prefix and an 8-bit byte suffix.
60 // The values are a 12-bit code.
61 table
[tableSize
]uint32
64 // writeLSB writes the code c for "Least Significant Bits first" data.
65 func (w
*Writer
) writeLSB(c
uint32) error
{
66 w
.bits |
= c
<< w
.nBits
69 if err
:= w
.w
.WriteByte(uint8(w
.bits
)); err
!= nil {
78 // writeMSB writes the code c for "Most Significant Bits first" data.
79 func (w
*Writer
) writeMSB(c
uint32) error
{
80 w
.bits |
= c
<< (32 - w
.width
- w
.nBits
)
83 if err
:= w
.w
.WriteByte(uint8(w
.bits
>> 24)); err
!= nil {
92 // errOutOfCodes is an internal error that means that the writer has run out
93 // of unused codes and a clear code needs to be sent next.
94 var errOutOfCodes
= errors
.New("lzw: out of codes")
96 // incHi increments e.hi and checks for both overflow and running out of
97 // unused codes. In the latter case, incHi sends a clear code, resets the
98 // writer state and returns errOutOfCodes.
99 func (w
*Writer
) incHi() error
{
101 if w
.hi
== w
.overflow
{
106 clear
:= uint32(1) << w
.litWidth
107 if err
:= w
.write(w
, clear
); err
!= nil {
110 w
.width
= w
.litWidth
+ 1
112 w
.overflow
= clear
<< 1
113 for i
:= range w
.table
{
114 w
.table
[i
] = invalidEntry
121 // Write writes a compressed representation of p to w's underlying writer.
122 func (w
*Writer
) Write(p
[]byte) (n
int, err error
) {
129 if maxLit
:= uint8(1<<w
.litWidth
- 1); maxLit
!= 0xff {
130 for _
, x
:= range p
{
132 w
.err
= errors
.New("lzw: input byte too large for the litWidth")
139 if code
== invalidCode
{
140 // This is the first write; send a clear code.
141 // https://www.w3.org/Graphics/GIF/spec-gif89a.txt Appendix F
142 // "Variable-Length-Code LZW Compression" says that "Encoders should
143 // output a Clear code as the first code of each image data stream".
145 // LZW compression isn't only used by GIF, but it's cheap to follow
146 // that directive unconditionally.
147 clear
:= uint32(1) << w
.litWidth
148 if err
:= w
.write(w
, clear
); err
!= nil {
151 // After the starting clear code, the next code sent (for non-empty
152 // input) is always a literal code.
153 code
, p
= uint32(p
[0]), p
[1:]
156 for _
, x
:= range p
{
158 key
:= code
<<8 | literal
159 // If there is a hash table hit for this key then we continue the loop
160 // and do not emit a code yet.
161 hash
:= (key
>>12 ^ key
) & tableMask
162 for h
, t
:= hash
, w
.table
[hash
]; t
!= invalidEntry
; {
167 h
= (h
+ 1) & tableMask
170 // Otherwise, write the current code, and literal becomes the start of
171 // the next emitted code.
172 if w
.err
= w
.write(w
, code
); w
.err
!= nil {
176 // Increment e.hi, the next implied code. If we run out of codes, reset
177 // the writer state (including clearing the hash table) and continue.
178 if err1
:= w
.incHi(); err1
!= nil {
179 if err1
== errOutOfCodes
{
185 // Otherwise, insert key -> e.hi into the map that e.table represents.
187 if w
.table
[hash
] == invalidEntry
{
188 w
.table
[hash
] = (key
<< 12) | w
.hi
191 hash
= (hash
+ 1) & tableMask
198 // Close closes the Writer, flushing any pending output. It does not close
199 // w's underlying writer.
200 func (w
*Writer
) Close() error
{
202 if w
.err
== errClosed
{
207 // Make any future calls to Write return errClosed.
209 // Write the savedCode if valid.
210 if w
.savedCode
!= invalidCode
{
211 if err
:= w
.write(w
, w
.savedCode
); err
!= nil {
214 if err
:= w
.incHi(); err
!= nil && err
!= errOutOfCodes
{
218 // Write the starting clear code, as w.Write did not.
219 clear
:= uint32(1) << w
.litWidth
220 if err
:= w
.write(w
, clear
); err
!= nil {
224 // Write the eof code.
225 eof
:= uint32(1)<<w
.litWidth
+ 1
226 if err
:= w
.write(w
, eof
); err
!= nil {
229 // Write the final bits.
234 if err
:= w
.w
.WriteByte(uint8(w
.bits
)); err
!= nil {
241 // Reset clears the Writer's state and allows it to be reused again
243 func (w
*Writer
) Reset(dst io
.Writer
, order Order
, litWidth
int) {
245 w
.init(dst
, order
, litWidth
)
248 // NewWriter creates a new io.WriteCloser.
249 // Writes to the returned io.WriteCloser are compressed and written to w.
250 // It is the caller's responsibility to call Close on the WriteCloser when
252 // The number of bits to use for literal codes, litWidth, must be in the
253 // range [2,8] and is typically 8. Input bytes must be less than 1<<litWidth.
255 // It is guaranteed that the underlying type of the returned io.WriteCloser
257 func NewWriter(w io
.Writer
, order Order
, litWidth
int) io
.WriteCloser
{
258 return newWriter(w
, order
, litWidth
)
261 func newWriter(dst io
.Writer
, order Order
, litWidth
int) *Writer
{
263 w
.init(dst
, order
, litWidth
)
267 func (w
*Writer
) init(dst io
.Writer
, order Order
, litWidth
int) {
270 w
.write
= (*Writer
).writeLSB
272 w
.write
= (*Writer
).writeMSB
274 w
.err
= errors
.New("lzw: unknown order")
277 if litWidth
< 2 ||
8 < litWidth
{
278 w
.err
= fmt
.Errorf("lzw: litWidth %d out of range", litWidth
)
281 bw
, ok
:= dst
.(writer
)
282 if !ok
&& dst
!= nil {
283 bw
= bufio
.NewWriter(dst
)
291 w
.overflow
= 1 << (lw
+ 1)
292 w
.savedCode
= invalidCode