libgo: update to Go 1.11
[official-gcc.git] / libgo / go / compress / flate / inflate.go
blob25e81f3f72745eb009911cacaeb499d4a5ecc0d2
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package flate implements the DEFLATE compressed data format, described in
6 // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
7 // formats.
8 package flate
10 import (
11 "bufio"
12 "io"
13 "math/bits"
14 "strconv"
15 "sync"
18 const (
19 maxCodeLen = 16 // max length of Huffman code
20 // The next three numbers come from the RFC section 3.2.7, with the
21 // additional proviso in section 3.2.5 which implies that distance codes
22 // 30 and 31 should never occur in compressed data.
23 maxNumLit = 286
24 maxNumDist = 30
25 numCodes = 19 // number of codes in Huffman meta-code
28 // Initialize the fixedHuffmanDecoder only once upon first use.
29 var fixedOnce sync.Once
30 var fixedHuffmanDecoder huffmanDecoder
32 // A CorruptInputError reports the presence of corrupt input at a given offset.
33 type CorruptInputError int64
35 func (e CorruptInputError) Error() string {
36 return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
39 // An InternalError reports an error in the flate code itself.
40 type InternalError string
42 func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
44 // A ReadError reports an error encountered while reading input.
46 // Deprecated: No longer returned.
47 type ReadError struct {
48 Offset int64 // byte offset where error occurred
49 Err error // error returned by underlying Read
52 func (e *ReadError) Error() string {
53 return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
56 // A WriteError reports an error encountered while writing output.
58 // Deprecated: No longer returned.
59 type WriteError struct {
60 Offset int64 // byte offset where error occurred
61 Err error // error returned by underlying Write
64 func (e *WriteError) Error() string {
65 return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
68 // Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
69 // to switch to a new underlying Reader. This permits reusing a ReadCloser
70 // instead of allocating a new one.
71 type Resetter interface {
72 // Reset discards any buffered data and resets the Resetter as if it was
73 // newly initialized with the given reader.
74 Reset(r io.Reader, dict []byte) error
77 // The data structure for decoding Huffman tables is based on that of
78 // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
79 // For codes smaller than the table width, there are multiple entries
80 // (each combination of trailing bits has the same value). For codes
81 // larger than the table width, the table contains a link to an overflow
82 // table. The width of each entry in the link table is the maximum code
83 // size minus the chunk width.
85 // Note that you can do a lookup in the table even without all bits
86 // filled. Since the extra bits are zero, and the DEFLATE Huffman codes
87 // have the property that shorter codes come before longer ones, the
88 // bit length estimate in the result is a lower bound on the actual
89 // number of bits.
91 // See the following:
92 // http://www.gzip.org/algorithm.txt
94 // chunk & 15 is number of bits
95 // chunk >> 4 is value, including table link
97 const (
98 huffmanChunkBits = 9
99 huffmanNumChunks = 1 << huffmanChunkBits
100 huffmanCountMask = 15
101 huffmanValueShift = 4
104 type huffmanDecoder struct {
105 min int // the minimum code length
106 chunks [huffmanNumChunks]uint32 // chunks as described above
107 links [][]uint32 // overflow links
108 linkMask uint32 // mask the width of the link table
111 // Initialize Huffman decoding tables from array of code lengths.
112 // Following this function, h is guaranteed to be initialized into a complete
113 // tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
114 // degenerate case where the tree has only a single symbol with length 1. Empty
115 // trees are permitted.
116 func (h *huffmanDecoder) init(lengths []int) bool {
117 // Sanity enables additional runtime tests during Huffman
118 // table construction. It's intended to be used during
119 // development to supplement the currently ad-hoc unit tests.
120 const sanity = false
122 if h.min != 0 {
123 *h = huffmanDecoder{}
126 // Count number of codes of each length,
127 // compute min and max length.
128 var count [maxCodeLen]int
129 var min, max int
130 for _, n := range lengths {
131 if n == 0 {
132 continue
134 if min == 0 || n < min {
135 min = n
137 if n > max {
138 max = n
140 count[n]++
143 // Empty tree. The decompressor.huffSym function will fail later if the tree
144 // is used. Technically, an empty tree is only valid for the HDIST tree and
145 // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
146 // is guaranteed to fail since it will attempt to use the tree to decode the
147 // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
148 // guaranteed to fail later since the compressed data section must be
149 // composed of at least one symbol (the end-of-block marker).
150 if max == 0 {
151 return true
154 code := 0
155 var nextcode [maxCodeLen]int
156 for i := min; i <= max; i++ {
157 code <<= 1
158 nextcode[i] = code
159 code += count[i]
162 // Check that the coding is complete (i.e., that we've
163 // assigned all 2-to-the-max possible bit sequences).
164 // Exception: To be compatible with zlib, we also need to
165 // accept degenerate single-code codings. See also
166 // TestDegenerateHuffmanCoding.
167 if code != 1<<uint(max) && !(code == 1 && max == 1) {
168 return false
171 h.min = min
172 if max > huffmanChunkBits {
173 numLinks := 1 << (uint(max) - huffmanChunkBits)
174 h.linkMask = uint32(numLinks - 1)
176 // create link tables
177 link := nextcode[huffmanChunkBits+1] >> 1
178 h.links = make([][]uint32, huffmanNumChunks-link)
179 for j := uint(link); j < huffmanNumChunks; j++ {
180 reverse := int(bits.Reverse16(uint16(j)))
181 reverse >>= uint(16 - huffmanChunkBits)
182 off := j - uint(link)
183 if sanity && h.chunks[reverse] != 0 {
184 panic("impossible: overwriting existing chunk")
186 h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
187 h.links[off] = make([]uint32, numLinks)
191 for i, n := range lengths {
192 if n == 0 {
193 continue
195 code := nextcode[n]
196 nextcode[n]++
197 chunk := uint32(i<<huffmanValueShift | n)
198 reverse := int(bits.Reverse16(uint16(code)))
199 reverse >>= uint(16 - n)
200 if n <= huffmanChunkBits {
201 for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
202 // We should never need to overwrite
203 // an existing chunk. Also, 0 is
204 // never a valid chunk, because the
205 // lower 4 "count" bits should be
206 // between 1 and 15.
207 if sanity && h.chunks[off] != 0 {
208 panic("impossible: overwriting existing chunk")
210 h.chunks[off] = chunk
212 } else {
213 j := reverse & (huffmanNumChunks - 1)
214 if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
215 // Longer codes should have been
216 // associated with a link table above.
217 panic("impossible: not an indirect chunk")
219 value := h.chunks[j] >> huffmanValueShift
220 linktab := h.links[value]
221 reverse >>= huffmanChunkBits
222 for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
223 if sanity && linktab[off] != 0 {
224 panic("impossible: overwriting existing chunk")
226 linktab[off] = chunk
231 if sanity {
232 // Above we've sanity checked that we never overwrote
233 // an existing entry. Here we additionally check that
234 // we filled the tables completely.
235 for i, chunk := range h.chunks {
236 if chunk == 0 {
237 // As an exception, in the degenerate
238 // single-code case, we allow odd
239 // chunks to be missing.
240 if code == 1 && i%2 == 1 {
241 continue
243 panic("impossible: missing chunk")
246 for _, linktab := range h.links {
247 for _, chunk := range linktab {
248 if chunk == 0 {
249 panic("impossible: missing chunk")
255 return true
258 // The actual read interface needed by NewReader.
259 // If the passed in io.Reader does not also have ReadByte,
260 // the NewReader will introduce its own buffering.
261 type Reader interface {
262 io.Reader
263 io.ByteReader
266 // Decompress state.
267 type decompressor struct {
268 // Input source.
269 r Reader
270 roffset int64
272 // Input bits, in top of b.
273 b uint32
274 nb uint
276 // Huffman decoders for literal/length, distance.
277 h1, h2 huffmanDecoder
279 // Length arrays used to define Huffman codes.
280 bits *[maxNumLit + maxNumDist]int
281 codebits *[numCodes]int
283 // Output history, buffer.
284 dict dictDecoder
286 // Temporary buffer (avoids repeated allocation).
287 buf [4]byte
289 // Next step in the decompression,
290 // and decompression state.
291 step func(*decompressor)
292 stepState int
293 final bool
294 err error
295 toRead []byte
296 hl, hd *huffmanDecoder
297 copyLen int
298 copyDist int
301 func (f *decompressor) nextBlock() {
302 for f.nb < 1+2 {
303 if f.err = f.moreBits(); f.err != nil {
304 return
307 f.final = f.b&1 == 1
308 f.b >>= 1
309 typ := f.b & 3
310 f.b >>= 2
311 f.nb -= 1 + 2
312 switch typ {
313 case 0:
314 f.dataBlock()
315 case 1:
316 // compressed, fixed Huffman tables
317 f.hl = &fixedHuffmanDecoder
318 f.hd = nil
319 f.huffmanBlock()
320 case 2:
321 // compressed, dynamic Huffman tables
322 if f.err = f.readHuffman(); f.err != nil {
323 break
325 f.hl = &f.h1
326 f.hd = &f.h2
327 f.huffmanBlock()
328 default:
329 // 3 is reserved.
330 f.err = CorruptInputError(f.roffset)
334 func (f *decompressor) Read(b []byte) (int, error) {
335 for {
336 if len(f.toRead) > 0 {
337 n := copy(b, f.toRead)
338 f.toRead = f.toRead[n:]
339 if len(f.toRead) == 0 {
340 return n, f.err
342 return n, nil
344 if f.err != nil {
345 return 0, f.err
347 f.step(f)
348 if f.err != nil && len(f.toRead) == 0 {
349 f.toRead = f.dict.readFlush() // Flush what's left in case of error
354 func (f *decompressor) Close() error {
355 if f.err == io.EOF {
356 return nil
358 return f.err
361 // RFC 1951 section 3.2.7.
362 // Compression with dynamic Huffman codes
364 var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
366 func (f *decompressor) readHuffman() error {
367 // HLIT[5], HDIST[5], HCLEN[4].
368 for f.nb < 5+5+4 {
369 if err := f.moreBits(); err != nil {
370 return err
373 nlit := int(f.b&0x1F) + 257
374 if nlit > maxNumLit {
375 return CorruptInputError(f.roffset)
377 f.b >>= 5
378 ndist := int(f.b&0x1F) + 1
379 if ndist > maxNumDist {
380 return CorruptInputError(f.roffset)
382 f.b >>= 5
383 nclen := int(f.b&0xF) + 4
384 // numCodes is 19, so nclen is always valid.
385 f.b >>= 4
386 f.nb -= 5 + 5 + 4
388 // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
389 for i := 0; i < nclen; i++ {
390 for f.nb < 3 {
391 if err := f.moreBits(); err != nil {
392 return err
395 f.codebits[codeOrder[i]] = int(f.b & 0x7)
396 f.b >>= 3
397 f.nb -= 3
399 for i := nclen; i < len(codeOrder); i++ {
400 f.codebits[codeOrder[i]] = 0
402 if !f.h1.init(f.codebits[0:]) {
403 return CorruptInputError(f.roffset)
406 // HLIT + 257 code lengths, HDIST + 1 code lengths,
407 // using the code length Huffman code.
408 for i, n := 0, nlit+ndist; i < n; {
409 x, err := f.huffSym(&f.h1)
410 if err != nil {
411 return err
413 if x < 16 {
414 // Actual length.
415 f.bits[i] = x
417 continue
419 // Repeat previous length or zero.
420 var rep int
421 var nb uint
422 var b int
423 switch x {
424 default:
425 return InternalError("unexpected length code")
426 case 16:
427 rep = 3
428 nb = 2
429 if i == 0 {
430 return CorruptInputError(f.roffset)
432 b = f.bits[i-1]
433 case 17:
434 rep = 3
435 nb = 3
436 b = 0
437 case 18:
438 rep = 11
439 nb = 7
440 b = 0
442 for f.nb < nb {
443 if err := f.moreBits(); err != nil {
444 return err
447 rep += int(f.b & uint32(1<<nb-1))
448 f.b >>= nb
449 f.nb -= nb
450 if i+rep > n {
451 return CorruptInputError(f.roffset)
453 for j := 0; j < rep; j++ {
454 f.bits[i] = b
459 if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
460 return CorruptInputError(f.roffset)
463 // As an optimization, we can initialize the min bits to read at a time
464 // for the HLIT tree to the length of the EOB marker since we know that
465 // every block must terminate with one. This preserves the property that
466 // we never read any extra bytes after the end of the DEFLATE stream.
467 if f.h1.min < f.bits[endBlockMarker] {
468 f.h1.min = f.bits[endBlockMarker]
471 return nil
474 // Decode a single Huffman block from f.
475 // hl and hd are the Huffman states for the lit/length values
476 // and the distance values, respectively. If hd == nil, using the
477 // fixed distance encoding associated with fixed Huffman blocks.
478 func (f *decompressor) huffmanBlock() {
479 const (
480 stateInit = iota // Zero value must be stateInit
481 stateDict
484 switch f.stepState {
485 case stateInit:
486 goto readLiteral
487 case stateDict:
488 goto copyHistory
491 readLiteral:
492 // Read literal and/or (length, distance) according to RFC section 3.2.3.
494 v, err := f.huffSym(f.hl)
495 if err != nil {
496 f.err = err
497 return
499 var n uint // number of bits extra
500 var length int
501 switch {
502 case v < 256:
503 f.dict.writeByte(byte(v))
504 if f.dict.availWrite() == 0 {
505 f.toRead = f.dict.readFlush()
506 f.step = (*decompressor).huffmanBlock
507 f.stepState = stateInit
508 return
510 goto readLiteral
511 case v == 256:
512 f.finishBlock()
513 return
514 // otherwise, reference to older data
515 case v < 265:
516 length = v - (257 - 3)
517 n = 0
518 case v < 269:
519 length = v*2 - (265*2 - 11)
520 n = 1
521 case v < 273:
522 length = v*4 - (269*4 - 19)
523 n = 2
524 case v < 277:
525 length = v*8 - (273*8 - 35)
526 n = 3
527 case v < 281:
528 length = v*16 - (277*16 - 67)
529 n = 4
530 case v < 285:
531 length = v*32 - (281*32 - 131)
532 n = 5
533 case v < maxNumLit:
534 length = 258
535 n = 0
536 default:
537 f.err = CorruptInputError(f.roffset)
538 return
540 if n > 0 {
541 for f.nb < n {
542 if err = f.moreBits(); err != nil {
543 f.err = err
544 return
547 length += int(f.b & uint32(1<<n-1))
548 f.b >>= n
549 f.nb -= n
552 var dist int
553 if f.hd == nil {
554 for f.nb < 5 {
555 if err = f.moreBits(); err != nil {
556 f.err = err
557 return
560 dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3)))
561 f.b >>= 5
562 f.nb -= 5
563 } else {
564 if dist, err = f.huffSym(f.hd); err != nil {
565 f.err = err
566 return
570 switch {
571 case dist < 4:
572 dist++
573 case dist < maxNumDist:
574 nb := uint(dist-2) >> 1
575 // have 1 bit in bottom of dist, need nb more.
576 extra := (dist & 1) << nb
577 for f.nb < nb {
578 if err = f.moreBits(); err != nil {
579 f.err = err
580 return
583 extra |= int(f.b & uint32(1<<nb-1))
584 f.b >>= nb
585 f.nb -= nb
586 dist = 1<<(nb+1) + 1 + extra
587 default:
588 f.err = CorruptInputError(f.roffset)
589 return
592 // No check on length; encoding can be prescient.
593 if dist > f.dict.histSize() {
594 f.err = CorruptInputError(f.roffset)
595 return
598 f.copyLen, f.copyDist = length, dist
599 goto copyHistory
602 copyHistory:
603 // Perform a backwards copy according to RFC section 3.2.3.
605 cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
606 if cnt == 0 {
607 cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
609 f.copyLen -= cnt
611 if f.dict.availWrite() == 0 || f.copyLen > 0 {
612 f.toRead = f.dict.readFlush()
613 f.step = (*decompressor).huffmanBlock // We need to continue this work
614 f.stepState = stateDict
615 return
617 goto readLiteral
621 // Copy a single uncompressed data block from input to output.
622 func (f *decompressor) dataBlock() {
623 // Uncompressed.
624 // Discard current half-byte.
625 f.nb = 0
626 f.b = 0
628 // Length then ones-complement of length.
629 nr, err := io.ReadFull(f.r, f.buf[0:4])
630 f.roffset += int64(nr)
631 if err != nil {
632 f.err = noEOF(err)
633 return
635 n := int(f.buf[0]) | int(f.buf[1])<<8
636 nn := int(f.buf[2]) | int(f.buf[3])<<8
637 if uint16(nn) != uint16(^n) {
638 f.err = CorruptInputError(f.roffset)
639 return
642 if n == 0 {
643 f.toRead = f.dict.readFlush()
644 f.finishBlock()
645 return
648 f.copyLen = n
649 f.copyData()
652 // copyData copies f.copyLen bytes from the underlying reader into f.hist.
653 // It pauses for reads when f.hist is full.
654 func (f *decompressor) copyData() {
655 buf := f.dict.writeSlice()
656 if len(buf) > f.copyLen {
657 buf = buf[:f.copyLen]
660 cnt, err := io.ReadFull(f.r, buf)
661 f.roffset += int64(cnt)
662 f.copyLen -= cnt
663 f.dict.writeMark(cnt)
664 if err != nil {
665 f.err = noEOF(err)
666 return
669 if f.dict.availWrite() == 0 || f.copyLen > 0 {
670 f.toRead = f.dict.readFlush()
671 f.step = (*decompressor).copyData
672 return
674 f.finishBlock()
677 func (f *decompressor) finishBlock() {
678 if f.final {
679 if f.dict.availRead() > 0 {
680 f.toRead = f.dict.readFlush()
682 f.err = io.EOF
684 f.step = (*decompressor).nextBlock
687 // noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
688 func noEOF(e error) error {
689 if e == io.EOF {
690 return io.ErrUnexpectedEOF
692 return e
695 func (f *decompressor) moreBits() error {
696 c, err := f.r.ReadByte()
697 if err != nil {
698 return noEOF(err)
700 f.roffset++
701 f.b |= uint32(c) << f.nb
702 f.nb += 8
703 return nil
706 // Read the next Huffman-encoded symbol from f according to h.
707 func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
708 // Since a huffmanDecoder can be empty or be composed of a degenerate tree
709 // with single element, huffSym must error on these two edge cases. In both
710 // cases, the chunks slice will be 0 for the invalid sequence, leading it
711 // satisfy the n == 0 check below.
712 n := uint(h.min)
713 // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
714 // but is smart enough to keep local variables in registers, so use nb and b,
715 // inline call to moreBits and reassign b,nb back to f on return.
716 nb, b := f.nb, f.b
717 for {
718 for nb < n {
719 c, err := f.r.ReadByte()
720 if err != nil {
721 f.b = b
722 f.nb = nb
723 return 0, noEOF(err)
725 f.roffset++
726 b |= uint32(c) << (nb & 31)
727 nb += 8
729 chunk := h.chunks[b&(huffmanNumChunks-1)]
730 n = uint(chunk & huffmanCountMask)
731 if n > huffmanChunkBits {
732 chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
733 n = uint(chunk & huffmanCountMask)
735 if n <= nb {
736 if n == 0 {
737 f.b = b
738 f.nb = nb
739 f.err = CorruptInputError(f.roffset)
740 return 0, f.err
742 f.b = b >> (n & 31)
743 f.nb = nb - n
744 return int(chunk >> huffmanValueShift), nil
749 func makeReader(r io.Reader) Reader {
750 if rr, ok := r.(Reader); ok {
751 return rr
753 return bufio.NewReader(r)
756 func fixedHuffmanDecoderInit() {
757 fixedOnce.Do(func() {
758 // These come from the RFC section 3.2.6.
759 var bits [288]int
760 for i := 0; i < 144; i++ {
761 bits[i] = 8
763 for i := 144; i < 256; i++ {
764 bits[i] = 9
766 for i := 256; i < 280; i++ {
767 bits[i] = 7
769 for i := 280; i < 288; i++ {
770 bits[i] = 8
772 fixedHuffmanDecoder.init(bits[:])
776 func (f *decompressor) Reset(r io.Reader, dict []byte) error {
777 *f = decompressor{
778 r: makeReader(r),
779 bits: f.bits,
780 codebits: f.codebits,
781 dict: f.dict,
782 step: (*decompressor).nextBlock,
784 f.dict.init(maxMatchOffset, dict)
785 return nil
788 // NewReader returns a new ReadCloser that can be used
789 // to read the uncompressed version of r.
790 // If r does not also implement io.ByteReader,
791 // the decompressor may read more data than necessary from r.
792 // It is the caller's responsibility to call Close on the ReadCloser
793 // when finished reading.
795 // The ReadCloser returned by NewReader also implements Resetter.
796 func NewReader(r io.Reader) io.ReadCloser {
797 fixedHuffmanDecoderInit()
799 var f decompressor
800 f.r = makeReader(r)
801 f.bits = new([maxNumLit + maxNumDist]int)
802 f.codebits = new([numCodes]int)
803 f.step = (*decompressor).nextBlock
804 f.dict.init(maxMatchOffset, nil)
805 return &f
808 // NewReaderDict is like NewReader but initializes the reader
809 // with a preset dictionary. The returned Reader behaves as if
810 // the uncompressed data stream started with the given dictionary,
811 // which has already been read. NewReaderDict is typically used
812 // to read data compressed by NewWriterDict.
814 // The ReadCloser returned by NewReader also implements Resetter.
815 func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
816 fixedHuffmanDecoderInit()
818 var f decompressor
819 f.r = makeReader(r)
820 f.bits = new([maxNumLit + maxNumDist]int)
821 f.codebits = new([numCodes]int)
822 f.step = (*decompressor).nextBlock
823 f.dict.init(maxMatchOffset, dict)
824 return &f