1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
15 // The largest offset code.
18 // The largest offset code in the extensions.
19 extendedOffsetCodeCount
= 42
21 // The special code used to mark the end of a block.
24 // The first length code.
25 lengthCodesStart
= 257
27 // The number of codegen codes.
32 // The number of extra bits needed by length code X - LENGTH_CODES_START.
33 var lengthExtraBits
= []int8{
35 /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
36 /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
37 /* 280 */ 4, 5, 5, 5, 5, 0,
40 // The length indicated by length code X - LENGTH_CODES_START.
41 var lengthBase
= []uint32{
42 0, 1, 2, 3, 4, 5, 6, 7, 8, 10,
43 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
44 64, 80, 96, 112, 128, 160, 192, 224, 255,
47 // offset code word extra bits.
48 var offsetExtraBits
= []int8{
49 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
50 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
51 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
53 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20,
56 var offsetBase
= []uint32{
58 0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
59 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
60 0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
61 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
62 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
63 0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
66 0x008000, 0x00c000, 0x010000, 0x018000, 0x020000,
67 0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000,
68 0x100000, 0x180000, 0x200000, 0x300000,
71 // The odd order in which the codegen code sizes are written.
72 var codegenOrder
= []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
74 type huffmanBitWriter
struct {
76 // Data waiting to be written is bytes[0:nbytes]
77 // and then the low nbits of bits.
86 literalEncoding
*huffmanEncoder
87 offsetEncoding
*huffmanEncoder
88 codegenEncoding
*huffmanEncoder
92 type WrongValueError
struct {
99 func newHuffmanBitWriter(w io
.Writer
) *huffmanBitWriter
{
100 return &huffmanBitWriter
{
102 literalFreq
: make([]int32, maxLit
),
103 offsetFreq
: make([]int32, extendedOffsetCodeCount
),
104 codegen
: make([]uint8, maxLit
+extendedOffsetCodeCount
+1),
105 codegenFreq
: make([]int32, codegenCodeCount
),
106 literalEncoding
: newHuffmanEncoder(maxLit
),
107 offsetEncoding
: newHuffmanEncoder(extendedOffsetCodeCount
),
108 codegenEncoding
: newHuffmanEncoder(codegenCodeCount
),
112 func (err WrongValueError
) String() string {
113 return "huffmanBitWriter: " + err
.name
+ " should belong to [" + strconv
.Itoa64(int64(err
.from
)) + ";" +
114 strconv
.Itoa64(int64(err
.to
)) + "] but actual value is " + strconv
.Itoa64(int64(err
.value
))
117 func (w
*huffmanBitWriter
) flushBits() {
126 w
.bytes
[n
] = byte(bits
)
127 w
.bytes
[n
+1] = byte(bits
>> 8)
128 if n
+= 2; n
>= len(w
.bytes
) {
129 _
, w
.err
= w
.w
.Write(w
.bytes
[0:])
135 func (w
*huffmanBitWriter
) flush() {
142 w
.bytes
[n
] = byte(w
.bits
)
148 w
.bytes
[n
] = byte(w
.bits
)
153 _
, w
.err
= w
.w
.Write(w
.bytes
[0:n
])
157 func (w
*huffmanBitWriter
) writeBits(b
, nb
int32) {
158 w
.bits |
= uint32(b
) << w
.nbits
159 if w
.nbits
+= uint32(nb
); w
.nbits
>= 16 {
164 func (w
*huffmanBitWriter
) writeBytes(bytes
[]byte) {
170 w
.bytes
[n
] = byte(w
.bits
)
175 w
.err
= InternalError("writeBytes with unfinished bits")
179 _
, w
.err
= w
.w
.Write(w
.bytes
[0:n
])
185 _
, w
.err
= w
.w
.Write(bytes
)
188 // RFC 1951 3.2.7 specifies a special run-length encoding for specifiying
189 // the literal and offset lengths arrays (which are concatenated into a single
190 // array). This method generates that run-length encoding.
192 // The result is written into the codegen array, and the frequencies
193 // of each code is written into the codegenFreq array.
194 // Codes 0-15 are single byte codes. Codes 16-18 are followed by additional
195 // information. Code badCode is an end marker
197 // numLiterals The number of literals in literalEncoding
198 // numOffsets The number of offsets in offsetEncoding
199 func (w
*huffmanBitWriter
) generateCodegen(numLiterals
int, numOffsets
int) {
200 fillInt32s(w
.codegenFreq
, 0)
201 // Note that we are using codegen both as a temporary variable for holding
202 // a copy of the frequencies, and as the place where we put the result.
203 // This is fine because the output is always shorter than the input used
205 codegen
:= w
.codegen
// cache
206 // Copy the concatenated code sizes to codegen. Put a marker at the end.
207 copyUint8s(codegen
[0:numLiterals
], w
.literalEncoding
.codeBits
)
208 copyUint8s(codegen
[numLiterals
:numLiterals
+numOffsets
], w
.offsetEncoding
.codeBits
)
209 codegen
[numLiterals
+numOffsets
] = badCode
214 for inIndex
:= 1; size
!= badCode
; inIndex
++ {
215 // INVARIANT: We have seen "count" copies of size that have not yet
216 // had output generated for them.
217 nextSize
:= codegen
[inIndex
]
218 if nextSize
== size
{
222 // We need to generate codegen indicating "count" of size.
224 codegen
[outIndex
] = size
226 w
.codegenFreq
[size
]++
230 codegen
[outIndex
] = 16
232 codegen
[outIndex
] = uint8(n
- 3)
240 codegen
[outIndex
] = 18
242 codegen
[outIndex
] = uint8(n
- 11)
248 // count >= 3 && count <= 10
249 codegen
[outIndex
] = 17
251 codegen
[outIndex
] = uint8(count
- 3)
258 for ; count
>= 0; count
-- {
259 codegen
[outIndex
] = size
261 w
.codegenFreq
[size
]++
263 // Set up invariant for next time through the loop.
267 // Marker indicating the end of the codegen.
268 codegen
[outIndex
] = badCode
271 func (w
*huffmanBitWriter
) writeCode(code
*huffmanEncoder
, literal
uint32) {
275 w
.writeBits(int32(code
.code
[literal
]), int32(code
.codeBits
[literal
]))
278 // Write the header of a dynamic Huffman block to the output stream.
280 // numLiterals The number of literals specified in codegen
281 // numOffsets The number of offsets specified in codegen
282 // numCodegens Tne number of codegens used in codegen
283 func (w
*huffmanBitWriter
) writeDynamicHeader(numLiterals
int, numOffsets
int, numCodegens
int, isEof
bool) {
287 var firstBits
int32 = 4
291 w
.writeBits(firstBits
, 3)
292 w
.writeBits(int32(numLiterals
-257), 5)
293 if numOffsets
> offsetCodeCount
{
294 // Extended version of decompressor
295 w
.writeBits(int32(offsetCodeCount
+((numOffsets
-(1+offsetCodeCount
))>>3)), 5)
296 w
.writeBits(int32((numOffsets
-(1+offsetCodeCount
))&0x7), 3)
298 w
.writeBits(int32(numOffsets
-1), 5)
300 w
.writeBits(int32(numCodegens
-4), 4)
302 for i
:= 0; i
< numCodegens
; i
++ {
303 value
:= w
.codegenEncoding
.codeBits
[codegenOrder
[i
]]
304 w
.writeBits(int32(value
), 3)
309 var codeWord
int = int(w
.codegen
[i
])
311 if codeWord
== badCode
{
314 // The low byte contains the actual code to generate.
315 w
.writeCode(w
.codegenEncoding
, uint32(codeWord
))
319 w
.writeBits(int32(w
.codegen
[i
]), 2)
323 w
.writeBits(int32(w
.codegen
[i
]), 3)
327 w
.writeBits(int32(w
.codegen
[i
]), 7)
334 func (w
*huffmanBitWriter
) writeStoredHeader(length
int, isEof
bool) {
344 w
.writeBits(int32(length
), 16)
345 w
.writeBits(int32(^uint16(length
)), 16)
348 func (w
*huffmanBitWriter
) writeFixedHeader(isEof
bool) {
352 // Indicate that we are a fixed Huffman block
357 w
.writeBits(value
, 3)
360 func (w
*huffmanBitWriter
) writeBlock(tokens
[]token
, eof
bool, input
[]byte) {
364 fillInt32s(w
.literalFreq
, 0)
365 fillInt32s(w
.offsetFreq
, 0)
368 tokens
= tokens
[0 : n
+1]
369 tokens
[n
] = endBlockMarker
371 totalLength
:= -1 // Subtract 1 for endBlock.
372 for _
, t
:= range tokens
{
375 w
.literalFreq
[t
.literal()]++
381 totalLength
+= int(length
+ 3)
382 w
.literalFreq
[lengthCodesStart
+lengthCode(length
)]++
383 w
.offsetFreq
[offsetCode(offset
)]++
387 w
.literalEncoding
.generate(w
.literalFreq
, 15)
388 w
.offsetEncoding
.generate(w
.offsetFreq
, 15)
390 // get the number of literals
391 numLiterals
:= len(w
.literalFreq
)
392 for w
.literalFreq
[numLiterals
-1] == 0 {
395 // get the number of offsets
396 numOffsets
:= len(w
.offsetFreq
)
397 for numOffsets
> 1 && w
.offsetFreq
[numOffsets
-1] == 0 {
402 storedBytes
= len(input
)
406 if storedBytes
<= maxStoreBlockSize
&& input
!= nil {
407 storedSize
= int64((storedBytes
+ 5) * 8)
408 // We only bother calculating the costs of the extra bits required by
409 // the length of offset fields (which will be the same for both fixed
410 // and dynamic encoding), if we need to compare those two encodings
411 // against stored encoding.
412 for lengthCode
:= lengthCodesStart
+ 8; lengthCode
< numLiterals
; lengthCode
++ {
413 // First eight length codes have extra size = 0.
414 extraBits
+= int64(w
.literalFreq
[lengthCode
]) * int64(lengthExtraBits
[lengthCode
-lengthCodesStart
])
416 for offsetCode
:= 4; offsetCode
< numOffsets
; offsetCode
++ {
417 // First four offset codes have extra size = 0.
418 extraBits
+= int64(w
.offsetFreq
[offsetCode
]) * int64(offsetExtraBits
[offsetCode
])
421 storedSize
= math
.MaxInt32
424 // Figure out which generates smaller code, fixed Huffman, dynamic
425 // Huffman, or just storing the data.
426 var fixedSize
int64 = math
.MaxInt64
427 if numOffsets
<= offsetCodeCount
{
428 fixedSize
= int64(3) +
429 fixedLiteralEncoding
.bitLength(w
.literalFreq
) +
430 fixedOffsetEncoding
.bitLength(w
.offsetFreq
) +
433 // Generate codegen and codegenFrequencies, which indicates how to encode
434 // the literalEncoding and the offsetEncoding.
435 w
.generateCodegen(numLiterals
, numOffsets
)
436 w
.codegenEncoding
.generate(w
.codegenFreq
, 7)
437 numCodegens
:= len(w
.codegenFreq
)
438 for numCodegens
> 4 && w
.codegenFreq
[codegenOrder
[numCodegens
-1]] == 0 {
441 extensionSummand
:= 0
442 if numOffsets
> offsetCodeCount
{
445 dynamicHeader
:= int64(3+5+5+4+(3*numCodegens
)) +
446 // Following line is an extension.
447 int64(extensionSummand
) +
448 w
.codegenEncoding
.bitLength(w
.codegenFreq
) +
450 int64(w
.codegenFreq
[16]*2) +
451 int64(w
.codegenFreq
[17]*3) +
452 int64(w
.codegenFreq
[18]*7)
453 dynamicSize
:= dynamicHeader
+
454 w
.literalEncoding
.bitLength(w
.literalFreq
) +
455 w
.offsetEncoding
.bitLength(w
.offsetFreq
)
457 if storedSize
< fixedSize
&& storedSize
< dynamicSize
{
458 w
.writeStoredHeader(storedBytes
, eof
)
459 w
.writeBytes(input
[0:storedBytes
])
462 var literalEncoding
*huffmanEncoder
463 var offsetEncoding
*huffmanEncoder
465 if fixedSize
<= dynamicSize
{
466 w
.writeFixedHeader(eof
)
467 literalEncoding
= fixedLiteralEncoding
468 offsetEncoding
= fixedOffsetEncoding
471 w
.writeDynamicHeader(numLiterals
, numOffsets
, numCodegens
, eof
)
472 literalEncoding
= w
.literalEncoding
473 offsetEncoding
= w
.offsetEncoding
477 for _
, t
:= range tokens
{
480 w
.writeCode(literalEncoding
, t
.literal())
485 lengthCode
:= lengthCode(length
)
486 w
.writeCode(literalEncoding
, lengthCode
+lengthCodesStart
)
487 extraLengthBits
:= int32(lengthExtraBits
[lengthCode
])
488 if extraLengthBits
> 0 {
489 extraLength
:= int32(length
- lengthBase
[lengthCode
])
490 w
.writeBits(extraLength
, extraLengthBits
)
494 offsetCode
:= offsetCode(offset
)
495 w
.writeCode(offsetEncoding
, offsetCode
)
496 extraOffsetBits
:= int32(offsetExtraBits
[offsetCode
])
497 if extraOffsetBits
> 0 {
498 extraOffset
:= int32(offset
- offsetBase
[offsetCode
])
499 w
.writeBits(extraOffset
, extraOffsetBits
)
503 panic("unknown token type: " + string(t
))