1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
13 // hcode is a huffman code with a bit code and bit length.
18 type huffmanEncoder
struct {
20 freqcache
[]literalNode
22 lns byLiteral
// stored to avoid repeated allocation in generate
23 lfs byFreq
// stored to avoid repeated allocation in generate
26 type literalNode
struct {
31 // A levelInfo describes the state of the constructed tree for a given depth.
32 type levelInfo
struct {
33 // Our level. for better printing
36 // The frequency of the last node at this level
39 // The frequency of the next character to add to this level
42 // The frequency of the next pair (from level below) to add to this level.
43 // Only valid if the "needed" value of the next lower level is 0.
46 // The number of chains remaining to generate for this level before moving
47 // up to the next level
51 // set sets the code and length of an hcode.
52 func (h
*hcode
) set(code
uint16, length
uint16) {
57 func maxNode() literalNode
{ return literalNode
{math
.MaxUint16
, math
.MaxInt32
} }
59 func newHuffmanEncoder(size
int) *huffmanEncoder
{
60 return &huffmanEncoder
{codes
: make([]hcode
, size
)}
63 // Generates a HuffmanCode corresponding to the fixed literal table
64 func generateFixedLiteralEncoding() *huffmanEncoder
{
65 h
:= newHuffmanEncoder(maxNumLit
)
68 for ch
= 0; ch
< maxNumLit
; ch
++ {
73 // size 8, 000110000 .. 10111111
78 // size 9, 110010000 .. 111111111
83 // size 7, 0000000 .. 0010111
88 // size 8, 11000000 .. 11000111
92 codes
[ch
] = hcode
{code
: reverseBits(bits
, byte(size
)), len: size
}
97 func generateFixedOffsetEncoding() *huffmanEncoder
{
98 h
:= newHuffmanEncoder(30)
100 for ch
:= range codes
{
101 codes
[ch
] = hcode
{code
: reverseBits(uint16(ch
), 5), len: 5}
106 var fixedLiteralEncoding
*huffmanEncoder
= generateFixedLiteralEncoding()
107 var fixedOffsetEncoding
*huffmanEncoder
= generateFixedOffsetEncoding()
109 func (h
*huffmanEncoder
) bitLength(freq
[]int32) int {
111 for i
, f
:= range freq
{
113 total
+= int(f
) * int(h
.codes
[i
].len)
119 const maxBitsLimit
= 16
121 // Return the number of literals assigned to each bit size in the Huffman encoding
123 // This method is only called when list.length >= 3
124 // The cases of 0, 1, and 2 literals are handled by special case code.
126 // list An array of the literals with non-zero frequencies
127 // and their associated frequencies. The array is in order of increasing
128 // frequency, and has as its last element a special element with frequency
130 // maxBits The maximum number of bits that should be used to encode any literal.
131 // Must be less than 16.
132 // return An integer array in which array[i] indicates the number of literals
133 // that should be encoded in i bits.
134 func (h
*huffmanEncoder
) bitCounts(list
[]literalNode
, maxBits
int32) []int32 {
135 if maxBits
>= maxBitsLimit
{
136 panic("flate: maxBits too large")
138 n
:= int32(len(list
))
142 // The tree can't have greater depth than n - 1, no matter what. This
143 // saves a little bit of work in some small cases
148 // Create information about each of the levels.
149 // A bogus "Level 0" whose sole purpose is so that
150 // level1.prev.needed==0. This makes level1.nextPairFreq
151 // be a legitimate value that never gets chosen.
152 var levels
[maxBitsLimit
]levelInfo
153 // leafCounts[i] counts the number of literals at the left
154 // of ancestors of the rightmost node at level i.
155 // leafCounts[i][j] is the number of literals at the left
156 // of the level j ancestor.
157 var leafCounts
[maxBitsLimit
][maxBitsLimit
]int32
159 for level
:= int32(1); level
<= maxBits
; level
++ {
160 // For every level, the first two items are the first two characters.
161 // We initialize the levels as if we had already figured this out.
162 levels
[level
] = levelInfo
{
164 lastFreq
: list
[1].freq
,
165 nextCharFreq
: list
[2].freq
,
166 nextPairFreq
: list
[0].freq
+ list
[1].freq
,
168 leafCounts
[level
][level
] = 2
170 levels
[level
].nextPairFreq
= math
.MaxInt32
174 // We need a total of 2*n - 2 items at top level and have already generated 2.
175 levels
[maxBits
].needed
= 2*n
- 4
180 if l
.nextPairFreq
== math
.MaxInt32
&& l
.nextCharFreq
== math
.MaxInt32
{
181 // We've run out of both leafs and pairs.
182 // End all calculations for this level.
183 // To make sure we never come back to this level or any lower level,
184 // set nextPairFreq impossibly large.
186 levels
[level
+1].nextPairFreq
= math
.MaxInt32
191 prevFreq
:= l
.lastFreq
192 if l
.nextCharFreq
< l
.nextPairFreq
{
193 // The next item on this row is a leaf node.
194 n
:= leafCounts
[level
][level
] + 1
195 l
.lastFreq
= l
.nextCharFreq
196 // Lower leafCounts are the same of the previous node.
197 leafCounts
[level
][level
] = n
198 l
.nextCharFreq
= list
[n
].freq
200 // The next item on this row is a pair from the previous row.
201 // nextPairFreq isn't valid until we generate two
202 // more values in the level below
203 l
.lastFreq
= l
.nextPairFreq
204 // Take leaf counts from the lower level, except counts[level] remains the same.
205 copy(leafCounts
[level
][:level
], leafCounts
[level
-1][:level
])
206 levels
[l
.level
-1].needed
= 2
209 if l
.needed
--; l
.needed
== 0 {
210 // We've done everything we need to do for this level.
211 // Continue calculating one level up. Fill in nextPairFreq
212 // of that level with the sum of the two nodes we've just calculated on
214 if l
.level
== maxBits
{
218 levels
[l
.level
+1].nextPairFreq
= prevFreq
+ l
.lastFreq
221 // If we stole from below, move down temporarily to replenish it.
222 for levels
[level
-1].needed
> 0 {
228 // Somethings is wrong if at the end, the top level is null or hasn't used
229 // all of the leaves.
230 if leafCounts
[maxBits
][maxBits
] != n
{
231 panic("leafCounts[maxBits][maxBits] != n")
234 bitCount
:= h
.bitCount
[:maxBits
+1]
236 counts
:= &leafCounts
[maxBits
]
237 for level
:= maxBits
; level
> 0; level
-- {
238 // chain.leafCount gives the number of literals requiring at least "bits"
240 bitCount
[bits
] = counts
[level
] - counts
[level
-1]
246 // Look at the leaves and assign them a bit count and an encoding as specified
248 func (h
*huffmanEncoder
) assignEncodingAndSize(bitCount
[]int32, list
[]literalNode
) {
250 for n
, bits
:= range bitCount
{
252 if n
== 0 || bits
== 0 {
255 // The literals list[len(list)-bits] .. list[len(list)-bits]
256 // are encoded using "bits" bits, and get the values
257 // code, code + 1, .... The code values are
258 // assigned in literal order (not frequency order).
259 chunk
:= list
[len(list
)-int(bits
):]
262 for _
, node
:= range chunk
{
263 h
.codes
[node
.literal
] = hcode
{code
: reverseBits(code
, uint8(n
)), len: uint16(n
)}
266 list
= list
[0 : len(list
)-int(bits
)]
270 // Update this Huffman Code object to be the minimum code for the specified frequency count.
272 // freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
273 // maxBits The maximum number of bits to use for any literal.
274 func (h
*huffmanEncoder
) generate(freq
[]int32, maxBits
int32) {
275 if h
.freqcache
== nil {
276 // Allocate a reusable buffer with the longest possible frequency table.
277 // Possible lengths are codegenCodeCount, offsetCodeCount and maxNumLit.
278 // The largest of these is maxNumLit, so we allocate for that case.
279 h
.freqcache
= make([]literalNode
, maxNumLit
+1)
281 list
:= h
.freqcache
[:len(freq
)+1]
282 // Number of non-zero literals
284 // Set list to be the set of all non-zero literals and their frequencies
285 for i
, f
:= range freq
{
287 list
[count
] = literalNode
{uint16(i
), f
}
290 list
[count
] = literalNode
{}
294 list
[len(freq
)] = literalNode
{}
298 // Handle the small cases here, because they are awkward for the general case code. With
299 // two or fewer literals, everything has bit length 1.
300 for i
, node
:= range list
{
301 // "list" is in order of increasing literal value.
302 h
.codes
[node
.literal
].set(uint16(i
), 1)
308 // Get the number of literals for each bit count
309 bitCount
:= h
.bitCounts(list
, maxBits
)
310 // And do the assignment
311 h
.assignEncodingAndSize(bitCount
, list
)
314 type byLiteral
[]literalNode
316 func (s
*byLiteral
) sort(a
[]literalNode
) {
321 func (s byLiteral
) Len() int { return len(s
) }
323 func (s byLiteral
) Less(i
, j
int) bool {
324 return s
[i
].literal
< s
[j
].literal
327 func (s byLiteral
) Swap(i
, j
int) { s
[i
], s
[j
] = s
[j
], s
[i
] }
329 type byFreq
[]literalNode
331 func (s
*byFreq
) sort(a
[]literalNode
) {
336 func (s byFreq
) Len() int { return len(s
) }
338 func (s byFreq
) Less(i
, j
int) bool {
339 if s
[i
].freq
== s
[j
].freq
{
340 return s
[i
].literal
< s
[j
].literal
342 return s
[i
].freq
< s
[j
].freq
345 func (s byFreq
) Swap(i
, j
int) { s
[i
], s
[j
] = s
[j
], s
[i
] }
347 func reverseBits(number
uint16, bitLength
byte) uint16 {
348 return bits
.Reverse16(number
<< (16 - bitLength
))