libgo/go/compress/flate/deflate.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package flate
   6
   7 import (
   8         "io"
   9         "math"
  10         "os"
  11 )
  12
  13 const (
  14         NoCompression        = 0
  15         BestSpeed            = 1
  16         fastCompression      = 3
  17         BestCompression      = 9
  18         DefaultCompression   = -1
  19         logMaxOffsetSize     = 15  // Standard DEFLATE
  20         wideLogMaxOffsetSize = 22  // Wide DEFLATE
  21         minMatchLength       = 3   // The smallest match that the compressor looks for
  22         maxMatchLength       = 258 // The longest match for the compressor
  23         minOffsetSize        = 1   // The shortest offset that makes any sence
  24
  25         // The maximum number of tokens we put into a single flat block, just too
  26         // stop things from getting too large.
  27         maxFlateBlockTokens = 1 << 14
  28         maxStoreBlockSize   = 65535
  29         hashBits            = 15
  30         hashSize            = 1 << hashBits
  31         hashMask            = (1 << hashBits) - 1
  32         hashShift           = (hashBits + minMatchLength - 1) / minMatchLength
  33 )
  34
  35 type syncPipeReader struct {
  36         *io.PipeReader
  37         closeChan chan bool
  38 }
  39
  40 func (sr *syncPipeReader) CloseWithError(err os.Error) os.Error {
  41         retErr := sr.PipeReader.CloseWithError(err)
  42         sr.closeChan <- true // finish writer close
  43         return retErr
  44 }
  45
  46 type syncPipeWriter struct {
  47         *io.PipeWriter
  48         closeChan chan bool
  49 }
  50
  51 type compressionLevel struct {
  52         good, lazy, nice, chain, fastSkipHashing int
  53 }
  54
  55 var levels = []compressionLevel{
  56         {}, // 0
  57         // For levels 1-3 we don't bother trying with lazy matches
  58         {3, 0, 8, 4, 4},
  59         {3, 0, 16, 8, 5},
  60         {3, 0, 32, 32, 6},
  61         // Levels 4-9 use increasingly more lazy matching
  62         // and increasingly stringent conditions for "good enough".
  63         {4, 4, 16, 16, math.MaxInt32},
  64         {8, 16, 32, 32, math.MaxInt32},
  65         {8, 16, 128, 128, math.MaxInt32},
  66         {8, 32, 128, 256, math.MaxInt32},
  67         {32, 128, 258, 1024, math.MaxInt32},
  68         {32, 258, 258, 4096, math.MaxInt32},
  69 }
  70
  71 func (sw *syncPipeWriter) Close() os.Error {
  72         err := sw.PipeWriter.Close()
  73         <-sw.closeChan // wait for reader close
  74         return err
  75 }
  76
  77 func syncPipe() (*syncPipeReader, *syncPipeWriter) {
  78         r, w := io.Pipe()
  79         sr := &syncPipeReader{r, make(chan bool, 1)}
  80         sw := &syncPipeWriter{w, sr.closeChan}
  81         return sr, sw
  82 }
  83
  84 type compressor struct {
  85         level         int
  86         logWindowSize uint
  87         w             *huffmanBitWriter
  88         r             io.Reader
  89         // (1 << logWindowSize) - 1.
  90         windowMask int
  91
  92         // hashHead[hashValue] contains the largest inputIndex with the specified hash value
  93         hashHead []int
  94
  95         // If hashHead[hashValue] is within the current window, then
  96         // hashPrev[hashHead[hashValue] & windowMask] contains the previous index
  97         // with the same hash value.
  98         hashPrev []int
  99
 100         // If we find a match of length >= niceMatch, then we don't bother searching
 101         // any further.
 102         niceMatch int
 103
 104         // If we find a match of length >= goodMatch, we only do a half-hearted
 105         // effort at doing lazy matching starting at the next character
 106         goodMatch int
 107
 108         // The maximum number of chains we look at when finding a match
 109         maxChainLength int
 110
 111         // The sliding window we use for matching
 112         window []byte
 113
 114         // The index just past the last valid character
 115         windowEnd int
 116
 117         // index in "window" at which current block starts
 118         blockStart int
 119 }
 120
 121 func (d *compressor) flush() os.Error {
 122         d.w.flush()
 123         return d.w.err
 124 }
 125
 126 func (d *compressor) fillWindow(index int) (int, os.Error) {
 127         wSize := d.windowMask + 1
 128         if index >= wSize+wSize-(minMatchLength+maxMatchLength) {
 129                 // shift the window by wSize
 130                 copy(d.window, d.window[wSize:2*wSize])
 131                 index -= wSize
 132                 d.windowEnd -= wSize
 133                 if d.blockStart >= wSize {
 134                         d.blockStart -= wSize
 135                 } else {
 136                         d.blockStart = math.MaxInt32
 137                 }
 138                 for i, h := range d.hashHead {
 139                         d.hashHead[i] = max(h-wSize, -1)
 140                 }
 141                 for i, h := range d.hashPrev {
 142                         d.hashPrev[i] = max(h-wSize, -1)
 143                 }
 144         }
 145         var count int
 146         var err os.Error
 147         count, err = io.ReadAtLeast(d.r, d.window[d.windowEnd:], 1)
 148         d.windowEnd += count
 149         if err == os.EOF {
 150                 return index, nil
 151         }
 152         return index, err
 153 }
 154
 155 func (d *compressor) writeBlock(tokens []token, index int, eof bool) os.Error {
 156         if index > 0 || eof {
 157                 var window []byte
 158                 if d.blockStart <= index {
 159                         window = d.window[d.blockStart:index]
 160                 }
 161                 d.blockStart = index
 162                 d.w.writeBlock(tokens, eof, window)
 163                 return d.w.err
 164         }
 165         return nil
 166 }
 167
 168 // Try to find a match starting at index whose length is greater than prevSize.
 169 // We only look at chainCount possibilities before giving up.
 170 func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
 171         win := d.window[0 : pos+min(maxMatchLength, lookahead)]
 172
 173         // We quit when we get a match that's at least nice long
 174         nice := min(d.niceMatch, len(win)-pos)
 175
 176         // If we've got a match that's good enough, only look in 1/4 the chain.
 177         tries := d.maxChainLength
 178         length = prevLength
 179         if length >= d.goodMatch {
 180                 tries >>= 2
 181         }
 182
 183         w0 := win[pos]
 184         w1 := win[pos+1]
 185         wEnd := win[pos+length]
 186         minIndex := pos - (d.windowMask + 1)
 187
 188         for i := prevHead; tries > 0; tries-- {
 189                 if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] {
 190                         // The hash function ensures that if win[i] and win[i+1] match, win[i+2] matches
 191
 192                         n := 3
 193                         for pos+n < len(win) && win[i+n] == win[pos+n] {
 194                                 n++
 195                         }
 196                         if n > length && (n > 3 || pos-i <= 4096) {
 197                                 length = n
 198                                 offset = pos - i
 199                                 ok = true
 200                                 if n >= nice {
 201                                         // The match is good enough that we don't try to find a better one.
 202                                         break
 203                                 }
 204                                 wEnd = win[pos+n]
 205                         }
 206                 }
 207                 if i == minIndex {
 208                         // hashPrev[i & windowMask] has already been overwritten, so stop now.
 209                         break
 210                 }
 211                 if i = d.hashPrev[i&d.windowMask]; i < minIndex || i < 0 {
 212                         break
 213                 }
 214         }
 215         return
 216 }
 217
 218 func (d *compressor) writeStoredBlock(buf []byte) os.Error {
 219         if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
 220                 return d.w.err
 221         }
 222         d.w.writeBytes(buf)
 223         return d.w.err
 224 }
 225
 226 func (d *compressor) storedDeflate() os.Error {
 227         buf := make([]byte, maxStoreBlockSize)
 228         for {
 229                 n, err := d.r.Read(buf)
 230                 if n > 0 {
 231                         if err := d.writeStoredBlock(buf[0:n]); err != nil {
 232                                 return err
 233                         }
 234                 }
 235                 if err != nil {
 236                         if err == os.EOF {
 237                                 break
 238                         }
 239                         return err
 240                 }
 241         }
 242         return nil
 243 }
 244
 245 func (d *compressor) doDeflate() (err os.Error) {
 246         // init
 247         d.windowMask = 1<<d.logWindowSize - 1
 248         d.hashHead = make([]int, hashSize)
 249         d.hashPrev = make([]int, 1<<d.logWindowSize)
 250         d.window = make([]byte, 2<<d.logWindowSize)
 251         fillInts(d.hashHead, -1)
 252         tokens := make([]token, maxFlateBlockTokens, maxFlateBlockTokens+1)
 253         l := levels[d.level]
 254         d.goodMatch = l.good
 255         d.niceMatch = l.nice
 256         d.maxChainLength = l.chain
 257         lazyMatch := l.lazy
 258         length := minMatchLength - 1
 259         offset := 0
 260         byteAvailable := false
 261         isFastDeflate := l.fastSkipHashing != 0
 262         index := 0
 263         // run
 264         if index, err = d.fillWindow(index); err != nil {
 265                 return
 266         }
 267         maxOffset := d.windowMask + 1 // (1 << logWindowSize);
 268         // only need to change when you refill the window
 269         windowEnd := d.windowEnd
 270         maxInsertIndex := windowEnd - (minMatchLength - 1)
 271         ti := 0
 272
 273         hash := int(0)
 274         if index < maxInsertIndex {
 275                 hash = int(d.window[index])<<hashShift + int(d.window[index+1])
 276         }
 277         chainHead := -1
 278         for {
 279                 if index > windowEnd {
 280                         panic("index > windowEnd")
 281                 }
 282                 lookahead := windowEnd - index
 283                 if lookahead < minMatchLength+maxMatchLength {
 284                         if index, err = d.fillWindow(index); err != nil {
 285                                 return
 286                         }
 287                         windowEnd = d.windowEnd
 288                         if index > windowEnd {
 289                                 panic("index > windowEnd")
 290                         }
 291                         maxInsertIndex = windowEnd - (minMatchLength - 1)
 292                         lookahead = windowEnd - index
 293                         if lookahead == 0 {
 294                                 break
 295                         }
 296                 }
 297                 if index < maxInsertIndex {
 298                         // Update the hash
 299                         hash = (hash<<hashShift + int(d.window[index+2])) & hashMask
 300                         chainHead = d.hashHead[hash]
 301                         d.hashPrev[index&d.windowMask] = chainHead
 302                         d.hashHead[hash] = index
 303                 }
 304                 prevLength := length
 305                 prevOffset := offset
 306                 minIndex := max(index-maxOffset, 0)
 307                 length = minMatchLength - 1
 308                 offset = 0
 309
 310                 if chainHead >= minIndex &&
 311                         (isFastDeflate && lookahead > minMatchLength-1 ||
 312                                 !isFastDeflate && lookahead > prevLength && prevLength < lazyMatch) {
 313                         if newLength, newOffset, ok := d.findMatch(index, chainHead, minMatchLength-1, lookahead); ok {
 314                                 length = newLength
 315                                 offset = newOffset
 316                         }
 317                 }
 318                 if isFastDeflate && length >= minMatchLength ||
 319                         !isFastDeflate && prevLength >= minMatchLength && length <= prevLength {
 320                         // There was a match at the previous step, and the current match is
 321                         // not better. Output the previous match.
 322                         if isFastDeflate {
 323                                 tokens[ti] = matchToken(uint32(length-minMatchLength), uint32(offset-minOffsetSize))
 324                         } else {
 325                                 tokens[ti] = matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize))
 326                         }
 327                         ti++
 328                         // Insert in the hash table all strings up to the end of the match.
 329                         // index and index-1 are already inserted. If there is not enough
 330                         // lookahead, the last two strings are not inserted into the hash
 331                         // table.
 332                         if length <= l.fastSkipHashing {
 333                                 var newIndex int
 334                                 if isFastDeflate {
 335                                         newIndex = index + length
 336                                 } else {
 337                                         newIndex = prevLength - 1
 338                                 }
 339                                 for index++; index < newIndex; index++ {
 340                                         if index < maxInsertIndex {
 341                                                 hash = (hash<<hashShift + int(d.window[index+2])) & hashMask
 342                                                 // Get previous value with the same hash.
 343                                                 // Our chain should point to the previous value.
 344                                                 d.hashPrev[index&d.windowMask] = d.hashHead[hash]
 345                                                 // Set the head of the hash chain to us.
 346                                                 d.hashHead[hash] = index
 347                                         }
 348                                 }
 349                                 if !isFastDeflate {
 350                                         byteAvailable = false
 351                                         length = minMatchLength - 1
 352                                 }
 353                         } else {
 354                                 // For matches this long, we don't bother inserting each individual
 355                                 // item into the table.
 356                                 index += length
 357                                 hash = (int(d.window[index])<<hashShift + int(d.window[index+1]))
 358                         }
 359                         if ti == maxFlateBlockTokens {
 360                                 // The block includes the current character
 361                                 if err = d.writeBlock(tokens, index, false); err != nil {
 362                                         return
 363                                 }
 364                                 ti = 0
 365                         }
 366                 } else {
 367                         if isFastDeflate || byteAvailable {
 368                                 i := index - 1
 369                                 if isFastDeflate {
 370                                         i = index
 371                                 }
 372                                 tokens[ti] = literalToken(uint32(d.window[i]) & 0xFF)
 373                                 ti++
 374                                 if ti == maxFlateBlockTokens {
 375                                         if err = d.writeBlock(tokens, i+1, false); err != nil {
 376                                                 return
 377                                         }
 378                                         ti = 0
 379                                 }
 380                         }
 381                         index++
 382                         if !isFastDeflate {
 383                                 byteAvailable = true
 384                         }
 385                 }
 386
 387         }
 388         if byteAvailable {
 389                 // There is still one pending token that needs to be flushed
 390                 tokens[ti] = literalToken(uint32(d.window[index-1]) & 0xFF)
 391                 ti++
 392         }
 393
 394         if ti > 0 {
 395                 if err = d.writeBlock(tokens[0:ti], index, false); err != nil {
 396                         return
 397                 }
 398         }
 399         return
 400 }
 401
 402 func (d *compressor) compressor(r io.Reader, w io.Writer, level int, logWindowSize uint) (err os.Error) {
 403         d.r = r
 404         d.w = newHuffmanBitWriter(w)
 405         d.level = level
 406         d.logWindowSize = logWindowSize
 407
 408         switch {
 409         case level == NoCompression:
 410                 err = d.storedDeflate()
 411         case level == DefaultCompression:
 412                 d.level = 6
 413                 fallthrough
 414         case 1 <= level && level <= 9:
 415                 err = d.doDeflate()
 416         default:
 417                 return WrongValueError{"level", 0, 9, int32(level)}
 418         }
 419
 420         if err != nil {
 421                 return err
 422         }
 423         if d.w.writeStoredHeader(0, true); d.w.err != nil {
 424                 return d.w.err
 425         }
 426         return d.flush()
 427 }
 428
 429 func newCompressor(w io.Writer, level int, logWindowSize uint) io.WriteCloser {
 430         var d compressor
 431         pr, pw := syncPipe()
 432         go func() {
 433                 err := d.compressor(pr, w, level, logWindowSize)
 434                 pr.CloseWithError(err)
 435         }()
 436         return pw
 437 }
 438
 439 func NewWriter(w io.Writer, level int) io.WriteCloser {
 440         return newCompressor(w, level, logMaxOffsetSize)
 441 }