1 /* Inflater.java - Decompress a data stream
2 Copyright (C) 1999, 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
4 This file is part of GNU Classpath.
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING. If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library. Thus, the terms and
23 conditions of the GNU General Public License cover the whole
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module. An independent module is a module which is not derived from
33 or based on this library. If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so. If you do not wish to do so, delete this
36 exception statement from your version. */
38 package java
.util
.zip
;
40 /* Written using on-line Java Platform 1.2 API Specification
42 * Believed complete and correct.
46 * Inflater is used to decompress data that has been compressed according
47 * to the "deflate" standard (RFC 1951), optionally wrapped in the zlib format of RFC 1950.
49 * The usage is as follows. First you have to set some input with
50 * <code>setInput()</code>, then inflate() it. If inflate doesn't
51 * inflate any bytes there may be three reasons:
53 * <li>needsInput() returns true because the input buffer is empty.
54 * You have to provide more input with <code>setInput()</code>.
55 * NOTE: needsInput() also returns true when the stream is finished.
57 * <li>needsDictionary() returns true, you have to provide a preset
58 * dictionary with <code>setDictionary()</code>.</li>
59 * <li>finished() returns true, the inflater has finished.</li>
61 * Once the first output byte is produced, a dictionary will not be
62 * needed at a later stage.
64 * @author John Leuner, Jochen Hoenicke
71 /* Copy lengths for literal codes 257..285 */
72 private static final int CPLENS
[] =
74 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
75 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258
78 /* Extra bits for literal codes 257..285 */
79 private static final int CPLEXT
[] =
81 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
82 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
85 /* Copy offsets for distance codes 0..29 */
86 private static final int CPDIST
[] = {
87 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
88 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
89 8193, 12289, 16385, 24577
92 /* Extra bits for distance codes */
93 private static final int CPDEXT
[] = {
94 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
95 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
99 /* These are the states in which the inflater can be. */
100 private static final int DECODE_HEADER
= 0; // initial mode (also after reset) when nowrap is false
101 private static final int DECODE_DICT
= 1; // needsDictionary() returns true in this mode once neededBits is 0
102 private static final int DECODE_BLOCKS
= 2; // initial mode when nowrap is true; also set by setDictionary()
103 private static final int DECODE_STORED_LEN1
= 3; // set for a STORED_BLOCK after skipping to a byte boundary
104 private static final int DECODE_STORED_LEN2
= 4; // set after uncomprLen was read; nlen is checked against uncomprLen ^ 0xffff
105 private static final int DECODE_STORED
= 5; // set once the stored-block length check has passed
106 private static final int DECODE_DYN_HEADER
= 6; // set for a DYN_TREES block; dynHeader.decode(input) runs in this mode
107 private static final int DECODE_HUFFMAN
= 7; // set once litlenTree and distTree have been assigned
108 private static final int DECODE_HUFFMAN_LENBITS
= 8; // neededBits extra bits of the repeat length are still pending
109 private static final int DECODE_HUFFMAN_DIST
= 9; // the next symbol is read from distTree
110 private static final int DECODE_HUFFMAN_DISTBITS
= 10; // neededBits extra bits of the repeat distance are still pending
111 private static final int DECODE_CHKSUM
= 11; // inflate() hands out no window output while in this mode
112 private static final int FINISHED
= 12; // finished() additionally requires an empty output window
114 /** This variable contains the current state. */
118 * The adler checksum of the dictionary or of the decompressed
119 * stream, as it is written in the header resp. footer of the
120 * compressed stream. <br>
122 * Only valid if mode is DECODE_DICT or DECODE_CHKSUM.
124 private int readAdler
;
126 * The number of bits needed to complete the current state. This
127 * is valid, if mode is DECODE_DICT, DECODE_CHKSUM,
128 * DECODE_HUFFMAN_LENBITS or DECODE_HUFFMAN_DISTBITS.
130 private int neededBits
;
131 private int repLength
, repDist
;
132 private int uncomprLen
;
134 * True, if the last block flag was set in the last block of the
135 * inflated stream. This means that the stream ends after the
138 private boolean isLastBlock
;
141 * The total number of inflated bytes.
143 private int totalOut
;
145 * The total number of bytes set with setInput(). This is not the
146 * value returned by getTotalIn(), since this also includes the
151 * This variable stores the nowrap flag that was given to the constructor.
152 * True means, that the inflated stream doesn't contain a header nor the
153 * checksum in the footer.
155 private boolean nowrap
;
157 private StreamManipulator input
;
158 private OutputWindow outputWindow
;
159 private InflaterDynHeader dynHeader
;
160 private InflaterHuffmanTree litlenTree
, distTree
;
161 private Adler32 adler
;
164 * Creates a new inflater.
172 * Creates a new inflater.
173 * @param nowrap true if no header and checksum field appears in the
174 * stream. This is used for GZIPed input. For compatibility with
175 * Sun JDK you should provide one byte of input more than needed in
178 public Inflater (boolean nowrap
)
180 this.nowrap
= nowrap
;
181 this.adler
= new Adler32();
182 input
= new StreamManipulator();
183 outputWindow
= new OutputWindow();
184 mode
= nowrap ? DECODE_BLOCKS
: DECODE_HEADER
;
188 * Finalizes this object.
190 protected void finalize ()
192 /* Exists only for compatibility */
196 * Frees all objects allocated by the inflater. There's no reason
197 * to call this, since you can just rely on garbage collection (even
198 * for the Sun implementation). Exists only for compatibility
199 * with Sun's JDK, where the compressor allocates native memory.
200 * If you call any method (even reset) afterwards the behaviour is
202 * @deprecated Just clear all references to inflater instead.
215 * Returns true, if the inflater has finished. This means, that no
216 * input is needed and no output can be produced.
218 public boolean finished()
220 return mode
== FINISHED
&& outputWindow
.getAvailable() == 0;
224 * Gets the adler checksum. This is either the checksum of all
225 * uncompressed bytes returned by inflate(), or if needsDictionary()
226 * returns true (and thus no output was yet produced) this is the
227 * adler checksum of the expected dictionary.
228 * @return the adler checksum.
230 public int getAdler()
232 return needsDictionary() ? readAdler
: (int) adler
.getValue();
236 * Gets the number of unprocessed input bytes. Useful if the end of the
237 * stream is reached and you want to further process the bytes after
238 * the deflate stream.
239 * @return the number of bytes of the input which were not processed.
241 public int getRemaining()
243 return input
.getAvailableBytes();
247 * Gets the total number of processed compressed input bytes.
248 * @return the total number of processed input bytes.
250 public int getTotalIn()
252 return totalIn
- getRemaining();
256 * Gets the total number of output bytes returned by inflate().
257 * @return the total number of output bytes.
259 public int getTotalOut()
265 * Inflates the compressed stream to the output buffer. If this
266 * returns 0, you should check, whether needsDictionary(),
267 * needsInput() or finished() returns true, to determine why no
268 * further output is produced.
269 * @param buf the output buffer.
270 * @return the number of bytes written to the buffer, 0 if no further
271 * output can be produced.
272 * @exception DataFormatException if deflated stream is invalid.
273 * @exception IllegalArgumentException if buf has length 0.
275 public int inflate (byte[] buf
) throws DataFormatException
277 return inflate (buf
, 0, buf
.length
);
281 * Inflates the compressed stream to the output buffer. If this
282 * returns 0, you should check, whether needsDictionary(),
283 * needsInput() or finished() returns true, to determine why no
284 * further output is produced.
285 * @param buf the output buffer.
286 * @param off the offset into buffer where the output should start.
287 * @param len the maximum length of the output.
288 * @return the number of bytes written to the buffer, 0 if no further
289 * output can be produced.
290 * @exception DataFormatException if deflated stream is invalid.
291 * @exception IndexOutOfBoundsException if the off and/or len are wrong.
293 public int inflate (byte[] buf
, int off
, int len
) throws DataFormatException
295 /* Special case: len may be zero */
298 /* Check for correct buf, off, len triple */
299 if (0 > off
|| off
> off
+ len
|| off
+ len
> buf
.length
)
300 throw new ArrayIndexOutOfBoundsException();
305 if (mode
!= DECODE_CHKSUM
)
307 /* Don't give away any output, if we are waiting for the
308 * checksum in the input stream.
310 * With this trick we have always:
311 * needsInput() and not finished()
312 * implies more output can be produced.
314 more
= outputWindow
.copyOutput(buf
, off
, len
);
315 adler
.update(buf
, off
, more
);
324 while (decode() || (outputWindow
.getAvailable() > 0
325 && mode
!= DECODE_CHKSUM
));
330 * Returns true, if a preset dictionary is needed to inflate the input.
332 public boolean needsDictionary ()
334 return mode
== DECODE_DICT
&& neededBits
== 0;
338 * Returns true, if the input buffer is empty.
339 * You should then call setInput(). <br>
341 * <em>NOTE</em>: This method also returns true when the stream is finished.
343 public boolean needsInput ()
345 return input
.needsInput ();
349 * Resets the inflater so that a new stream can be decompressed. All
350 * pending input and output will be discarded.
354 mode
= nowrap ? DECODE_BLOCKS
: DECODE_HEADER
;
355 totalIn
= totalOut
= 0;
357 outputWindow
.reset();
366 * Sets the preset dictionary. This should only be called, if
367 * needsDictionary() returns true and it should set the same
368 * dictionary, that was used for deflating. The getAdler()
369 * function returns the checksum of the dictionary needed.
370 * @param buffer the dictionary.
371 * @exception IllegalStateException if no dictionary is needed.
372 * @exception IllegalArgumentException if the dictionary checksum is
375 public void setDictionary (byte[] buffer
)
377 setDictionary(buffer
, 0, buffer
.length
);
381 * Sets the preset dictionary. This should only be called, if
382 * needsDictionary() returns true and it should set the same
383 * dictionary, that was used for deflating. The getAdler()
384 * function returns the checksum of the dictionary needed.
385 * @param buffer the dictionary.
386 * @param off the offset into buffer where the dictionary starts.
387 * @param len the length of the dictionary.
388 * @exception IllegalStateException if no dictionary is needed.
389 * @exception IllegalArgumentException if the dictionary checksum is
391 * @exception IndexOutOfBoundsException if the off and/or len are wrong.
393 public void setDictionary (byte[] buffer
, int off
, int len
)
395 if (!needsDictionary())
396 throw new IllegalStateException();
398 adler
.update(buffer
, off
, len
);
399 if ((int) adler
.getValue() != readAdler
)
400 throw new IllegalArgumentException("Wrong adler checksum");
402 outputWindow
.copyDict(buffer
, off
, len
);
403 mode
= DECODE_BLOCKS
;
407 * Sets the input. This should only be called, if needsInput()
409 * @param buf the input.
410 * @exception IllegalStateException if no input is needed.
412 public void setInput (byte[] buf
)
414 setInput (buf
, 0, buf
.length
);
418 * Sets the input. This should only be called, if needsInput()
420 * @param buf the input.
421 * @param off the offset into buffer where the input starts.
422 * @param len the length of the input.
423 * @exception IllegalStateException if no input is needed.
424 * @exception IndexOutOfBoundsException if the off and/or len are wrong.
426 public void setInput (byte[] buf
, int off
, int len
)
428 input
.setInput (buf
, off
, len
);
433 * Decodes the deflate header.
434 * @return false if more input is needed.
435 * @exception DataFormatException if header is invalid.
437 private boolean decodeHeader () throws DataFormatException
439 int header
= input
.peekBits(16);
444 /* The header is written in "wrong" byte order */
445 header
= ((header
<< 8) | (header
>> 8)) & 0xffff;
446 if (header
% 31 != 0)
447 throw new DataFormatException("Header checksum illegal");
449 if ((header
& 0x0f00) != (Deflater
.DEFLATED
<< 8))
450 throw new DataFormatException("Compression Method unknown");
452 /* Maximum size of the backwards window in bits.
453 * We currently ignore this, but we could use it to make the
454 * inflater window more space efficient. On the other hand the
455 * full window (15 bits) is needed most times, anyway.
456 int max_wbits = ((header & 0x7000) >> 12) + 8;
459 if ((header
& 0x0020) == 0) // Dictionary flag?
461 mode
= DECODE_BLOCKS
;
472 * Decodes the dictionary checksum after the deflate header.
473 * @return false if more input is needed.
475 private boolean decodeDict ()
477 while (neededBits
> 0)
479 int dictByte
= input
.peekBits(8);
483 readAdler
= (readAdler
<< 8) | dictByte
;
490 * Decodes the huffman encoded symbols in the input stream.
491 * @return false if more input is needed, true if output window is
492 * full or the current block ends.
493 * @exception DataFormatException if deflated stream is invalid.
495 private boolean decodeHuffman () throws DataFormatException
497 int free
= outputWindow
.getFreeSpace();
504 /* This is the inner loop so it is optimized a bit */
505 while (((symbol
= litlenTree
.getSymbol(input
)) & ~
0xff) == 0)
507 outputWindow
.write(symbol
);
517 /* symbol == 256: end of block */
520 mode
= DECODE_BLOCKS
;
527 repLength
= CPLENS
[symbol
- 257];
528 neededBits
= CPLEXT
[symbol
- 257];
530 catch (ArrayIndexOutOfBoundsException ex
)
532 throw new DataFormatException("Illegal rep length code");
535 case DECODE_HUFFMAN_LENBITS
:
538 mode
= DECODE_HUFFMAN_LENBITS
;
539 int i
= input
.peekBits(neededBits
);
542 input
.dropBits(neededBits
);
545 mode
= DECODE_HUFFMAN_DIST
;
547 case DECODE_HUFFMAN_DIST
:
548 symbol
= distTree
.getSymbol(input
);
553 repDist
= CPDIST
[symbol
];
554 neededBits
= CPDEXT
[symbol
];
556 catch (ArrayIndexOutOfBoundsException ex
)
558 throw new DataFormatException("Illegal rep dist code");
561 case DECODE_HUFFMAN_DISTBITS
:
564 mode
= DECODE_HUFFMAN_DISTBITS
;
565 int i
= input
.peekBits(neededBits
);
568 input
.dropBits(neededBits
);
571 outputWindow
.repeat(repLength
, repDist
);
573 mode
= DECODE_HUFFMAN
;
576 throw new IllegalStateException();
583 * Decodes the adler checksum after the deflate stream.
584 * @return false if more input is needed.
585 * @exception DataFormatException if checksum doesn't match.
587 private boolean decodeChksum () throws DataFormatException
589 while (neededBits
> 0)
591 int chkByte
= input
.peekBits(8);
595 readAdler
= (readAdler
<< 8) | chkByte
;
598 if ((int) adler
.getValue() != readAdler
)
599 throw new DataFormatException("Adler chksum doesn't match: "
600 +Integer
.toHexString((int)adler
.getValue())
601 +" vs. "+Integer
.toHexString(readAdler
));
607 * Decodes the deflated stream.
608 * @return false if more input is needed, or if finished.
609 * @exception DataFormatException if deflated stream is invalid.
611 private boolean decode () throws DataFormatException
616 return decodeHeader();
620 return decodeChksum();
632 input
.skipToByteBoundary();
634 mode
= DECODE_CHKSUM
;
639 int type
= input
.peekBits(3);
648 case DeflaterConstants
.STORED_BLOCK
:
649 input
.skipToByteBoundary();
650 mode
= DECODE_STORED_LEN1
;
652 case DeflaterConstants
.STATIC_TREES
:
653 litlenTree
= InflaterHuffmanTree
.defLitLenTree
;
654 distTree
= InflaterHuffmanTree
.defDistTree
;
655 mode
= DECODE_HUFFMAN
;
657 case DeflaterConstants
.DYN_TREES
:
658 dynHeader
= new InflaterDynHeader();
659 mode
= DECODE_DYN_HEADER
;
662 throw new DataFormatException("Unknown block type "+type
);
666 case DECODE_STORED_LEN1
:
668 if ((uncomprLen
= input
.peekBits(16)) < 0)
671 mode
= DECODE_STORED_LEN2
;
674 case DECODE_STORED_LEN2
:
676 int nlen
= input
.peekBits(16);
680 if (nlen
!= (uncomprLen ^
0xffff))
681 throw new DataFormatException("broken uncompressed block");
682 mode
= DECODE_STORED
;
687 int more
= outputWindow
.copyStored(input
, uncomprLen
);
691 mode
= DECODE_BLOCKS
;
694 return !input
.needsInput();
697 case DECODE_DYN_HEADER
:
698 if (!dynHeader
.decode(input
))
700 litlenTree
= dynHeader
.buildLitLenTree();
701 distTree
= dynHeader
.buildDistTree();
702 mode
= DECODE_HUFFMAN
;
705 case DECODE_HUFFMAN_LENBITS
:
706 case DECODE_HUFFMAN_DIST
:
707 case DECODE_HUFFMAN_DISTBITS
:
708 return decodeHuffman();
712 throw new IllegalStateException();