gomp-20050608-branch/libjava/classpath/java/util/zip/Inflater.java

   1 /* Inflater.java - Decompress a data stream
   2    Copyright (C) 1999, 2000, 2001, 2003, 2005  Free Software Foundation, Inc.
   3
   4 This file is part of GNU Classpath.
   5
   6 GNU Classpath is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Classpath is distributed in the hope that it will be useful, but
  12 WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Classpath; see the file COPYING.  If not, write to the
  18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301 USA.
  20
  21 Linking this library statically or dynamically with other modules is
  22 making a combined work based on this library.  Thus, the terms and
  23 conditions of the GNU General Public License cover the whole
  24 combination.
  25
  26 As a special exception, the copyright holders of this library give you
  27 permission to link this library with independent modules to produce an
  28 executable, regardless of the license terms of these independent
  29 modules, and to copy and distribute the resulting executable under
  30 terms of your choice, provided that you also meet, for each linked
  31 independent module, the terms and conditions of the license of that
  32 module.  An independent module is a module which is not derived from
  33 or based on this library.  If you modify this library, you may extend
  34 this exception to your version of the library, but you are not
  35 obligated to do so.  If you do not wish to do so, delete this
  36 exception statement from your version. */
  37
  38 package java.util.zip;
  39
  40 /* Written using on-line Java Platform 1.2 API Specification
  41  * and JCL book.
  42  * Believed complete and correct.
  43  */
  44
  45 /**
  46  * Inflater is used to decompress data that has been compressed according
  47  * to the "deflate" standard described in rfc1950.
  48  *
  49  * The usage is as following.  First you have to set some input with
  50  * <code>setInput()</code>, then inflate() it.  If inflate doesn't
  51  * inflate any bytes there may be three reasons:
  52  * <ul>
  53  * <li>needsInput() returns true because the input buffer is empty.
  54  * You have to provide more input with <code>setInput()</code>.
  55  * NOTE: needsInput() also returns true when, the stream is finished.
  56  * </li>
  57  * <li>needsDictionary() returns true, you have to provide a preset
  58  *     dictionary with <code>setDictionary()</code>.</li>
  59  * <li>finished() returns true, the inflater has finished.</li>
  60  * </ul>
  61  * Once the first output byte is produced, a dictionary will not be
  62  * needed at a later stage.
  63  *
  64  * @author John Leuner, Jochen Hoenicke
  65  * @author Tom Tromey
  66  * @date May 17, 1999
  67  * @since JDK 1.1
  68  */
  69 public class Inflater
  70 {
  71   /* Copy lengths for literal codes 257..285 */
  72   private static final int CPLENS[] =
  73   {
  74     3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
  75     35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258
  76   };
  77
  78   /* Extra bits for literal codes 257..285 */
  79   private static final int CPLEXT[] =
  80   {
  81     0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
  82     3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
  83   };
  84
  85   /* Copy offsets for distance codes 0..29 */
  86   private static final int CPDIST[] = {
  87     1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
  88     257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
  89     8193, 12289, 16385, 24577
  90   };
  91
  92   /* Extra bits for distance codes */
  93   private static final int CPDEXT[] = {
  94     0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
  95     7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
  96     12, 12, 13, 13
  97   };
  98
  99   /* This are the state in which the inflater can be.  */
 100   private static final int DECODE_HEADER           = 0;
 101   private static final int DECODE_DICT             = 1;
 102   private static final int DECODE_BLOCKS           = 2;
 103   private static final int DECODE_STORED_LEN1      = 3;
 104   private static final int DECODE_STORED_LEN2      = 4;
 105   private static final int DECODE_STORED           = 5;
 106   private static final int DECODE_DYN_HEADER       = 6;
 107   private static final int DECODE_HUFFMAN          = 7;
 108   private static final int DECODE_HUFFMAN_LENBITS  = 8;
 109   private static final int DECODE_HUFFMAN_DIST     = 9;
 110   private static final int DECODE_HUFFMAN_DISTBITS = 10;
 111   private static final int DECODE_CHKSUM           = 11;
 112   private static final int FINISHED                = 12;
 113
 114   /** This variable contains the current state. */
 115   private int mode;
 116
 117   /**
 118    * The adler checksum of the dictionary or of the decompressed
 119    * stream, as it is written in the header resp. footer of the
 120    * compressed stream.  <br>
 121    *
 122    * Only valid if mode is DECODE_DICT or DECODE_CHKSUM.
 123    */
 124   private int readAdler;
 125   /**
 126    * The number of bits needed to complete the current state.  This
 127    * is valid, if mode is DECODE_DICT, DECODE_CHKSUM,
 128    * DECODE_HUFFMAN_LENBITS or DECODE_HUFFMAN_DISTBITS.
 129    */
 130   private int neededBits;
 131   private int repLength, repDist;
 132   private int uncomprLen;
 133   /**
 134    * True, if the last block flag was set in the last block of the
 135    * inflated stream.  This means that the stream ends after the
 136    * current block.
 137    */
 138   private boolean isLastBlock;
 139
 140   /**
 141    * The total number of inflated bytes.
 142    */
 143   private int totalOut;
 144   /**
 145    * The total number of bytes set with setInput().  This is not the
 146    * value returned by getTotalIn(), since this also includes the
 147    * unprocessed input.
 148    */
 149   private int totalIn;
 150   /**
 151    * This variable stores the nowrap flag that was given to the constructor.
 152    * True means, that the inflated stream doesn't contain a header nor the
 153    * checksum in the footer.
 154    */
 155   private boolean nowrap;
 156
 157   private StreamManipulator input;
 158   private OutputWindow outputWindow;
 159   private InflaterDynHeader dynHeader;
 160   private InflaterHuffmanTree litlenTree, distTree;
 161   private Adler32 adler;
 162
 163   /**
 164    * Creates a new inflater.
 165    */
 166   public Inflater ()
 167   {
 168     this (false);
 169   }
 170
 171   /**
 172    * Creates a new inflater.
 173    * @param nowrap true if no header and checksum field appears in the
 174    * stream.  This is used for GZIPed input.  For compatibility with
 175    * Sun JDK you should provide one byte of input more than needed in
 176    * this case.
 177    */
 178   public Inflater (boolean nowrap)
 179   {
 180     this.nowrap = nowrap;
 181     this.adler = new Adler32();
 182     input = new StreamManipulator();
 183     outputWindow = new OutputWindow();
 184     mode = nowrap ? DECODE_BLOCKS : DECODE_HEADER;
 185   }
 186
 187   /**
 188    * Finalizes this object.
 189    */
 190   protected void finalize ()
 191   {
 192     /* Exists only for compatibility */
 193   }
 194
 195   /**
 196    * Frees all objects allocated by the inflater.  There's no reason
 197    * to call this, since you can just rely on garbage collection (even
 198    * for the Sun implementation).  Exists only for compatibility
 199    * with Sun's JDK, where the compressor allocates native memory.
 200    * If you call any method (even reset) afterwards the behaviour is
 201    * <i>undefined</i>.
 202    * @deprecated Just clear all references to inflater instead.
 203    */
 204   public void end ()
 205   {
 206     outputWindow = null;
 207     input = null;
 208     dynHeader = null;
 209     litlenTree = null;
 210     distTree = null;
 211     adler = null;
 212   }
 213
 214   /**
 215    * Returns true, if the inflater has finished.  This means, that no
 216    * input is needed and no output can be produced.
 217    */
 218   public boolean finished()
 219   {
 220     return mode == FINISHED && outputWindow.getAvailable() == 0;
 221   }
 222
 223   /**
 224    * Gets the adler checksum.  This is either the checksum of all
 225    * uncompressed bytes returned by inflate(), or if needsDictionary()
 226    * returns true (and thus no output was yet produced) this is the
 227    * adler checksum of the expected dictionary.
 228    * @returns the adler checksum.
 229    */
 230   public int getAdler()
 231   {
 232     return needsDictionary() ? readAdler : (int) adler.getValue();
 233   }
 234
 235   /**
 236    * Gets the number of unprocessed input.  Useful, if the end of the
 237    * stream is reached and you want to further process the bytes after
 238    * the deflate stream.
 239    * @return the number of bytes of the input which were not processed.
 240    */
 241   public int getRemaining()
 242   {
 243     return input.getAvailableBytes();
 244   }
 245
 246   /**
 247    * Gets the total number of processed compressed input bytes.
 248    * @return the total number of bytes of processed input bytes.
 249    */
 250   public int getTotalIn()
 251   {
 252     return totalIn - getRemaining();
 253   }
 254
 255   /**
 256    * Gets the total number of output bytes returned by inflate().
 257    * @return the total number of output bytes.
 258    */
 259   public int getTotalOut()
 260   {
 261     return totalOut;
 262   }
 263
 264   /**
 265    * Inflates the compressed stream to the output buffer.  If this
 266    * returns 0, you should check, whether needsDictionary(),
 267    * needsInput() or finished() returns true, to determine why no
 268    * further output is produced.
 269    * @param buf the output buffer.
 270    * @return the number of bytes written to the buffer, 0 if no further
 271    * output can be produced.
 272    * @exception DataFormatException if deflated stream is invalid.
 273    * @exception IllegalArgumentException if buf has length 0.
 274    */
 275   public int inflate (byte[] buf) throws DataFormatException
 276   {
 277     return inflate (buf, 0, buf.length);
 278   }
 279
 280   /**
 281    * Inflates the compressed stream to the output buffer.  If this
 282    * returns 0, you should check, whether needsDictionary(),
 283    * needsInput() or finished() returns true, to determine why no
 284    * further output is produced.
 285    * @param buf the output buffer.
 286    * @param off the offset into buffer where the output should start.
 287    * @param len the maximum length of the output.
 288    * @return the number of bytes written to the buffer, 0 if no further
 289    * output can be produced.
 290    * @exception DataFormatException if deflated stream is invalid.
 291    * @exception IndexOutOfBoundsException if the off and/or len are wrong.
 292    */
 293   public int inflate (byte[] buf, int off, int len) throws DataFormatException
 294   {
 295     /* Special case: len may be zero */
 296     if (len == 0)
 297       return 0;
 298     /* Check for correct buff, off, len triple */
 299     if (0 > off || off > off + len || off + len > buf.length)
 300       throw new ArrayIndexOutOfBoundsException();
 301     int count = 0;
 302     int more;
 303     do
 304       {
 305         if (mode != DECODE_CHKSUM)
 306           {
 307             /* Don't give away any output, if we are waiting for the
 308              * checksum in the input stream.
 309              *
 310              * With this trick we have always:
 311              *   needsInput() and not finished()
 312              *   implies more output can be produced.
 313              */
 314             more = outputWindow.copyOutput(buf, off, len);
 315             adler.update(buf, off, more);
 316             off += more;
 317             count += more;
 318             totalOut += more;
 319             len -= more;
 320             if (len == 0)
 321               return count;
 322           }
 323       }
 324     while (decode() || (outputWindow.getAvailable() > 0
 325                         && mode != DECODE_CHKSUM));
 326     return count;
 327   }
 328
 329   /**
 330    * Returns true, if a preset dictionary is needed to inflate the input.
 331    */
 332   public boolean needsDictionary ()
 333   {
 334     return mode == DECODE_DICT && neededBits == 0;
 335   }
 336
 337   /**
 338    * Returns true, if the input buffer is empty.
 339    * You should then call setInput(). <br>
 340    *
 341    * <em>NOTE</em>: This method also returns true when the stream is finished.
 342    */
 343   public boolean needsInput ()
 344   {
 345     return input.needsInput ();
 346   }
 347
 348   /**
 349    * Resets the inflater so that a new stream can be decompressed.  All
 350    * pending input and output will be discarded.
 351    */
 352   public void reset ()
 353   {
 354     mode = nowrap ? DECODE_BLOCKS : DECODE_HEADER;
 355     totalIn = totalOut = 0;
 356     input.reset();
 357     outputWindow.reset();
 358     dynHeader = null;
 359     litlenTree = null;
 360     distTree = null;
 361     isLastBlock = false;
 362     adler.reset();
 363   }
 364
 365   /**
 366    * Sets the preset dictionary.  This should only be called, if
 367    * needsDictionary() returns true and it should set the same
 368    * dictionary, that was used for deflating.  The getAdler()
 369    * function returns the checksum of the dictionary needed.
 370    * @param buffer the dictionary.
 371    * @exception IllegalStateException if no dictionary is needed.
 372    * @exception IllegalArgumentException if the dictionary checksum is
 373    * wrong.
 374    */
 375   public void setDictionary (byte[] buffer)
 376   {
 377     setDictionary(buffer, 0, buffer.length);
 378   }
 379
 380   /**
 381    * Sets the preset dictionary.  This should only be called, if
 382    * needsDictionary() returns true and it should set the same
 383    * dictionary, that was used for deflating.  The getAdler()
 384    * function returns the checksum of the dictionary needed.
 385    * @param buffer the dictionary.
 386    * @param off the offset into buffer where the dictionary starts.
 387    * @param len the length of the dictionary.
 388    * @exception IllegalStateException if no dictionary is needed.
 389    * @exception IllegalArgumentException if the dictionary checksum is
 390    * wrong.
 391    * @exception IndexOutOfBoundsException if the off and/or len are wrong.
 392    */
 393   public void setDictionary (byte[] buffer, int off, int len)
 394   {
 395     if (!needsDictionary())
 396       throw new IllegalStateException();
 397
 398     adler.update(buffer, off, len);
 399     if ((int) adler.getValue() != readAdler)
 400       throw new IllegalArgumentException("Wrong adler checksum");
 401     adler.reset();
 402     outputWindow.copyDict(buffer, off, len);
 403     mode = DECODE_BLOCKS;
 404   }
 405
 406   /**
 407    * Sets the input.  This should only be called, if needsInput()
 408    * returns true.
 409    * @param buf the input.
 410    * @exception IllegalStateException if no input is needed.
 411    */
 412   public void setInput (byte[] buf)
 413   {
 414     setInput (buf, 0, buf.length);
 415   }
 416
 417   /**
 418    * Sets the input.  This should only be called, if needsInput()
 419    * returns true.
 420    * @param buf the input.
 421    * @param off the offset into buffer where the input starts.
 422    * @param len the length of the input.
 423    * @exception IllegalStateException if no input is needed.
 424    * @exception IndexOutOfBoundsException if the off and/or len are wrong.
 425    */
 426   public void setInput (byte[] buf, int off, int len)
 427   {
 428     input.setInput (buf, off, len);
 429     totalIn += len;
 430   }
 431
 432   /**
 433    * Decodes the deflate header.
 434    * @return false if more input is needed.
 435    * @exception DataFormatException if header is invalid.
 436    */
 437   private boolean decodeHeader () throws DataFormatException
 438   {
 439     int header = input.peekBits(16);
 440     if (header < 0)
 441       return false;
 442     input.dropBits(16);
 443
 444     /* The header is written in "wrong" byte order */
 445     header = ((header << 8) | (header >> 8)) & 0xffff;
 446     if (header % 31 != 0)
 447       throw new DataFormatException("Header checksum illegal");
 448
 449     if ((header & 0x0f00) != (Deflater.DEFLATED << 8))
 450       throw new DataFormatException("Compression Method unknown");
 451
 452     /* Maximum size of the backwards window in bits.
 453      * We currently ignore this, but we could use it to make the
 454      * inflater window more space efficient. On the other hand the
 455      * full window (15 bits) is needed most times, anyway.
 456      int max_wbits = ((header & 0x7000) >> 12) + 8;
 457      */
 458
 459     if ((header & 0x0020) == 0) // Dictionary flag?
 460       {
 461         mode = DECODE_BLOCKS;
 462       }
 463     else
 464       {
 465         mode = DECODE_DICT;
 466         neededBits = 32;
 467       }
 468     return true;
 469   }
 470
 471   /**
 472    * Decodes the dictionary checksum after the deflate header.
 473    * @return false if more input is needed.
 474    */
 475   private boolean decodeDict ()
 476   {
 477     while (neededBits > 0)
 478       {
 479         int dictByte = input.peekBits(8);
 480         if (dictByte < 0)
 481           return false;
 482         input.dropBits(8);
 483         readAdler = (readAdler << 8) | dictByte;
 484         neededBits -= 8;
 485       }
 486     return false;
 487   }
 488
 489   /**
 490    * Decodes the huffman encoded symbols in the input stream.
 491    * @return false if more input is needed, true if output window is
 492    * full or the current block ends.
 493    * @exception DataFormatException if deflated stream is invalid.
 494    */
 495   private boolean decodeHuffman () throws DataFormatException
 496   {
 497     int free = outputWindow.getFreeSpace();
 498     while (free >= 258)
 499       {
 500         int symbol;
 501         switch (mode)
 502           {
 503           case DECODE_HUFFMAN:
 504             /* This is the inner loop so it is optimized a bit */
 505             while (((symbol = litlenTree.getSymbol(input)) & ~0xff) == 0)
 506               {
 507                 outputWindow.write(symbol);
 508                 if (--free < 258)
 509                   return true;
 510               }
 511             if (symbol < 257)
 512               {
 513                 if (symbol < 0)
 514                   return false;
 515                 else
 516                   {
 517                     /* symbol == 256: end of block */
 518                     distTree = null;
 519                     litlenTree = null;
 520                     mode = DECODE_BLOCKS;
 521                     return true;
 522                   }
 523               }
 524
 525             try
 526               {
 527                 repLength = CPLENS[symbol - 257];
 528                 neededBits = CPLEXT[symbol - 257];
 529               }
 530             catch (ArrayIndexOutOfBoundsException ex)
 531               {
 532                 throw new DataFormatException("Illegal rep length code");
 533               }
 534             /* fall through */
 535           case DECODE_HUFFMAN_LENBITS:
 536             if (neededBits > 0)
 537               {
 538                 mode = DECODE_HUFFMAN_LENBITS;
 539                 int i = input.peekBits(neededBits);
 540                 if (i < 0)
 541                   return false;
 542                 input.dropBits(neededBits);
 543                 repLength += i;
 544               }
 545             mode = DECODE_HUFFMAN_DIST;
 546             /* fall through */
 547           case DECODE_HUFFMAN_DIST:
 548             symbol = distTree.getSymbol(input);
 549             if (symbol < 0)
 550               return false;
 551             try
 552               {
 553                 repDist = CPDIST[symbol];
 554                 neededBits = CPDEXT[symbol];
 555               }
 556             catch (ArrayIndexOutOfBoundsException ex)
 557               {
 558                 throw new DataFormatException("Illegal rep dist code");
 559               }
 560             /* fall through */
 561           case DECODE_HUFFMAN_DISTBITS:
 562             if (neededBits > 0)
 563               {
 564                 mode = DECODE_HUFFMAN_DISTBITS;
 565                 int i = input.peekBits(neededBits);
 566                 if (i < 0)
 567                   return false;
 568                 input.dropBits(neededBits);
 569                 repDist += i;
 570               }
 571             outputWindow.repeat(repLength, repDist);
 572             free -= repLength;
 573             mode = DECODE_HUFFMAN;
 574             break;
 575           default:
 576             throw new IllegalStateException();
 577           }
 578       }
 579     return true;
 580   }
 581
 582   /**
 583    * Decodes the adler checksum after the deflate stream.
 584    * @return false if more input is needed.
 585    * @exception DataFormatException if checksum doesn't match.
 586    */
 587   private boolean decodeChksum () throws DataFormatException
 588   {
 589     while (neededBits > 0)
 590       {
 591         int chkByte = input.peekBits(8);
 592         if (chkByte < 0)
 593           return false;
 594         input.dropBits(8);
 595         readAdler = (readAdler << 8) | chkByte;
 596         neededBits -= 8;
 597       }
 598     if ((int) adler.getValue() != readAdler)
 599       throw new DataFormatException("Adler chksum doesn't match: "
 600                                     +Integer.toHexString((int)adler.getValue())
 601                                     +" vs. "+Integer.toHexString(readAdler));
 602     mode = FINISHED;
 603     return false;
 604   }
 605
 606   /**
 607    * Decodes the deflated stream.
 608    * @return false if more input is needed, or if finished.
 609    * @exception DataFormatException if deflated stream is invalid.
 610    */
 611   private boolean decode () throws DataFormatException
 612   {
 613     switch (mode)
 614       {
 615       case DECODE_HEADER:
 616         return decodeHeader();
 617       case DECODE_DICT:
 618         return decodeDict();
 619       case DECODE_CHKSUM:
 620         return decodeChksum();
 621
 622       case DECODE_BLOCKS:
 623         if (isLastBlock)
 624           {
 625             if (nowrap)
 626               {
 627                 mode = FINISHED;
 628                 return false;
 629               }
 630             else
 631               {
 632                 input.skipToByteBoundary();
 633                 neededBits = 32;
 634                 mode = DECODE_CHKSUM;
 635                 return true;
 636               }
 637           }
 638
 639         int type = input.peekBits(3);
 640         if (type < 0)
 641           return false;
 642         input.dropBits(3);
 643
 644         if ((type & 1) != 0)
 645           isLastBlock = true;
 646         switch (type >> 1)
 647           {
 648           case DeflaterConstants.STORED_BLOCK:
 649             input.skipToByteBoundary();
 650             mode = DECODE_STORED_LEN1;
 651             break;
 652           case DeflaterConstants.STATIC_TREES:
 653             litlenTree = InflaterHuffmanTree.defLitLenTree;
 654             distTree = InflaterHuffmanTree.defDistTree;
 655             mode = DECODE_HUFFMAN;
 656             break;
 657           case DeflaterConstants.DYN_TREES:
 658             dynHeader = new InflaterDynHeader();
 659             mode = DECODE_DYN_HEADER;
 660             break;
 661           default:
 662             throw new DataFormatException("Unknown block type "+type);
 663           }
 664         return true;
 665
 666       case DECODE_STORED_LEN1:
 667         {
 668           if ((uncomprLen = input.peekBits(16)) < 0)
 669             return false;
 670           input.dropBits(16);
 671           mode = DECODE_STORED_LEN2;
 672         }
 673         /* fall through */
 674       case DECODE_STORED_LEN2:
 675         {
 676           int nlen = input.peekBits(16);
 677           if (nlen < 0)
 678             return false;
 679           input.dropBits(16);
 680           if (nlen != (uncomprLen ^ 0xffff))
 681             throw new DataFormatException("broken uncompressed block");
 682           mode = DECODE_STORED;
 683         }
 684         /* fall through */
 685       case DECODE_STORED:
 686         {
 687           int more = outputWindow.copyStored(input, uncomprLen);
 688           uncomprLen -= more;
 689           if (uncomprLen == 0)
 690             {
 691               mode = DECODE_BLOCKS;
 692               return true;
 693             }
 694           return !input.needsInput();
 695         }
 696
 697       case DECODE_DYN_HEADER:
 698         if (!dynHeader.decode(input))
 699           return false;
 700         litlenTree = dynHeader.buildLitLenTree();
 701         distTree = dynHeader.buildDistTree();
 702         mode = DECODE_HUFFMAN;
 703         /* fall through */
 704       case DECODE_HUFFMAN:
 705       case DECODE_HUFFMAN_LENBITS:
 706       case DECODE_HUFFMAN_DIST:
 707       case DECODE_HUFFMAN_DISTBITS:
 708         return decodeHuffman();
 709       case FINISHED:
 710         return false;
 711       default:
 712         throw new IllegalStateException();
 713       }
 714   }
 715 }