contrib/libarchive-2.1/libarchive/archive_read_support_compression_compress.c

   1 /*-
   2  * Copyright (c) 2003-2007 Tim Kientzle
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24  */
  25
  26 /*
  27  * This code borrows heavily from "compress" source code, which is
  28  * protected by the following copyright.  (Clause 3 dropped by request
  29  * of the Regents.)
  30  */
  31
  32 /*-
  33  * Copyright (c) 1985, 1986, 1992, 1993
  34  *      The Regents of the University of California.  All rights reserved.
  35  *
  36  * This code is derived from software contributed to Berkeley by
  37  * Diomidis Spinellis and James A. Woods, derived from original
  38  * work by Spencer Thomas and Joseph Orost.
  39  *
  40  * Redistribution and use in source and binary forms, with or without
  41  * modification, are permitted provided that the following conditions
  42  * are met:
  43  * 1. Redistributions of source code must retain the above copyright
  44  *    notice, this list of conditions and the following disclaimer.
  45  * 2. Redistributions in binary form must reproduce the above copyright
  46  *    notice, this list of conditions and the following disclaimer in the
  47  *    documentation and/or other materials provided with the distribution.
  48  * 4. Neither the name of the University nor the names of its contributors
  49  *    may be used to endorse or promote products derived from this software
  50  *    without specific prior written permission.
  51  *
  52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  62  * SUCH DAMAGE.
  63  */
  64
  65
  66 #include "archive_platform.h"
  67 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_compression_compress.c,v 1.9 2007/04/05 05:18:16 kientzle Exp $");
  68
  69 #ifdef HAVE_ERRNO_H
  70 #include <errno.h>
  71 #endif
  72 #ifdef HAVE_STDLIB_H
  73 #include <stdlib.h>
  74 #endif
  75 #ifdef HAVE_STRING_H
  76 #include <string.h>
  77 #endif
  78 #ifdef HAVE_UNISTD_H
  79 #include <unistd.h>
  80 #endif
  81
  82 #include "archive.h"
  83 #include "archive_private.h"
  84 #include "archive_read_private.h"
  85
  86 /*
  87  * Because LZW decompression is pretty simple, I've just implemented
  88  * the whole decompressor here (cribbing from "compress" source code,
  89  * of course), rather than relying on an external library.  I have
  90  * made an effort to clarify and simplify the algorithm, so the
  91  * names and structure here don't exactly match those used by compress.
  92  */
  93
  94 struct private_data {
  95         /* Input variables. */
  96         const unsigned char     *next_in;
  97         size_t                   avail_in;
  98         int                      bit_buffer;
  99         int                      bits_avail;
 100         size_t                   bytes_in_section;
 101
 102         /* Output variables. */
 103         size_t                   uncompressed_buffer_size;
 104         void                    *uncompressed_buffer;
 105         unsigned char           *read_next;   /* Data for client. */
 106         unsigned char           *next_out;    /* Where to write new data. */
 107         size_t                   avail_out;   /* Space at end of buffer. */
 108
 109         /* Decompression status variables. */
 110         int                      use_reset_code;
 111         int                      end_of_stream; /* EOF status. */
 112         int                      maxcode;       /* Largest code. */
 113         int                      maxcode_bits;  /* Length of largest code. */
 114         int                      section_end_code; /* When to increase bits. */
 115         int                      bits;          /* Current code length. */
 116         int                      oldcode;       /* Previous code. */
 117         int                      finbyte;       /* Last byte of prev code. */
 118
 119         /* Dictionary. */
 120         int                      free_ent;       /* Next dictionary entry. */
 121         unsigned char            suffix[65536];
 122         uint16_t                 prefix[65536];
 123
 124         /*
 125          * Scratch area for expanding dictionary entries.  Note:
 126          * "worst" case here comes from compressing /dev/zero: the
 127          * last code in the dictionary will code a sequence of
 128          * 65536-256 zero bytes.  Thus, we need stack space to expand
 129          * a 65280-byte dictionary entry.  (Of course, 32640:1
 130          * compression could also be considered the "best" case. ;-)
 131          */
 132         unsigned char           *stackp;
 133         unsigned char            stack[65300];
 134 };
 135
 136 static int      bid(const void *, size_t);
 137 static int      finish(struct archive_read *);
 138 static int      init(struct archive_read *, const void *, size_t);
 139 static ssize_t  read_ahead(struct archive_read *, const void **, size_t);
 140 static ssize_t  read_consume(struct archive_read *, size_t);
 141 static int      getbits(struct archive_read *, struct private_data *, int n);
 142 static int      next_code(struct archive_read *a, struct private_data *state);
 143
 144 int
 145 archive_read_support_compression_compress(struct archive *_a)
 146 {
 147         struct archive_read *a = (struct archive_read *)_a;
 148         if (__archive_read_register_compression(a, bid, init) != NULL)
 149                 return (ARCHIVE_OK);
 150         return (ARCHIVE_FATAL);
 151 }
 152
 153 /*
 154  * Test whether we can handle this data.
 155  *
 156  * This logic returns zero if any part of the signature fails.  It
 157  * also tries to Do The Right Thing if a very short buffer prevents us
 158  * from verifying as much as we would like.
 159  */
 160 static int
 161 bid(const void *buff, size_t len)
 162 {
 163         const unsigned char *buffer;
 164         int bits_checked;
 165
 166         if (len < 1)
 167                 return (0);
 168
 169         buffer = (const unsigned char *)buff;
 170         bits_checked = 0;
 171         if (buffer[0] != 037)   /* Verify first ID byte. */
 172                 return (0);
 173         bits_checked += 8;
 174         if (len < 2)
 175                 return (bits_checked);
 176
 177         if (buffer[1] != 0235)  /* Verify second ID byte. */
 178                 return (0);
 179         bits_checked += 8;
 180         if (len < 3)
 181                 return (bits_checked);
 182
 183         /*
 184          * TODO: Verify more.
 185          */
 186
 187         return (bits_checked);
 188 }
 189
 190 /*
 191  * Setup the callbacks.
 192  */
 193 static int
 194 init(struct archive_read *a, const void *buff, size_t n)
 195 {
 196         struct private_data *state;
 197         int code;
 198
 199         a->archive.compression_code = ARCHIVE_COMPRESSION_COMPRESS;
 200         a->archive.compression_name = "compress (.Z)";
 201
 202         a->decompressor->read_ahead = read_ahead;
 203         a->decompressor->consume = read_consume;
 204         a->decompressor->skip = NULL; /* not supported */
 205         a->decompressor->finish = finish;
 206
 207         state = (struct private_data *)malloc(sizeof(*state));
 208         if (state == NULL) {
 209                 archive_set_error(&a->archive, ENOMEM,
 210                     "Can't allocate data for %s decompression",
 211                     a->archive.compression_name);
 212                 return (ARCHIVE_FATAL);
 213         }
 214         memset(state, 0, sizeof(*state));
 215         a->decompressor->data = state;
 216
 217         state->uncompressed_buffer_size = 64 * 1024;
 218         state->uncompressed_buffer = malloc(state->uncompressed_buffer_size);
 219
 220         if (state->uncompressed_buffer == NULL) {
 221                 archive_set_error(&a->archive, ENOMEM,
 222                     "Can't allocate %s decompression buffers",
 223                     a->archive.compression_name);
 224                 goto fatal;
 225         }
 226
 227         state->next_in = (const unsigned char *)buff;
 228         state->avail_in = n;
 229         state->read_next = state->next_out = (unsigned char *)state->uncompressed_buffer;
 230         state->avail_out = state->uncompressed_buffer_size;
 231
 232         code = getbits(a, state, 8);
 233         if (code != 037) /* This should be impossible. */
 234                 goto fatal;
 235
 236         code = getbits(a, state, 8);
 237         if (code != 0235) {
 238                 /* This can happen if the library is receiving 1-byte
 239                  * blocks and gzip and compress are both enabled.
 240                  * You can't distinguish gzip and compress only from
 241                  * the first byte. */
 242                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
 243                     "Compress signature did not match.");
 244                 goto fatal;
 245         }
 246
 247         code = getbits(a, state, 8);
 248         state->maxcode_bits = code & 0x1f;
 249         state->maxcode = (1 << state->maxcode_bits);
 250         state->use_reset_code = code & 0x80;
 251
 252         /* Initialize decompressor. */
 253         state->free_ent = 256;
 254         state->stackp = state->stack;
 255         if (state->use_reset_code)
 256                 state->free_ent++;
 257         state->bits = 9;
 258         state->section_end_code = (1<<state->bits) - 1;
 259         state->oldcode = -1;
 260         for (code = 255; code >= 0; code--) {
 261                 state->prefix[code] = 0;
 262                 state->suffix[code] = code;
 263         }
 264         next_code(a, state);
 265         return (ARCHIVE_OK);
 266
 267 fatal:
 268         finish(a);
 269         return (ARCHIVE_FATAL);
 270 }
 271
 272 /*
 273  * Return a block of data from the decompression buffer.  Decompress more
 274  * as necessary.
 275  */
 276 static ssize_t
 277 read_ahead(struct archive_read *a, const void **p, size_t min)
 278 {
 279         struct private_data *state;
 280         size_t read_avail;
 281         int ret;
 282
 283         state = (struct private_data *)a->decompressor->data;
 284         if (!a->client_reader) {
 285                 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
 286                     "No read callback is registered?  "
 287                     "This is probably an internal programming error.");
 288                 return (ARCHIVE_FATAL);
 289         }
 290
 291         read_avail = state->next_out - state->read_next;
 292
 293         if (read_avail < min  &&  state->end_of_stream) {
 294                 if (state->end_of_stream == ARCHIVE_EOF)
 295                         return (0);
 296                 else
 297                         return (-1);
 298         }
 299
 300         if (read_avail < min) {
 301                 memmove(state->uncompressed_buffer, state->read_next,
 302                     read_avail);
 303                 state->read_next = (unsigned char *)state->uncompressed_buffer;
 304                 state->next_out = state->read_next + read_avail;
 305                 state->avail_out
 306                     = state->uncompressed_buffer_size - read_avail;
 307
 308                 while (read_avail < state->uncompressed_buffer_size
 309                         && !state->end_of_stream) {
 310                         if (state->stackp > state->stack) {
 311                                 *state->next_out++ = *--state->stackp;
 312                                 state->avail_out--;
 313                                 read_avail++;
 314                         } else {
 315                                 ret = next_code(a, state);
 316                                 if (ret == ARCHIVE_EOF)
 317                                         state->end_of_stream = ret;
 318                                 else if (ret != ARCHIVE_OK)
 319                                         return (ret);
 320                         }
 321                 }
 322         }
 323
 324         *p = state->read_next;
 325         return (read_avail);
 326 }
 327
 328 /*
 329  * Mark a previously-returned block of data as read.
 330  */
 331 static ssize_t
 332 read_consume(struct archive_read *a, size_t n)
 333 {
 334         struct private_data *state;
 335
 336         state = (struct private_data *)a->decompressor->data;
 337         a->archive.file_position += n;
 338         state->read_next += n;
 339         if (state->read_next > state->next_out)
 340                 __archive_errx(1, "Request to consume too many "
 341                     "bytes from compress decompressor");
 342         return (n);
 343 }
 344
 345 /*
 346  * Clean up the decompressor.
 347  */
 348 static int
 349 finish(struct archive_read *a)
 350 {
 351         struct private_data *state;
 352         int ret = ARCHIVE_OK;
 353
 354         state = (struct private_data *)a->decompressor->data;
 355
 356         if (state != NULL) {
 357                 if (state->uncompressed_buffer != NULL)
 358                         free(state->uncompressed_buffer);
 359                 free(state);
 360         }
 361
 362         a->decompressor->data = NULL;
 363         return (ret);
 364 }
 365
 366 /*
 367  * Process the next code and fill the stack with the expansion
 368  * of the code.  Returns ARCHIVE_FATAL if there is a fatal I/O or
 369  * format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise.
 370  */
 371 static int
 372 next_code(struct archive_read *a, struct private_data *state)
 373 {
 374         int code, newcode;
 375
 376         static int debug_buff[1024];
 377         static unsigned debug_index;
 378
 379         code = newcode = getbits(a, state, state->bits);
 380         if (code < 0)
 381                 return (code);
 382
 383         debug_buff[debug_index++] = code;
 384         if (debug_index >= sizeof(debug_buff)/sizeof(debug_buff[0]))
 385                 debug_index = 0;
 386
 387         /* If it's a reset code, reset the dictionary. */
 388         if ((code == 256) && state->use_reset_code) {
 389                 /*
 390                  * The original 'compress' implementation blocked its
 391                  * I/O in a manner that resulted in junk bytes being
 392                  * inserted after every reset.  The next section skips
 393                  * this junk.  (Yes, the number of *bytes* to skip is
 394                  * a function of the current *bit* length.)
 395                  */
 396                 int skip_bytes =  state->bits -
 397                     (state->bytes_in_section % state->bits);
 398                 skip_bytes %= state->bits;
 399                 state->bits_avail = 0; /* Discard rest of this byte. */
 400                 while (skip_bytes-- > 0) {
 401                         code = getbits(a, state, 8);
 402                         if (code < 0)
 403                                 return (code);
 404                 }
 405                 /* Now, actually do the reset. */
 406                 state->bytes_in_section = 0;
 407                 state->bits = 9;
 408                 state->section_end_code = (1 << state->bits) - 1;
 409                 state->free_ent = 257;
 410                 state->oldcode = -1;
 411                 return (next_code(a, state));
 412         }
 413
 414         if (code > state->free_ent) {
 415                 /* An invalid code is a fatal error. */
 416                 archive_set_error(&a->archive, -1, "Invalid compressed data");
 417                 return (ARCHIVE_FATAL);
 418         }
 419
 420         /* Special case for KwKwK string. */
 421         if (code >= state->free_ent) {
 422                 *state->stackp++ = state->finbyte;
 423                 code = state->oldcode;
 424         }
 425
 426         /* Generate output characters in reverse order. */
 427         while (code >= 256) {
 428                 *state->stackp++ = state->suffix[code];
 429                 code = state->prefix[code];
 430         }
 431         *state->stackp++ = state->finbyte = code;
 432
 433         /* Generate the new entry. */
 434         code = state->free_ent;
 435         if (code < state->maxcode && state->oldcode >= 0) {
 436                 state->prefix[code] = state->oldcode;
 437                 state->suffix[code] = state->finbyte;
 438                 ++state->free_ent;
 439         }
 440         if (state->free_ent > state->section_end_code) {
 441                 state->bits++;
 442                 state->bytes_in_section = 0;
 443                 if (state->bits == state->maxcode_bits)
 444                         state->section_end_code = state->maxcode;
 445                 else
 446                         state->section_end_code = (1 << state->bits) - 1;
 447         }
 448
 449         /* Remember previous code. */
 450         state->oldcode = newcode;
 451         return (ARCHIVE_OK);
 452 }
 453
 454 /*
 455  * Return next 'n' bits from stream.
 456  *
 457  * -1 indicates end of available data.
 458  */
 459 static int
 460 getbits(struct archive_read *a, struct private_data *state, int n)
 461 {
 462         int code, ret;
 463         static const int mask[] = {
 464                 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff,
 465                 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
 466         };
 467         const void *read_buf;
 468
 469         while (state->bits_avail < n) {
 470                 if (state->avail_in <= 0) {
 471                         read_buf = state->next_in;
 472                         ret = (a->client_reader)(&a->archive, a->client_data,
 473                             &read_buf);
 474                         state->next_in = read_buf;
 475                         if (ret < 0)
 476                                 return (ARCHIVE_FATAL);
 477                         if (ret == 0)
 478                                 return (ARCHIVE_EOF);
 479                         a->archive.raw_position += ret;
 480                         state->avail_in = ret;
 481                 }
 482                 state->bit_buffer |= *state->next_in++ << state->bits_avail;
 483                 state->avail_in--;
 484                 state->bits_avail += 8;
 485                 state->bytes_in_section++;
 486         }
 487
 488         code = state->bit_buffer;
 489         state->bit_buffer >>= n;
 490         state->bits_avail -= n;
 491
 492         return (code & mask[n]);
 493 }