contrib/libarchive/archive_read_support_compression_compress.c

   1 /*-
   2  * Copyright (c) 2003-2004 Tim Kientzle
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer
  10  *    in this position and unchanged.
  11  * 2. Redistributions in binary form must reproduce the above copyright
  12  *    notice, this list of conditions and the following disclaimer in the
  13  *    documentation and/or other materials provided with the distribution.
  14  *
  15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  18  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25  */
  26
  27 /*
  28  * This code borrows heavily from "compress" source code, which is
  29  * protected by the following copyright.  (Clause 3 dropped by request
  30  * of the Regents.)
  31  */
  32
  33 /*-
  34  * Copyright (c) 1985, 1986, 1992, 1993
  35  *      The Regents of the University of California.  All rights reserved.
  36  *
  37  * This code is derived from software contributed to Berkeley by
  38  * Diomidis Spinellis and James A. Woods, derived from original
  39  * work by Spencer Thomas and Joseph Orost.
  40  *
  41  * Redistribution and use in source and binary forms, with or without
  42  * modification, are permitted provided that the following conditions
  43  * are met:
  44  * 1. Redistributions of source code must retain the above copyright
  45  *    notice, this list of conditions and the following disclaimer.
  46  * 2. Redistributions in binary form must reproduce the above copyright
  47  *    notice, this list of conditions and the following disclaimer in the
  48  *    documentation and/or other materials provided with the distribution.
  49  * 4. Neither the name of the University nor the names of its contributors
  50  *    may be used to endorse or promote products derived from this software
  51  *    without specific prior written permission.
  52  *
  53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  63  * SUCH DAMAGE.
  64  */
  65
  66
  67 #include "archive_platform.h"
  68 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_compression_compress.c,v 1.3 2004/10/17 23:40:10 kientzle Exp $");
  69
  70 #include <errno.h>
  71 #include <stdlib.h>
  72 #include <string.h>
  73 #include <unistd.h>
  74
  75 #include "archive.h"
  76 #include "archive_private.h"
  77
  78 /*
  79  * Because LZW decompression is pretty simple, I've just implemented
  80  * the whole decompressor here (cribbing from "compress" source code,
  81  * of course), rather than relying on an external library.  I have
  82  * made an effort to clarify and simplify the algorithm, so the
  83  * names and structure here don't exactly match those used by compress.
  84  */
  85
  86 struct private_data {
  87         /* Input variables. */
  88         const unsigned char     *next_in;
  89         size_t                   avail_in;
  90         int                      bit_buffer;
  91         int                      bits_avail;
  92         size_t                   bytes_in_section;
  93
  94         /* Output variables. */
  95         size_t                   uncompressed_buffer_size;
  96         void                    *uncompressed_buffer;
  97         unsigned char           *read_next;   /* Data for client. */
  98         unsigned char           *next_out;    /* Where to write new data. */
  99         size_t                   avail_out;   /* Space at end of buffer. */
 100
 101         /* Decompression status variables. */
 102         int                      use_reset_code;
 103         int                      end_of_stream; /* EOF status. */
 104         int                      maxcode;       /* Largest code. */
 105         int                      maxcode_bits;  /* Length of largest code. */
 106         int                      section_end_code; /* When to increase bits. */
 107         int                      bits;          /* Current code length. */
 108         int                      oldcode;       /* Previous code. */
 109         int                      finbyte;       /* Last byte of prev code. */
 110
 111         /* Dictionary. */
 112         int                      free_ent;       /* Next dictionary entry. */
 113         unsigned char            suffix[65536];
 114         uint16_t                 prefix[65536];
 115
 116         /*
 117          * Scratch area for expanding dictionary entries.  Note:
 118          * "worst" case here comes from compressing /dev/zero: the
 119          * last code in the dictionary will code a sequence of
 120          * 65536-256 zero bytes.  Thus, we need stack space to expand
 121          * a 65280-byte dictionary entry.  (Of course, 32640:1
 122          * compression could also be considered the "best" case. ;-)
 123          */
 124         unsigned char           *stackp;
 125         unsigned char            stack[65300];
 126 };
 127
 128 static int      bid(const void *, size_t);
 129 static int      finish(struct archive *);
 130 static int      init(struct archive *, const void *, size_t);
 131 static ssize_t  read_ahead(struct archive *, const void **, size_t);
 132 static ssize_t  read_consume(struct archive *, size_t);
 133 static int      getbits(struct archive *, struct private_data *, int n);
 134 static int      next_code(struct archive *a, struct private_data *state);
 135
 136 int
 137 archive_read_support_compression_compress(struct archive *a)
 138 {
 139         return (__archive_read_register_compression(a, bid, init));
 140 }
 141
 142 /*
 143  * Test whether we can handle this data.
 144  *
 145  * This logic returns zero if any part of the signature fails.  It
 146  * also tries to Do The Right Thing if a very short buffer prevents us
 147  * from verifying as much as we would like.
 148  */
 149 static int
 150 bid(const void *buff, size_t len)
 151 {
 152         const unsigned char *buffer;
 153         int bits_checked;
 154
 155         if (len < 1)
 156                 return (0);
 157
 158         buffer = buff;
 159         bits_checked = 0;
 160         if (buffer[0] != 037)   /* Verify first ID byte. */
 161                 return (0);
 162         bits_checked += 8;
 163         if (len < 2)
 164                 return (bits_checked);
 165
 166         if (buffer[1] != 0235)  /* Verify second ID byte. */
 167                 return (0);
 168         bits_checked += 8;
 169         if (len < 3)
 170                 return (bits_checked);
 171
 172         /*
 173          * TODO: Verify more.
 174          */
 175
 176         return (bits_checked);
 177 }
 178
 179 /*
 180  * Setup the callbacks.
 181  */
 182 static int
 183 init(struct archive *a, const void *buff, size_t n)
 184 {
 185         struct private_data *state;
 186         int code;
 187
 188         a->compression_code = ARCHIVE_COMPRESSION_COMPRESS;
 189         a->compression_name = "compress (.Z)";
 190
 191         a->compression_read_ahead = read_ahead;
 192         a->compression_read_consume = read_consume;
 193         a->compression_finish = finish;
 194
 195         state = malloc(sizeof(*state));
 196         if (state == NULL) {
 197                 archive_set_error(a, ENOMEM,
 198                     "Can't allocate data for %s decompression",
 199                     a->compression_name);
 200                 return (ARCHIVE_FATAL);
 201         }
 202         memset(state, 0, sizeof(*state));
 203
 204         state->uncompressed_buffer_size = 64 * 1024;
 205         state->uncompressed_buffer = malloc(state->uncompressed_buffer_size);
 206
 207         if (state->uncompressed_buffer == NULL) {
 208                 archive_set_error(a, ENOMEM,
 209                     "Can't allocate %s decompression buffers",
 210                     a->compression_name);
 211                 goto fatal;
 212         }
 213
 214         state->next_in = buff;
 215         state->avail_in = n;
 216         state->read_next = state->next_out = state->uncompressed_buffer;
 217         state->avail_out = state->uncompressed_buffer_size;
 218
 219         code = getbits(a, state, 8);
 220         if (code != 037)
 221                 goto fatal;
 222
 223         code = getbits(a, state, 8);
 224         if (code != 0235)
 225                 goto fatal;
 226
 227         code = getbits(a, state, 8);
 228         state->maxcode_bits = code & 0x1f;
 229         state->maxcode = (1 << state->maxcode_bits);
 230         state->use_reset_code = code & 0x80;
 231
 232         /* Initialize decompressor. */
 233         state->free_ent = 256;
 234         state->stackp = state->stack;
 235         if (state->use_reset_code)
 236                 state->free_ent++;
 237         state->bits = 9;
 238         state->section_end_code = (1<<state->bits) - 1;
 239         state->oldcode = -1;
 240         for (code = 255; code >= 0; code--) {
 241                 state->prefix[code] = 0;
 242                 state->suffix[code] = code;
 243         }
 244         next_code(a, state);
 245         a->compression_data = state;
 246
 247         return (ARCHIVE_OK);
 248
 249 fatal:
 250         finish(a);
 251         return (ARCHIVE_FATAL);
 252 }
 253
 254 /*
 255  * Return a block of data from the decompression buffer.  Decompress more
 256  * as necessary.
 257  */
 258 static ssize_t
 259 read_ahead(struct archive *a, const void **p, size_t min)
 260 {
 261         struct private_data *state;
 262         int read_avail, was_avail, ret;
 263
 264         state = a->compression_data;
 265         was_avail = -1;
 266         if (!a->client_reader) {
 267                 archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER,
 268                     "No read callback is registered?  "
 269                     "This is probably an internal programming error.");
 270                 return (ARCHIVE_FATAL);
 271         }
 272
 273         read_avail = state->next_out - state->read_next;
 274
 275         if (read_avail < (int)min  &&  state->end_of_stream) {
 276                 if (state->end_of_stream == ARCHIVE_EOF)
 277                         return (0);
 278                 else
 279                         return (-1);
 280         }
 281
 282         if (read_avail < (int)min) {
 283                 memmove(state->uncompressed_buffer, state->read_next,
 284                     read_avail);
 285                 state->read_next = state->uncompressed_buffer;
 286                 state->next_out = state->read_next + read_avail;
 287                 state->avail_out
 288                     = state->uncompressed_buffer_size - read_avail;
 289
 290                 while (read_avail < (int)state->uncompressed_buffer_size
 291                         && !state->end_of_stream) {
 292                         if (state->stackp > state->stack) {
 293                                 *state->next_out++ = *--state->stackp;
 294                                 state->avail_out--;
 295                                 read_avail++;
 296                         } else {
 297                                 ret = next_code(a, state);
 298                                 if (ret == ARCHIVE_EOF)
 299                                         state->end_of_stream = ret;
 300                                 else if (ret != ARCHIVE_OK)
 301                                         return (ret);
 302                         }
 303                 }
 304         }
 305
 306         *p = state->read_next;
 307         return (read_avail);
 308 }
 309
 310 /*
 311  * Mark a previously-returned block of data as read.
 312  */
 313 static ssize_t
 314 read_consume(struct archive *a, size_t n)
 315 {
 316         struct private_data *state;
 317
 318         state = a->compression_data;
 319         a->file_position += n;
 320         state->read_next += n;
 321         if (state->read_next > state->next_out)
 322                 __archive_errx(1, "Request to consume too many "
 323                     "bytes from compress decompressor");
 324         return (n);
 325 }
 326
 327 /*
 328  * Clean up the decompressor.
 329  */
 330 static int
 331 finish(struct archive *a)
 332 {
 333         struct private_data *state;
 334         int ret;
 335
 336         state = a->compression_data;
 337         ret = ARCHIVE_OK;
 338
 339         free(state->uncompressed_buffer);
 340         free(state);
 341
 342         a->compression_data = NULL;
 343         if (a->client_closer != NULL)
 344                 (a->client_closer)(a, a->client_data);
 345
 346         return (ret);
 347 }
 348
 349 /*
 350  * Process the next code and fill the stack with the expansion
 351  * of the code.  Returns ARCHIVE_FATAL if there is a fatal I/O or
 352  * format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise.
 353  */
 354 static int
 355 next_code(struct archive *a, struct private_data *state)
 356 {
 357         int code, newcode;
 358
 359         static int debug_buff[1024];
 360         static unsigned debug_index;
 361
 362         code = newcode = getbits(a, state, state->bits);
 363         if (code < 0)
 364                 return (code);
 365
 366         debug_buff[debug_index++] = code;
 367         if (debug_index >= sizeof(debug_buff)/sizeof(debug_buff[0]))
 368                 debug_index = 0;
 369
 370         /* If it's a reset code, reset the dictionary. */
 371         if ((code == 256) && state->use_reset_code) {
 372                 /*
 373                  * The original 'compress' implementation blocked its
 374                  * I/O in a manner that resulted in junk bytes being
 375                  * inserted after every reset.  The next section skips
 376                  * this junk.  (Yes, the number of *bytes* to skip is
 377                  * a function of the current *bit* length.)
 378                  */
 379                 int skip_bytes =  state->bits -
 380                     (state->bytes_in_section % state->bits);
 381                 skip_bytes %= state->bits;
 382                 state->bits_avail = 0; /* Discard rest of this byte. */
 383                 while (skip_bytes-- > 0) {
 384                         code = getbits(a, state, 8);
 385                         if (code < 0)
 386                                 return (code);
 387                 }
 388                 /* Now, actually do the reset. */
 389                 state->bytes_in_section = 0;
 390                 state->bits = 9;
 391                 state->section_end_code = (1 << state->bits) - 1;
 392                 state->free_ent = 257;
 393                 state->oldcode = -1;
 394                 return (next_code(a, state));
 395         }
 396
 397         if (code > state->free_ent) {
 398                 /* An invalid code is a fatal error. */
 399                 archive_set_error(a, -1, "Invalid compressed data");
 400                 return (ARCHIVE_FATAL);
 401         }
 402
 403         /* Special case for KwKwK string. */
 404         if (code >= state->free_ent) {
 405                 *state->stackp++ = state->finbyte;
 406                 code = state->oldcode;
 407         }
 408
 409         /* Generate output characters in reverse order. */
 410         while (code >= 256) {
 411                 *state->stackp++ = state->suffix[code];
 412                 code = state->prefix[code];
 413         }
 414         *state->stackp++ = state->finbyte = code;
 415
 416         /* Generate the new entry. */
 417         code = state->free_ent;
 418         if (code < state->maxcode && state->oldcode >= 0) {
 419                 state->prefix[code] = state->oldcode;
 420                 state->suffix[code] = state->finbyte;
 421                 ++state->free_ent;
 422         }
 423         if (state->free_ent > state->section_end_code) {
 424                 state->bits++;
 425                 state->bytes_in_section = 0;
 426                 if (state->bits == state->maxcode_bits)
 427                         state->section_end_code = state->maxcode;
 428                 else
 429                         state->section_end_code = (1 << state->bits) - 1;
 430         }
 431
 432         /* Remember previous code. */
 433         state->oldcode = newcode;
 434         return (ARCHIVE_OK);
 435 }
 436
 437 /*
 438  * Return next 'n' bits from stream.
 439  *
 440  * -1 indicates end of available data.
 441  */
 442 static int
 443 getbits(struct archive *a, struct private_data *state, int n)
 444 {
 445         int code, ret;
 446         static const int mask[] = {
 447                 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff,
 448                 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
 449         };
 450
 451
 452         while (state->bits_avail < n) {
 453                 if (state->avail_in <= 0) {
 454                         ret = (a->client_reader)(a, a->client_data,
 455                             (const void **)&state->next_in);
 456                         if (ret < 0)
 457                                 return (ARCHIVE_FATAL);
 458                         if (ret == 0)
 459                                 return (ARCHIVE_EOF);
 460                         a->raw_position += ret;
 461                         state->avail_in = ret;
 462                 }
 463                 state->bit_buffer |= *state->next_in++ << state->bits_avail;
 464                 state->avail_in--;
 465                 state->bits_avail += 8;
 466                 state->bytes_in_section++;
 467         }
 468
 469         code = state->bit_buffer;
 470         state->bit_buffer >>= n;
 471         state->bits_avail -= n;
 472
 473         return (code & mask[n]);
 474 }