This commit sets new users to see the DragonFly-tips fortunes instead
[dragonfly.git] / contrib / libarchive / archive_read_support_compression_compress.c
bloba84a43fb093deaa4be25711cfd6a4ca197f609c6
1 /*-
2 * Copyright (c) 2003-2004 Tim Kientzle
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * This code borrows heavily from "compress" source code, which is
29 * protected by the following copyright. (Clause 3 dropped by request
30 * of the Regents.)
33 /*-
34 * Copyright (c) 1985, 1986, 1992, 1993
35 * The Regents of the University of California. All rights reserved.
37 * This code is derived from software contributed to Berkeley by
38 * Diomidis Spinellis and James A. Woods, derived from original
39 * work by Spencer Thomas and Joseph Orost.
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
67 #include "archive_platform.h"
68 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_compression_compress.c,v 1.3 2004/10/17 23:40:10 kientzle Exp $");
70 #include <errno.h>
71 #include <stdlib.h>
72 #include <string.h>
73 #include <unistd.h>
75 #include "archive.h"
76 #include "archive_private.h"
79 * Because LZW decompression is pretty simple, I've just implemented
80 * the whole decompressor here (cribbing from "compress" source code,
81 * of course), rather than relying on an external library. I have
82 * made an effort to clarify and simplify the algorithm, so the
83 * names and structure here don't exactly match those used by compress.
86 struct private_data {
87 /* Input variables. */
88 const unsigned char *next_in;
89 size_t avail_in;
90 int bit_buffer;
91 int bits_avail;
92 size_t bytes_in_section;
94 /* Output variables. */
95 size_t uncompressed_buffer_size;
96 void *uncompressed_buffer;
97 unsigned char *read_next; /* Data for client. */
98 unsigned char *next_out; /* Where to write new data. */
99 size_t avail_out; /* Space at end of buffer. */
101 /* Decompression status variables. */
102 int use_reset_code;
103 int end_of_stream; /* EOF status. */
104 int maxcode; /* Largest code. */
105 int maxcode_bits; /* Length of largest code. */
106 int section_end_code; /* When to increase bits. */
107 int bits; /* Current code length. */
108 int oldcode; /* Previous code. */
109 int finbyte; /* Last byte of prev code. */
111 /* Dictionary. */
112 int free_ent; /* Next dictionary entry. */
113 unsigned char suffix[65536];
114 uint16_t prefix[65536];
117 * Scratch area for expanding dictionary entries. Note:
118 * "worst" case here comes from compressing /dev/zero: the
119 * last code in the dictionary will code a sequence of
120 * 65536-256 zero bytes. Thus, we need stack space to expand
121 * a 65280-byte dictionary entry. (Of course, 32640:1
122 * compression could also be considered the "best" case. ;-)
124 unsigned char *stackp;
125 unsigned char stack[65300];
128 static int bid(const void *, size_t);
129 static int finish(struct archive *);
130 static int init(struct archive *, const void *, size_t);
131 static ssize_t read_ahead(struct archive *, const void **, size_t);
132 static ssize_t read_consume(struct archive *, size_t);
133 static int getbits(struct archive *, struct private_data *, int n);
134 static int next_code(struct archive *a, struct private_data *state);
137 archive_read_support_compression_compress(struct archive *a)
139 return (__archive_read_register_compression(a, bid, init));
143 * Test whether we can handle this data.
145 * This logic returns zero if any part of the signature fails. It
146 * also tries to Do The Right Thing if a very short buffer prevents us
147 * from verifying as much as we would like.
149 static int
150 bid(const void *buff, size_t len)
152 const unsigned char *buffer;
153 int bits_checked;
155 if (len < 1)
156 return (0);
158 buffer = buff;
159 bits_checked = 0;
160 if (buffer[0] != 037) /* Verify first ID byte. */
161 return (0);
162 bits_checked += 8;
163 if (len < 2)
164 return (bits_checked);
166 if (buffer[1] != 0235) /* Verify second ID byte. */
167 return (0);
168 bits_checked += 8;
169 if (len < 3)
170 return (bits_checked);
173 * TODO: Verify more.
176 return (bits_checked);
180 * Setup the callbacks.
182 static int
183 init(struct archive *a, const void *buff, size_t n)
185 struct private_data *state;
186 int code;
188 a->compression_code = ARCHIVE_COMPRESSION_COMPRESS;
189 a->compression_name = "compress (.Z)";
191 a->compression_read_ahead = read_ahead;
192 a->compression_read_consume = read_consume;
193 a->compression_finish = finish;
195 state = malloc(sizeof(*state));
196 if (state == NULL) {
197 archive_set_error(a, ENOMEM,
198 "Can't allocate data for %s decompression",
199 a->compression_name);
200 return (ARCHIVE_FATAL);
202 memset(state, 0, sizeof(*state));
204 state->uncompressed_buffer_size = 64 * 1024;
205 state->uncompressed_buffer = malloc(state->uncompressed_buffer_size);
207 if (state->uncompressed_buffer == NULL) {
208 archive_set_error(a, ENOMEM,
209 "Can't allocate %s decompression buffers",
210 a->compression_name);
211 goto fatal;
214 state->next_in = buff;
215 state->avail_in = n;
216 state->read_next = state->next_out = state->uncompressed_buffer;
217 state->avail_out = state->uncompressed_buffer_size;
219 code = getbits(a, state, 8);
220 if (code != 037)
221 goto fatal;
223 code = getbits(a, state, 8);
224 if (code != 0235)
225 goto fatal;
227 code = getbits(a, state, 8);
228 state->maxcode_bits = code & 0x1f;
229 state->maxcode = (1 << state->maxcode_bits);
230 state->use_reset_code = code & 0x80;
232 /* Initialize decompressor. */
233 state->free_ent = 256;
234 state->stackp = state->stack;
235 if (state->use_reset_code)
236 state->free_ent++;
237 state->bits = 9;
238 state->section_end_code = (1<<state->bits) - 1;
239 state->oldcode = -1;
240 for (code = 255; code >= 0; code--) {
241 state->prefix[code] = 0;
242 state->suffix[code] = code;
244 next_code(a, state);
245 a->compression_data = state;
247 return (ARCHIVE_OK);
249 fatal:
250 finish(a);
251 return (ARCHIVE_FATAL);
255 * Return a block of data from the decompression buffer. Decompress more
256 * as necessary.
258 static ssize_t
259 read_ahead(struct archive *a, const void **p, size_t min)
261 struct private_data *state;
262 int read_avail, was_avail, ret;
264 state = a->compression_data;
265 was_avail = -1;
266 if (!a->client_reader) {
267 archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER,
268 "No read callback is registered? "
269 "This is probably an internal programming error.");
270 return (ARCHIVE_FATAL);
273 read_avail = state->next_out - state->read_next;
275 if (read_avail < (int)min && state->end_of_stream) {
276 if (state->end_of_stream == ARCHIVE_EOF)
277 return (0);
278 else
279 return (-1);
282 if (read_avail < (int)min) {
283 memmove(state->uncompressed_buffer, state->read_next,
284 read_avail);
285 state->read_next = state->uncompressed_buffer;
286 state->next_out = state->read_next + read_avail;
287 state->avail_out
288 = state->uncompressed_buffer_size - read_avail;
290 while (read_avail < (int)state->uncompressed_buffer_size
291 && !state->end_of_stream) {
292 if (state->stackp > state->stack) {
293 *state->next_out++ = *--state->stackp;
294 state->avail_out--;
295 read_avail++;
296 } else {
297 ret = next_code(a, state);
298 if (ret == ARCHIVE_EOF)
299 state->end_of_stream = ret;
300 else if (ret != ARCHIVE_OK)
301 return (ret);
306 *p = state->read_next;
307 return (read_avail);
311 * Mark a previously-returned block of data as read.
313 static ssize_t
314 read_consume(struct archive *a, size_t n)
316 struct private_data *state;
318 state = a->compression_data;
319 a->file_position += n;
320 state->read_next += n;
321 if (state->read_next > state->next_out)
322 __archive_errx(1, "Request to consume too many "
323 "bytes from compress decompressor");
324 return (n);
328 * Clean up the decompressor.
330 static int
331 finish(struct archive *a)
333 struct private_data *state;
334 int ret;
336 state = a->compression_data;
337 ret = ARCHIVE_OK;
339 free(state->uncompressed_buffer);
340 free(state);
342 a->compression_data = NULL;
343 if (a->client_closer != NULL)
344 (a->client_closer)(a, a->client_data);
346 return (ret);
350 * Process the next code and fill the stack with the expansion
351 * of the code. Returns ARCHIVE_FATAL if there is a fatal I/O or
352 * format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise.
354 static int
355 next_code(struct archive *a, struct private_data *state)
357 int code, newcode;
359 static int debug_buff[1024];
360 static unsigned debug_index;
362 code = newcode = getbits(a, state, state->bits);
363 if (code < 0)
364 return (code);
366 debug_buff[debug_index++] = code;
367 if (debug_index >= sizeof(debug_buff)/sizeof(debug_buff[0]))
368 debug_index = 0;
370 /* If it's a reset code, reset the dictionary. */
371 if ((code == 256) && state->use_reset_code) {
373 * The original 'compress' implementation blocked its
374 * I/O in a manner that resulted in junk bytes being
375 * inserted after every reset. The next section skips
376 * this junk. (Yes, the number of *bytes* to skip is
377 * a function of the current *bit* length.)
379 int skip_bytes = state->bits -
380 (state->bytes_in_section % state->bits);
381 skip_bytes %= state->bits;
382 state->bits_avail = 0; /* Discard rest of this byte. */
383 while (skip_bytes-- > 0) {
384 code = getbits(a, state, 8);
385 if (code < 0)
386 return (code);
388 /* Now, actually do the reset. */
389 state->bytes_in_section = 0;
390 state->bits = 9;
391 state->section_end_code = (1 << state->bits) - 1;
392 state->free_ent = 257;
393 state->oldcode = -1;
394 return (next_code(a, state));
397 if (code > state->free_ent) {
398 /* An invalid code is a fatal error. */
399 archive_set_error(a, -1, "Invalid compressed data");
400 return (ARCHIVE_FATAL);
403 /* Special case for KwKwK string. */
404 if (code >= state->free_ent) {
405 *state->stackp++ = state->finbyte;
406 code = state->oldcode;
409 /* Generate output characters in reverse order. */
410 while (code >= 256) {
411 *state->stackp++ = state->suffix[code];
412 code = state->prefix[code];
414 *state->stackp++ = state->finbyte = code;
416 /* Generate the new entry. */
417 code = state->free_ent;
418 if (code < state->maxcode && state->oldcode >= 0) {
419 state->prefix[code] = state->oldcode;
420 state->suffix[code] = state->finbyte;
421 ++state->free_ent;
423 if (state->free_ent > state->section_end_code) {
424 state->bits++;
425 state->bytes_in_section = 0;
426 if (state->bits == state->maxcode_bits)
427 state->section_end_code = state->maxcode;
428 else
429 state->section_end_code = (1 << state->bits) - 1;
432 /* Remember previous code. */
433 state->oldcode = newcode;
434 return (ARCHIVE_OK);
438 * Return next 'n' bits from stream.
440 * -1 indicates end of available data.
442 static int
443 getbits(struct archive *a, struct private_data *state, int n)
445 int code, ret;
446 static const int mask[] = {
447 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff,
448 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
452 while (state->bits_avail < n) {
453 if (state->avail_in <= 0) {
454 ret = (a->client_reader)(a, a->client_data,
455 (const void **)&state->next_in);
456 if (ret < 0)
457 return (ARCHIVE_FATAL);
458 if (ret == 0)
459 return (ARCHIVE_EOF);
460 a->raw_position += ret;
461 state->avail_in = ret;
463 state->bit_buffer |= *state->next_in++ << state->bits_avail;
464 state->avail_in--;
465 state->bits_avail += 8;
466 state->bytes_in_section++;
469 code = state->bit_buffer;
470 state->bit_buffer >>= n;
471 state->bits_avail -= n;
473 return (code & mask[n]);