2 * Copyright (c) 2003-2007 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 * This code borrows heavily from "compress" source code, which is
28 * protected by the following copyright. (Clause 3 dropped by request
33 * Copyright (c) 1985, 1986, 1992, 1993
34 * The Regents of the University of California. All rights reserved.
36 * This code is derived from software contributed to Berkeley by
37 * Diomidis Spinellis and James A. Woods, derived from original
38 * work by Spencer Thomas and Joseph Orost.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 #include "archive_platform.h"
67 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_compression_compress.c,v 1.9 2007/04/05 05:18:16 kientzle Exp $");
83 #include "archive_private.h"
84 #include "archive_read_private.h"
87 * Because LZW decompression is pretty simple, I've just implemented
88 * the whole decompressor here (cribbing from "compress" source code,
89 * of course), rather than relying on an external library. I have
90 * made an effort to clarify and simplify the algorithm, so the
91 * names and structure here don't exactly match those used by compress.
95 /* Input variables. */
96 const unsigned char *next_in
;
100 size_t bytes_in_section
;
102 /* Output variables. */
103 size_t uncompressed_buffer_size
;
104 void *uncompressed_buffer
;
105 unsigned char *read_next
; /* Data for client. */
106 unsigned char *next_out
; /* Where to write new data. */
107 size_t avail_out
; /* Space at end of buffer. */
109 /* Decompression status variables. */
111 int end_of_stream
; /* EOF status. */
112 int maxcode
; /* Largest code. */
113 int maxcode_bits
; /* Length of largest code. */
114 int section_end_code
; /* When to increase bits. */
115 int bits
; /* Current code length. */
116 int oldcode
; /* Previous code. */
117 int finbyte
; /* Last byte of prev code. */
120 int free_ent
; /* Next dictionary entry. */
121 unsigned char suffix
[65536];
122 uint16_t prefix
[65536];
125 * Scratch area for expanding dictionary entries. Note:
126 * "worst" case here comes from compressing /dev/zero: the
127 * last code in the dictionary will code a sequence of
128 * 65536-256 zero bytes. Thus, we need stack space to expand
129 * a 65280-byte dictionary entry. (Of course, 32640:1
130 * compression could also be considered the "best" case. ;-)
132 unsigned char *stackp
;
133 unsigned char stack
[65300];
136 static int bid(const void *, size_t);
137 static int finish(struct archive_read
*);
138 static int init(struct archive_read
*, const void *, size_t);
139 static ssize_t
read_ahead(struct archive_read
*, const void **, size_t);
140 static ssize_t
read_consume(struct archive_read
*, size_t);
141 static int getbits(struct archive_read
*, struct private_data
*, int n
);
142 static int next_code(struct archive_read
*a
, struct private_data
*state
);
145 archive_read_support_compression_compress(struct archive
*_a
)
147 struct archive_read
*a
= (struct archive_read
*)_a
;
148 if (__archive_read_register_compression(a
, bid
, init
) != NULL
)
150 return (ARCHIVE_FATAL
);
154 * Test whether we can handle this data.
156 * This logic returns zero if any part of the signature fails. It
157 * also tries to Do The Right Thing if a very short buffer prevents us
158 * from verifying as much as we would like.
161 bid(const void *buff
, size_t len
)
163 const unsigned char *buffer
;
169 buffer
= (const unsigned char *)buff
;
171 if (buffer
[0] != 037) /* Verify first ID byte. */
175 return (bits_checked
);
177 if (buffer
[1] != 0235) /* Verify second ID byte. */
181 return (bits_checked
);
187 return (bits_checked
);
191 * Setup the callbacks.
194 init(struct archive_read
*a
, const void *buff
, size_t n
)
196 struct private_data
*state
;
199 a
->archive
.compression_code
= ARCHIVE_COMPRESSION_COMPRESS
;
200 a
->archive
.compression_name
= "compress (.Z)";
202 a
->decompressor
->read_ahead
= read_ahead
;
203 a
->decompressor
->consume
= read_consume
;
204 a
->decompressor
->skip
= NULL
; /* not supported */
205 a
->decompressor
->finish
= finish
;
207 state
= (struct private_data
*)malloc(sizeof(*state
));
209 archive_set_error(&a
->archive
, ENOMEM
,
210 "Can't allocate data for %s decompression",
211 a
->archive
.compression_name
);
212 return (ARCHIVE_FATAL
);
214 memset(state
, 0, sizeof(*state
));
215 a
->decompressor
->data
= state
;
217 state
->uncompressed_buffer_size
= 64 * 1024;
218 state
->uncompressed_buffer
= malloc(state
->uncompressed_buffer_size
);
220 if (state
->uncompressed_buffer
== NULL
) {
221 archive_set_error(&a
->archive
, ENOMEM
,
222 "Can't allocate %s decompression buffers",
223 a
->archive
.compression_name
);
227 state
->next_in
= (const unsigned char *)buff
;
229 state
->read_next
= state
->next_out
= (unsigned char *)state
->uncompressed_buffer
;
230 state
->avail_out
= state
->uncompressed_buffer_size
;
232 code
= getbits(a
, state
, 8);
233 if (code
!= 037) /* This should be impossible. */
236 code
= getbits(a
, state
, 8);
238 /* This can happen if the library is receiving 1-byte
239 * blocks and gzip and compress are both enabled.
240 * You can't distinguish gzip and compress only from
242 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
243 "Compress signature did not match.");
247 code
= getbits(a
, state
, 8);
248 state
->maxcode_bits
= code
& 0x1f;
249 state
->maxcode
= (1 << state
->maxcode_bits
);
250 state
->use_reset_code
= code
& 0x80;
252 /* Initialize decompressor. */
253 state
->free_ent
= 256;
254 state
->stackp
= state
->stack
;
255 if (state
->use_reset_code
)
258 state
->section_end_code
= (1<<state
->bits
) - 1;
260 for (code
= 255; code
>= 0; code
--) {
261 state
->prefix
[code
] = 0;
262 state
->suffix
[code
] = code
;
269 return (ARCHIVE_FATAL
);
273 * Return a block of data from the decompression buffer. Decompress more
277 read_ahead(struct archive_read
*a
, const void **p
, size_t min
)
279 struct private_data
*state
;
283 state
= (struct private_data
*)a
->decompressor
->data
;
284 if (!a
->client_reader
) {
285 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_PROGRAMMER
,
286 "No read callback is registered? "
287 "This is probably an internal programming error.");
288 return (ARCHIVE_FATAL
);
291 read_avail
= state
->next_out
- state
->read_next
;
293 if (read_avail
< min
&& state
->end_of_stream
) {
294 if (state
->end_of_stream
== ARCHIVE_EOF
)
300 if (read_avail
< min
) {
301 memmove(state
->uncompressed_buffer
, state
->read_next
,
303 state
->read_next
= (unsigned char *)state
->uncompressed_buffer
;
304 state
->next_out
= state
->read_next
+ read_avail
;
306 = state
->uncompressed_buffer_size
- read_avail
;
308 while (read_avail
< state
->uncompressed_buffer_size
309 && !state
->end_of_stream
) {
310 if (state
->stackp
> state
->stack
) {
311 *state
->next_out
++ = *--state
->stackp
;
315 ret
= next_code(a
, state
);
316 if (ret
== ARCHIVE_EOF
)
317 state
->end_of_stream
= ret
;
318 else if (ret
!= ARCHIVE_OK
)
324 *p
= state
->read_next
;
329 * Mark a previously-returned block of data as read.
332 read_consume(struct archive_read
*a
, size_t n
)
334 struct private_data
*state
;
336 state
= (struct private_data
*)a
->decompressor
->data
;
337 a
->archive
.file_position
+= n
;
338 state
->read_next
+= n
;
339 if (state
->read_next
> state
->next_out
)
340 __archive_errx(1, "Request to consume too many "
341 "bytes from compress decompressor");
346 * Clean up the decompressor.
349 finish(struct archive_read
*a
)
351 struct private_data
*state
;
352 int ret
= ARCHIVE_OK
;
354 state
= (struct private_data
*)a
->decompressor
->data
;
357 if (state
->uncompressed_buffer
!= NULL
)
358 free(state
->uncompressed_buffer
);
362 a
->decompressor
->data
= NULL
;
367 * Process the next code and fill the stack with the expansion
368 * of the code. Returns ARCHIVE_FATAL if there is a fatal I/O or
369 * format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise.
372 next_code(struct archive_read
*a
, struct private_data
*state
)
376 static int debug_buff
[1024];
377 static unsigned debug_index
;
379 code
= newcode
= getbits(a
, state
, state
->bits
);
383 debug_buff
[debug_index
++] = code
;
384 if (debug_index
>= sizeof(debug_buff
)/sizeof(debug_buff
[0]))
387 /* If it's a reset code, reset the dictionary. */
388 if ((code
== 256) && state
->use_reset_code
) {
390 * The original 'compress' implementation blocked its
391 * I/O in a manner that resulted in junk bytes being
392 * inserted after every reset. The next section skips
393 * this junk. (Yes, the number of *bytes* to skip is
394 * a function of the current *bit* length.)
396 int skip_bytes
= state
->bits
-
397 (state
->bytes_in_section
% state
->bits
);
398 skip_bytes
%= state
->bits
;
399 state
->bits_avail
= 0; /* Discard rest of this byte. */
400 while (skip_bytes
-- > 0) {
401 code
= getbits(a
, state
, 8);
405 /* Now, actually do the reset. */
406 state
->bytes_in_section
= 0;
408 state
->section_end_code
= (1 << state
->bits
) - 1;
409 state
->free_ent
= 257;
411 return (next_code(a
, state
));
414 if (code
> state
->free_ent
) {
415 /* An invalid code is a fatal error. */
416 archive_set_error(&a
->archive
, -1, "Invalid compressed data");
417 return (ARCHIVE_FATAL
);
420 /* Special case for KwKwK string. */
421 if (code
>= state
->free_ent
) {
422 *state
->stackp
++ = state
->finbyte
;
423 code
= state
->oldcode
;
426 /* Generate output characters in reverse order. */
427 while (code
>= 256) {
428 *state
->stackp
++ = state
->suffix
[code
];
429 code
= state
->prefix
[code
];
431 *state
->stackp
++ = state
->finbyte
= code
;
433 /* Generate the new entry. */
434 code
= state
->free_ent
;
435 if (code
< state
->maxcode
&& state
->oldcode
>= 0) {
436 state
->prefix
[code
] = state
->oldcode
;
437 state
->suffix
[code
] = state
->finbyte
;
440 if (state
->free_ent
> state
->section_end_code
) {
442 state
->bytes_in_section
= 0;
443 if (state
->bits
== state
->maxcode_bits
)
444 state
->section_end_code
= state
->maxcode
;
446 state
->section_end_code
= (1 << state
->bits
) - 1;
449 /* Remember previous code. */
450 state
->oldcode
= newcode
;
455 * Return next 'n' bits from stream.
457 * -1 indicates end of available data.
460 getbits(struct archive_read
*a
, struct private_data
*state
, int n
)
463 static const int mask
[] = {
464 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff,
465 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
467 const void *read_buf
;
469 while (state
->bits_avail
< n
) {
470 if (state
->avail_in
<= 0) {
471 read_buf
= state
->next_in
;
472 ret
= (a
->client_reader
)(&a
->archive
, a
->client_data
,
474 state
->next_in
= read_buf
;
476 return (ARCHIVE_FATAL
);
478 return (ARCHIVE_EOF
);
479 a
->archive
.raw_position
+= ret
;
480 state
->avail_in
= ret
;
482 state
->bit_buffer
|= *state
->next_in
++ << state
->bits_avail
;
484 state
->bits_avail
+= 8;
485 state
->bytes_in_section
++;
488 code
= state
->bit_buffer
;
489 state
->bit_buffer
>>= n
;
490 state
->bits_avail
-= n
;
492 return (code
& mask
[n
]);