2 * Copyright (c) 2003-2004 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * This code borrows heavily from "compress" source code, which is
29 * protected by the following copyright. (Clause 3 dropped by request
34 * Copyright (c) 1985, 1986, 1992, 1993
35 * The Regents of the University of California. All rights reserved.
37 * This code is derived from software contributed to Berkeley by
38 * Diomidis Spinellis and James A. Woods, derived from original
39 * work by Spencer Thomas and Joseph Orost.
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67 #include "archive_platform.h"
68 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_compression_compress.c,v 1.3 2004/10/17 23:40:10 kientzle Exp $");
76 #include "archive_private.h"
79 * Because LZW decompression is pretty simple, I've just implemented
80 * the whole decompressor here (cribbing from "compress" source code,
81 * of course), rather than relying on an external library. I have
82 * made an effort to clarify and simplify the algorithm, so the
83 * names and structure here don't exactly match those used by compress.
87 /* Input variables. */
88 const unsigned char *next_in
;
92 size_t bytes_in_section
;
94 /* Output variables. */
95 size_t uncompressed_buffer_size
;
96 void *uncompressed_buffer
;
97 unsigned char *read_next
; /* Data for client. */
98 unsigned char *next_out
; /* Where to write new data. */
99 size_t avail_out
; /* Space at end of buffer. */
101 /* Decompression status variables. */
103 int end_of_stream
; /* EOF status. */
104 int maxcode
; /* Largest code. */
105 int maxcode_bits
; /* Length of largest code. */
106 int section_end_code
; /* When to increase bits. */
107 int bits
; /* Current code length. */
108 int oldcode
; /* Previous code. */
109 int finbyte
; /* Last byte of prev code. */
112 int free_ent
; /* Next dictionary entry. */
113 unsigned char suffix
[65536];
114 uint16_t prefix
[65536];
117 * Scratch area for expanding dictionary entries. Note:
118 * "worst" case here comes from compressing /dev/zero: the
119 * last code in the dictionary will code a sequence of
120 * 65536-256 zero bytes. Thus, we need stack space to expand
121 * a 65280-byte dictionary entry. (Of course, 32640:1
122 * compression could also be considered the "best" case. ;-)
124 unsigned char *stackp
;
125 unsigned char stack
[65300];
128 static int bid(const void *, size_t);
129 static int finish(struct archive
*);
130 static int init(struct archive
*, const void *, size_t);
131 static ssize_t
read_ahead(struct archive
*, const void **, size_t);
132 static ssize_t
read_consume(struct archive
*, size_t);
133 static int getbits(struct archive
*, struct private_data
*, int n
);
134 static int next_code(struct archive
*a
, struct private_data
*state
);
137 archive_read_support_compression_compress(struct archive
*a
)
139 return (__archive_read_register_compression(a
, bid
, init
));
143 * Test whether we can handle this data.
145 * This logic returns zero if any part of the signature fails. It
146 * also tries to Do The Right Thing if a very short buffer prevents us
147 * from verifying as much as we would like.
150 bid(const void *buff
, size_t len
)
152 const unsigned char *buffer
;
160 if (buffer
[0] != 037) /* Verify first ID byte. */
164 return (bits_checked
);
166 if (buffer
[1] != 0235) /* Verify second ID byte. */
170 return (bits_checked
);
176 return (bits_checked
);
180 * Setup the callbacks.
183 init(struct archive
*a
, const void *buff
, size_t n
)
185 struct private_data
*state
;
188 a
->compression_code
= ARCHIVE_COMPRESSION_COMPRESS
;
189 a
->compression_name
= "compress (.Z)";
191 a
->compression_read_ahead
= read_ahead
;
192 a
->compression_read_consume
= read_consume
;
193 a
->compression_finish
= finish
;
195 state
= malloc(sizeof(*state
));
197 archive_set_error(a
, ENOMEM
,
198 "Can't allocate data for %s decompression",
199 a
->compression_name
);
200 return (ARCHIVE_FATAL
);
202 memset(state
, 0, sizeof(*state
));
204 state
->uncompressed_buffer_size
= 64 * 1024;
205 state
->uncompressed_buffer
= malloc(state
->uncompressed_buffer_size
);
207 if (state
->uncompressed_buffer
== NULL
) {
208 archive_set_error(a
, ENOMEM
,
209 "Can't allocate %s decompression buffers",
210 a
->compression_name
);
214 state
->next_in
= buff
;
216 state
->read_next
= state
->next_out
= state
->uncompressed_buffer
;
217 state
->avail_out
= state
->uncompressed_buffer_size
;
219 code
= getbits(a
, state
, 8);
223 code
= getbits(a
, state
, 8);
227 code
= getbits(a
, state
, 8);
228 state
->maxcode_bits
= code
& 0x1f;
229 state
->maxcode
= (1 << state
->maxcode_bits
);
230 state
->use_reset_code
= code
& 0x80;
232 /* Initialize decompressor. */
233 state
->free_ent
= 256;
234 state
->stackp
= state
->stack
;
235 if (state
->use_reset_code
)
238 state
->section_end_code
= (1<<state
->bits
) - 1;
240 for (code
= 255; code
>= 0; code
--) {
241 state
->prefix
[code
] = 0;
242 state
->suffix
[code
] = code
;
245 a
->compression_data
= state
;
251 return (ARCHIVE_FATAL
);
255 * Return a block of data from the decompression buffer. Decompress more
259 read_ahead(struct archive
*a
, const void **p
, size_t min
)
261 struct private_data
*state
;
262 int read_avail
, was_avail
, ret
;
264 state
= a
->compression_data
;
266 if (!a
->client_reader
) {
267 archive_set_error(a
, ARCHIVE_ERRNO_PROGRAMMER
,
268 "No read callback is registered? "
269 "This is probably an internal programming error.");
270 return (ARCHIVE_FATAL
);
273 read_avail
= state
->next_out
- state
->read_next
;
275 if (read_avail
< (int)min
&& state
->end_of_stream
) {
276 if (state
->end_of_stream
== ARCHIVE_EOF
)
282 if (read_avail
< (int)min
) {
283 memmove(state
->uncompressed_buffer
, state
->read_next
,
285 state
->read_next
= state
->uncompressed_buffer
;
286 state
->next_out
= state
->read_next
+ read_avail
;
288 = state
->uncompressed_buffer_size
- read_avail
;
290 while (read_avail
< (int)state
->uncompressed_buffer_size
291 && !state
->end_of_stream
) {
292 if (state
->stackp
> state
->stack
) {
293 *state
->next_out
++ = *--state
->stackp
;
297 ret
= next_code(a
, state
);
298 if (ret
== ARCHIVE_EOF
)
299 state
->end_of_stream
= ret
;
300 else if (ret
!= ARCHIVE_OK
)
306 *p
= state
->read_next
;
311 * Mark a previously-returned block of data as read.
314 read_consume(struct archive
*a
, size_t n
)
316 struct private_data
*state
;
318 state
= a
->compression_data
;
319 a
->file_position
+= n
;
320 state
->read_next
+= n
;
321 if (state
->read_next
> state
->next_out
)
322 __archive_errx(1, "Request to consume too many "
323 "bytes from compress decompressor");
328 * Clean up the decompressor.
331 finish(struct archive
*a
)
333 struct private_data
*state
;
336 state
= a
->compression_data
;
339 free(state
->uncompressed_buffer
);
342 a
->compression_data
= NULL
;
343 if (a
->client_closer
!= NULL
)
344 (a
->client_closer
)(a
, a
->client_data
);
350 * Process the next code and fill the stack with the expansion
351 * of the code. Returns ARCHIVE_FATAL if there is a fatal I/O or
352 * format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise.
355 next_code(struct archive
*a
, struct private_data
*state
)
359 static int debug_buff
[1024];
360 static unsigned debug_index
;
362 code
= newcode
= getbits(a
, state
, state
->bits
);
366 debug_buff
[debug_index
++] = code
;
367 if (debug_index
>= sizeof(debug_buff
)/sizeof(debug_buff
[0]))
370 /* If it's a reset code, reset the dictionary. */
371 if ((code
== 256) && state
->use_reset_code
) {
373 * The original 'compress' implementation blocked its
374 * I/O in a manner that resulted in junk bytes being
375 * inserted after every reset. The next section skips
376 * this junk. (Yes, the number of *bytes* to skip is
377 * a function of the current *bit* length.)
379 int skip_bytes
= state
->bits
-
380 (state
->bytes_in_section
% state
->bits
);
381 skip_bytes
%= state
->bits
;
382 state
->bits_avail
= 0; /* Discard rest of this byte. */
383 while (skip_bytes
-- > 0) {
384 code
= getbits(a
, state
, 8);
388 /* Now, actually do the reset. */
389 state
->bytes_in_section
= 0;
391 state
->section_end_code
= (1 << state
->bits
) - 1;
392 state
->free_ent
= 257;
394 return (next_code(a
, state
));
397 if (code
> state
->free_ent
) {
398 /* An invalid code is a fatal error. */
399 archive_set_error(a
, -1, "Invalid compressed data");
400 return (ARCHIVE_FATAL
);
403 /* Special case for KwKwK string. */
404 if (code
>= state
->free_ent
) {
405 *state
->stackp
++ = state
->finbyte
;
406 code
= state
->oldcode
;
409 /* Generate output characters in reverse order. */
410 while (code
>= 256) {
411 *state
->stackp
++ = state
->suffix
[code
];
412 code
= state
->prefix
[code
];
414 *state
->stackp
++ = state
->finbyte
= code
;
416 /* Generate the new entry. */
417 code
= state
->free_ent
;
418 if (code
< state
->maxcode
&& state
->oldcode
>= 0) {
419 state
->prefix
[code
] = state
->oldcode
;
420 state
->suffix
[code
] = state
->finbyte
;
423 if (state
->free_ent
> state
->section_end_code
) {
425 state
->bytes_in_section
= 0;
426 if (state
->bits
== state
->maxcode_bits
)
427 state
->section_end_code
= state
->maxcode
;
429 state
->section_end_code
= (1 << state
->bits
) - 1;
432 /* Remember previous code. */
433 state
->oldcode
= newcode
;
438 * Return next 'n' bits from stream.
440 * -1 indicates end of available data.
443 getbits(struct archive
*a
, struct private_data
*state
, int n
)
446 static const int mask
[] = {
447 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff,
448 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
452 while (state
->bits_avail
< n
) {
453 if (state
->avail_in
<= 0) {
454 ret
= (a
->client_reader
)(a
, a
->client_data
,
455 (const void **)&state
->next_in
);
457 return (ARCHIVE_FATAL
);
459 return (ARCHIVE_EOF
);
460 a
->raw_position
+= ret
;
461 state
->avail_in
= ret
;
463 state
->bit_buffer
|= *state
->next_in
++ << state
->bits_avail
;
465 state
->bits_avail
+= 8;
466 state
->bytes_in_section
++;
469 code
= state
->bit_buffer
;
470 state
->bit_buffer
>>= n
;
471 state
->bits_avail
-= n
;
473 return (code
& mask
[n
]);