1 /* gun.c -- simple gunzip to give an example of the use of inflateBack()
2 * Copyright (C) 2003, 2005 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 Version 1.3 12 June 2005 Mark Adler */
7 1.0 16 Feb 2003 First version for testing of inflateBack()
8 1.1 21 Feb 2005 Decompress concatenated gzip streams
9 Remove use of "this" variable (C++ keyword)
10 Fix return value for in()
11 Improve allocation failure checking
12 Add typecasting for void * structures
13 Add -h option for command version and usage
14 Add a bunch of comments
15 1.2 20 Mar 2005 Add Unix compress (LZW) decompression
16 Copy file attributes from input file to output file
17 1.3 12 Jun 2005 Add casts for error messages [Oberhumer]
21 gun [ -t ] [ name ... ]
23 decompresses the data in the named gzip files. If no arguments are given,
24 gun will decompress from stdin to stdout. The names must end in .gz, -gz,
25 .z, -z, _z, or .Z. The uncompressed data will be written to a file name
26 with the suffix stripped. On success, the original file is deleted. On
27 failure, the output file is deleted. For most failures, the command will
28 continue to process the remaining names on the command line. A memory
29 allocation failure will abort the command. If -t is specified, then the
30 listed files or stdin will be tested as gzip files for integrity (without
31 checking for a proper suffix), no output will be written, and no files
34 Like gzip, gun allows concatenated gzip streams and will decompress them,
35 writing all of the uncompressed data to the output. Unlike gzip, gun allows
36 an empty file on input, and will produce no error writing an empty output
39 gun will also decompress files made by Unix compress, which uses LZW
40 compression. These files are automatically detected by virtue of their
41 magic header bytes. Since the end of a Unix compress stream is marked by the
42 end-of-file, such streams cannot be concatenated. If a Unix compress stream is
43 encountered in an input file, it is the last stream in that file.
45 Like gunzip and uncompress, the file attributes of the original compressed
46 file are maintained in the final uncompressed file, to the extent that the
47 user permissions allow it.
49 On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version
50 1.2.4) is on the same file, when gun is linked with zlib 1.2.2. Also the
51 LZW decompression provided by gun is about twice as fast as the standard
52 Unix uncompress command.
55 /* external functions and related types and constants */
56 #include <stdio.h> /* fprintf() */
57 #include <stdlib.h> /* malloc(), free() */
58 #include <string.h> /* strerror(), strcmp(), strlen(), memcpy() */
59 #include <errno.h> /* errno */
60 #include <fcntl.h> /* open() */
61 #include <unistd.h> /* read(), write(), close(), chown(), unlink() */
62 #include <sys/types.h>
63 #include <sys/stat.h> /* stat(), chmod() */
64 #include <utime.h> /* utime() */
65 #include "zlib.h" /* inflateBackInit(), inflateBack(), */
66 /* inflateBackEnd(), crc32() */
68 /* function declaration */
71 /* buffer constants */
72 #define SIZE 32768U /* input and output buffer sizes */
73 #define PIECE 16384 /* limits i/o chunks for 16-bit int case */
75 /* structure for infback() to pass to input function in() -- it maintains the
76 input file and a buffer of size SIZE */
82 /* Load input buffer, assumed to be empty, and return bytes loaded and a
83 pointer to them. read() is called until the buffer is full, or until it
84 returns end-of-file or error. Return 0 on error. */
85 local
unsigned in(void *in_desc
, unsigned char **buf
)
90 struct ind
*me
= (struct ind
*)in_desc
;
97 if ((unsigned)ret
> SIZE
- len
)
98 ret
= (int)(SIZE
- len
);
99 ret
= (int)read(me
->infile
, next
, ret
);
106 } while (ret
!= 0 && len
< SIZE
);
110 /* structure for infback() to pass to output function out() -- it maintains the
111 output file, a running CRC-32 check on the output and the total number of
112 bytes output, both for checking against the gzip trailer. (The length in
113 the gzip trailer is stored modulo 2^32, so it's ok if a long is 32 bits and
114 the output is greater than 4 GB.) */
117 int check
; /* true if checking crc and total */
122 /* Write output buffer and update the CRC-32 and total bytes written. write()
123 is called until all of the output is written or an error is encountered.
124 On success out() returns 0. For a write failure, out() returns 1. If the
125 output file descriptor is -1, then nothing is written.
127 local
int out(void *out_desc
, unsigned char *buf
, unsigned len
)
130 struct outd
*me
= (struct outd
*)out_desc
;
133 me
->crc
= crc32(me
->crc
, buf
, len
);
136 if (me
->outfile
!= -1)
139 if ((unsigned)ret
> len
)
141 ret
= (int)write(me
->outfile
, buf
, ret
);
150 /* next input byte macro for use inside lunpipe() and gunpipe() */
151 #define NEXT() (have ? 0 : (have = in(indp, &next)), \
152 last = have ? (have--, (int)(*next++)) : -1)
154 /* memory for gunpipe() and lunpipe() --
155 the first 256 entries of prefix[] and suffix[] are never used, could
156 have offset the index, but it's faster to waste the memory */
157 unsigned char inbuf
[SIZE
]; /* input buffer */
158 unsigned char outbuf
[SIZE
]; /* output buffer */
159 unsigned short prefix
[65536]; /* index to LZW prefix string */
160 unsigned char suffix
[65536]; /* one-character LZW suffix */
161 unsigned char match
[65280 + 2]; /* buffer for reversed match or gzip
162 32K sliding window */
164 /* throw out what's left in the current bits byte buffer (this is a vestigial
165 aspect of the compressed data format derived from an implementation that
166 made use of a special VAX machine instruction!) */
167 #define FLUSHCODE() \
171 if (chunk > have) { \
177 if (chunk > have) { \
187 /* Decompress a compress (LZW) file from indp to outfile. The compress magic
188 header (two bytes) has already been read and verified. There are have bytes
189 of buffered input at next. strm is used for passing error information back
192 lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of
193 file, read error, or write error (a write error indicated by strm->next_in
194 not equal to Z_NULL), or Z_DATA_ERROR for invalid input.
196 local
int lunpipe(unsigned have
, unsigned char *next
, struct ind
*indp
,
197 int outfile
, z_stream
*strm
)
199 int last
; /* last byte read by NEXT(), or -1 if EOF */
200 int chunk
; /* bytes left in current chunk */
201 int left
; /* bits left in rem */
202 unsigned rem
; /* unused bits from input */
203 int bits
; /* current bits per code */
204 unsigned code
; /* code, table traversal index */
205 unsigned mask
; /* mask for current bits codes */
206 int max
; /* maximum bits per code for this stream */
207 int flags
; /* compress flags, then block compress flag */
208 unsigned end
; /* last valid entry in prefix/suffix tables */
209 unsigned temp
; /* current code */
210 unsigned prev
; /* previous code */
211 unsigned final
; /* last character written for previous code */
212 unsigned stack
; /* next position for reversed string */
213 unsigned outcnt
; /* bytes in output buffer */
214 struct outd outd
; /* output structure */
217 outd
.outfile
= outfile
;
220 /* process remainder of compress header -- a flags byte */
225 strm
->msg
= (char *)"unknown lzw flags set";
229 if (max
< 9 || max
> 16) {
230 strm
->msg
= (char *)"lzw bits out of range";
233 if (max
== 9) /* 9 doesn't really mean 9 */
235 flags
&= 0x80; /* true if block compress */
240 end
= flags
? 256 : 255;
242 /* set up: get first 9-bit code, which is the first decompressed byte, but
243 don't create a table entry until the next code */
244 if (NEXT() == -1) /* no compressed data is ok */
246 final
= prev
= (unsigned)last
; /* low 8 bits of code */
247 if (NEXT() == -1) /* missing a bit */
249 if (last
& 1) { /* code must be < 256 */
250 strm
->msg
= (char *)"invalid lzw code";
253 rem
= (unsigned)last
>> 1; /* remaining 7 bits */
255 chunk
= bits
- 2; /* 7 bytes left in this chunk */
256 outbuf
[0] = (unsigned char)final
; /* write first decompressed byte */
262 /* if the table will be full after this, increment the code size */
263 if (end
>= mask
&& bits
< max
) {
270 /* get a code of length bits */
271 if (chunk
== 0) /* decrement chunk modulo bits */
273 code
= rem
; /* low bits of code */
274 if (NEXT() == -1) { /* EOF is end of compressed data */
275 /* write remaining buffered output */
276 if (outcnt
&& out(&outd
, outbuf
, outcnt
)) {
277 strm
->next_in
= outbuf
; /* signal write error */
282 code
+= (unsigned)last
<< left
; /* middle (or high) bits of code */
285 if (bits
> left
) { /* need more bits */
286 if (NEXT() == -1) /* can't end in middle of code */
288 code
+= (unsigned)last
<< left
; /* high bits of code */
292 code
&= mask
; /* mask to current code length */
293 left
-= bits
; /* number of unused bits */
294 rem
= (unsigned)last
>> (8 - left
); /* unused bits from last byte */
296 /* process clear code (256) */
297 if (code
== 256 && flags
) {
299 bits
= 9; /* initialize bits and mask */
301 end
= 255; /* empty table */
302 continue; /* get next code */
305 /* special code to reuse last match */
306 temp
= code
; /* save the current code */
308 /* Be picky on the allowed code here, and make sure that the code
309 we drop through (prev) will be a valid index so that random
310 input does not cause an exception. The code != end + 1 check is
311 empirically derived, and not checked in the original uncompress
312 code. If this ever causes a problem, that check could be safely
313 removed. Leaving this check in greatly improves gun's ability
314 to detect random or corrupted input after a compress header.
315 In any case, the prev > end check must be retained. */
316 if (code
!= end
+ 1 || prev
> end
) {
317 strm
->msg
= (char *)"invalid lzw code";
320 match
[stack
++] = (unsigned char)final
;
324 /* walk through linked list to generate output in reverse order */
325 while (code
>= 256) {
326 match
[stack
++] = suffix
[code
];
329 match
[stack
++] = (unsigned char)code
;
332 /* link new table entry */
335 prefix
[end
] = (unsigned short)prev
;
336 suffix
[end
] = (unsigned char)final
;
339 /* set previous code for next iteration */
342 /* write output in forward order */
343 while (stack
> SIZE
- outcnt
) {
344 while (outcnt
< SIZE
)
345 outbuf
[outcnt
++] = match
[--stack
];
346 if (out(&outd
, outbuf
, outcnt
)) {
347 strm
->next_in
= outbuf
; /* signal write error */
353 outbuf
[outcnt
++] = match
[--stack
];
356 /* loop for next code with final and prev as the last match, rem and
357 left provide the first 0..7 bits of the next code, end is the last
362 /* Decompress a gzip file from infile to outfile. strm is assumed to have been
363 successfully initialized with inflateBackInit(). The input file may consist
364 of a series of gzip streams, in which case all of them will be decompressed
365 to the output file. If outfile is -1, then the gzip stream(s) integrity is
366 checked and nothing is written.
368 The return value is a zlib error code: Z_MEM_ERROR if out of memory,
369 Z_DATA_ERROR if the header or the compressed data is invalid, or if the
370 trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends
371 prematurely or a write error occurs, or Z_ERRNO if junk (not another gzip
372 stream) follows a valid gzip stream.
374 local
int gunpipe(z_stream
*strm
, int infile
, int outfile
)
376 int ret
, first
, last
;
377 unsigned have
, flags
, len
;
379 struct ind ind
, *indp
;
382 /* setup input buffer */
387 /* decompress concatenated gzip streams */
388 have
= 0; /* no input data read in yet */
389 first
= 1; /* looking for first gzip header */
390 strm
->next_in
= Z_NULL
; /* so Z_BUF_ERROR means EOF */
392 /* look for the two magic header bytes for a gzip stream */
395 break; /* empty gzip stream is ok */
397 if (last
!= 31 || (NEXT() != 139 && last
!= 157)) {
398 strm
->msg
= (char *)"incorrect header check";
399 ret
= first
? Z_DATA_ERROR
: Z_ERRNO
;
400 break; /* not a gzip or compress header */
402 first
= 0; /* next non-header is junk */
404 /* process a compress (LZW) file -- can't be concatenated after this */
406 ret
= lunpipe(have
, next
, indp
, outfile
, strm
);
410 /* process remainder of gzip header */
412 if (NEXT() != 8) { /* only deflate method allowed */
413 if (last
== -1) break;
414 strm
->msg
= (char *)"unknown compression method";
418 flags
= NEXT(); /* header flags */
419 NEXT(); /* discard mod time, xflgs, os */
425 if (last
== -1) break;
427 strm
->msg
= (char *)"unknown header flags set";
431 if (flags
& 4) { /* extra field */
433 len
+= (unsigned)(NEXT()) << 8;
434 if (last
== -1) break;
438 if (NEXT() == -1) break;
441 if (last
== -1) break;
445 if (flags
& 8) /* file name */
446 while (NEXT() != 0 && last
!= -1)
448 if (flags
& 16) /* comment */
449 while (NEXT() != 0 && last
!= -1)
451 if (flags
& 2) { /* header crc */
455 if (last
== -1) break;
458 outd
.outfile
= outfile
;
460 outd
.crc
= crc32(0L, Z_NULL
, 0);
463 /* decompress data to output */
464 strm
->next_in
= next
;
465 strm
->avail_in
= have
;
466 ret
= inflateBack(strm
, in
, indp
, out
, &outd
);
467 if (ret
!= Z_STREAM_END
) break;
468 next
= strm
->next_in
;
469 have
= strm
->avail_in
;
470 strm
->next_in
= Z_NULL
; /* so Z_BUF_ERROR means EOF */
474 if (NEXT() != (outd
.crc
& 0xff) ||
475 NEXT() != ((outd
.crc
>> 8) & 0xff) ||
476 NEXT() != ((outd
.crc
>> 16) & 0xff) ||
477 NEXT() != ((outd
.crc
>> 24) & 0xff)) {
480 strm
->msg
= (char *)"incorrect data check";
485 if (NEXT() != (outd
.total
& 0xff) ||
486 NEXT() != ((outd
.total
>> 8) & 0xff) ||
487 NEXT() != ((outd
.total
>> 16) & 0xff) ||
488 NEXT() != ((outd
.total
>> 24) & 0xff)) {
491 strm
->msg
= (char *)"incorrect length check";
497 /* go back and look for another gzip stream */
500 /* clean up and return */
504 /* Copy file attributes, from -> to, as best we can. This is best effort, so
505 no errors are reported. The mode bits, including suid, sgid, and the sticky
506 bit are copied (if allowed), the owner's user id and group id are copied
507 (again if allowed), and the access and modify times are copied. */
508 local
void copymeta(char *from
, char *to
)
513 /* get all of from's Unix meta data, return if not a regular file */
514 if (stat(from
, &was
) != 0 || (was
.st_mode
& S_IFMT
) != S_IFREG
)
517 /* set to's mode bits, ignore errors */
518 (void)chmod(to
, was
.st_mode
& 07777);
520 /* copy owner's user and group, ignore errors */
521 (void)chown(to
, was
.st_uid
, was
.st_gid
);
523 /* copy access and modify times, ignore errors */
524 when
.actime
= was
.st_atime
;
525 when
.modtime
= was
.st_mtime
;
526 (void)utime(to
, &when
);
529 /* Decompress the file inname to the file outname, or if test is true, just
530 decompress without writing and check the gzip trailer for integrity. If
531 inname is NULL or an empty string, read from stdin. If outname is NULL or
532 an empty string, write to stdout. strm is a pre-initialized inflateBack
533 structure. When appropriate, copy the file attributes from inname to
536 gunzip() returns 1 if there is an out-of-memory error or an unexpected
537 return code from gunpipe(). Otherwise it returns 0.
539 local
int gunzip(z_stream
*strm
, char *inname
, char *outname
, int test
)
545 if (inname
== NULL
|| *inname
== 0) {
547 infile
= 0; /* stdin */
550 infile
= open(inname
, O_RDONLY
, 0);
552 fprintf(stderr
, "gun cannot open %s\n", inname
);
558 else if (outname
== NULL
|| *outname
== 0) {
560 outfile
= 1; /* stdout */
563 outfile
= open(outname
, O_CREAT
| O_TRUNC
| O_WRONLY
, 0666);
566 fprintf(stderr
, "gun cannot create %s\n", outname
);
573 ret
= gunpipe(strm
, infile
, outfile
);
574 if (outfile
> 2) close(outfile
);
575 if (infile
> 2) close(infile
);
577 /* interpret result */
581 if (infile
> 2 && outfile
> 2) {
582 copymeta(inname
, outname
); /* copy attributes */
586 fprintf(stderr
, "gun warning: trailing garbage ignored in %s\n",
590 if (outfile
> 2) unlink(outname
);
591 fprintf(stderr
, "gun data error on %s: %s\n", inname
, strm
->msg
);
594 if (outfile
> 2) unlink(outname
);
595 fprintf(stderr
, "gun out of memory error--aborting\n");
598 if (outfile
> 2) unlink(outname
);
599 if (strm
->next_in
!= Z_NULL
) {
600 fprintf(stderr
, "gun write error on %s: %s\n",
601 outname
, strerror(errno
));
604 fprintf(stderr
, "gun read error on %s: %s\n",
605 inname
, strerror(errno
));
608 fprintf(stderr
, "gun unexpected end of file on %s\n",
613 if (outfile
> 2) unlink(outname
);
614 fprintf(stderr
, "gun internal error--aborting\n");
620 /* Process the gun command line arguments. See the command syntax near the
621 beginning of this source file. */
622 int main(int argc
, char **argv
)
626 unsigned char *window
;
629 /* initialize inflateBack state for repeated use */
630 window
= match
; /* reuse LZW match buffer */
631 strm
.zalloc
= Z_NULL
;
633 strm
.opaque
= Z_NULL
;
634 ret
= inflateBackInit(&strm
, 15, window
);
636 fprintf(stderr
, "gun out of memory error--aborting\n");
640 /* decompress each file to the same name with the suffix removed */
644 if (argc
&& strcmp(*argv
, "-h") == 0) {
645 fprintf(stderr
, "gun 1.3 (12 Jun 2005)\n");
646 fprintf(stderr
, "Copyright (c) 2005 Mark Adler\n");
647 fprintf(stderr
, "usage: gun [-t] [file1.gz [file2.Z ...]]\n");
650 if (argc
&& strcmp(*argv
, "-t") == 0) {
660 len
= (int)strlen(*argv
);
661 if (strcmp(*argv
+ len
- 3, ".gz") == 0 ||
662 strcmp(*argv
+ len
- 3, "-gz") == 0)
664 else if (strcmp(*argv
+ len
- 2, ".z") == 0 ||
665 strcmp(*argv
+ len
- 2, "-z") == 0 ||
666 strcmp(*argv
+ len
- 2, "_z") == 0 ||
667 strcmp(*argv
+ len
- 2, ".Z") == 0)
670 fprintf(stderr
, "gun error: no gz type on %s--skipping\n",
674 outname
= malloc(len
+ 1);
675 if (outname
== NULL
) {
676 fprintf(stderr
, "gun out of memory error--aborting\n");
680 memcpy(outname
, *argv
, len
);
683 ret
= gunzip(&strm
, *argv
, outname
, test
);
684 if (outname
!= NULL
) free(outname
);
686 } while (argv
++, --argc
);
688 ret
= gunzip(&strm
, NULL
, NULL
, test
);
691 inflateBackEnd(&strm
);