2012-12-20 Paul Thomas <pault@gcc.gnu.org>
[official-gcc.git] / zlib / examples / gun.c
blob72b0882ab86fc2c408ca0827f48609297a95dba4
1 /* gun.c -- simple gunzip to give an example of the use of inflateBack()
2 * Copyright (C) 2003, 2005, 2008, 2010 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 Version 1.6 17 January 2010 Mark Adler */
6 /* Version history:
7 1.0 16 Feb 2003 First version for testing of inflateBack()
8 1.1 21 Feb 2005 Decompress concatenated gzip streams
9 Remove use of "this" variable (C++ keyword)
10 Fix return value for in()
11 Improve allocation failure checking
12 Add typecasting for void * structures
13 Add -h option for command version and usage
14 Add a bunch of comments
15 1.2 20 Mar 2005 Add Unix compress (LZW) decompression
16 Copy file attributes from input file to output file
17 1.3 12 Jun 2005 Add casts for error messages [Oberhumer]
18 1.4 8 Dec 2006 LZW decompression speed improvements
19 1.5 9 Feb 2008 Avoid warning in latest version of gcc
20 1.6 17 Jan 2010 Avoid signed/unsigned comparison warnings
24 gun [ -t ] [ name ... ]
26 decompresses the data in the named gzip files. If no arguments are given,
27 gun will decompress from stdin to stdout. The names must end in .gz, -gz,
28 .z, -z, _z, or .Z. The uncompressed data will be written to a file name
29 with the suffix stripped. On success, the original file is deleted. On
30 failure, the output file is deleted. For most failures, the command will
31 continue to process the remaining names on the command line. A memory
32 allocation failure will abort the command. If -t is specified, then the
33 listed files or stdin will be tested as gzip files for integrity (without
34 checking for a proper suffix), no output will be written, and no files
35 will be deleted.
37 Like gzip, gun allows concatenated gzip streams and will decompress them,
38 writing all of the uncompressed data to the output. Unlike gzip, gun allows
39 an empty file on input, and will produce no error writing an empty output
40 file.
42 gun will also decompress files made by Unix compress, which uses LZW
43 compression. These files are automatically detected by virtue of their
44 magic header bytes. Since the end of Unix compress stream is marked by the
45 end-of-file, they cannot be concatenated. If a Unix compress stream is
46 encountered in an input file, it is the last stream in that file.
48 Like gunzip and uncompress, the file attributes of the original compressed
49 file are maintained in the final uncompressed file, to the extent that the
50 user permissions allow it.
52 On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version
53 1.2.4) is on the same file, when gun is linked with zlib 1.2.2. Also the
54 LZW decompression provided by gun is about twice as fast as the standard
55 Unix uncompress command.
58 /* external functions and related types and constants */
59 #include <stdio.h> /* fprintf() */
60 #include <stdlib.h> /* malloc(), free() */
61 #include <string.h> /* strerror(), strcmp(), strlen(), memcpy() */
62 #include <errno.h> /* errno */
63 #include <fcntl.h> /* open() */
64 #include <unistd.h> /* read(), write(), close(), chown(), unlink() */
65 #include <sys/types.h>
66 #include <sys/stat.h> /* stat(), chmod() */
67 #include <utime.h> /* utime() */
68 #include "zlib.h" /* inflateBackInit(), inflateBack(), */
69 /* inflateBackEnd(), crc32() */
/* function declaration -- every file-private function in this file is
   declared with "local", which simply expands to the static storage class */
#define local static
/* buffer constants */
#define SIZE 32768U         /* input and output buffer sizes */
#define PIECE 16384         /* limits i/o chunks for 16-bit int case */
/* structure for infback() to pass to input function in() -- it maintains the
   input file and a buffer of size SIZE */
struct ind {
    int infile;                 /* descriptor that in() read()s from */
    unsigned char *inbuf;       /* buffer that in() fills and hands back */
};
/* Load input buffer, assumed to be empty, and return bytes loaded and a
   pointer to them.  read() is called until the buffer is full, or until it
   returns end-of-file or error.  Return 0 on error.

   in_desc points to the struct ind describing the input file and buffer.
   *buf is always set to the start of that buffer.  Each read() request is
   limited to PIECE bytes and to the space remaining in the SIZE-byte buffer.
   A return of 0 means either end-of-file with nothing read or a read()
   failure; on failure any bytes already loaded by this call are discarded. */
local unsigned in(void *in_desc, unsigned char **buf)
{
    int ret;
    unsigned len;
    unsigned char *next;
    struct ind *me = (struct ind *)in_desc;

    next = me->inbuf;
    *buf = next;
    len = 0;
    do {
        ret = PIECE;
        if ((unsigned)ret > SIZE - len)
            ret = (int)(SIZE - len);
        ret = (int)read(me->infile, next, ret);
        if (ret == -1) {
            len = 0;                /* report a read error as "no data" */
            break;
        }
        next += ret;
        len += ret;
    } while (ret != 0 && len < SIZE);
    return len;
}
/* structure for infback() to pass to output function out() -- it maintains the
   output file, a running CRC-32 check on the output and the total number of
   bytes output, both for checking against the gzip trailer.  (The length in
   the gzip trailer is stored modulo 2^32, so it's ok if a long is 32 bits and
   the output is greater than 4 GB.) */
struct outd {
    int outfile;                /* output descriptor, or -1 to write nothing */
    int check;                  /* true if checking crc and total */
    unsigned long crc;          /* running CRC-32 of the output */
    unsigned long total;        /* running count of output bytes */
};
125 /* Write output buffer and update the CRC-32 and total bytes written. write()
126 is called until all of the output is written or an error is encountered.
127 On success out() returns 0. For a write failure, out() returns 1. If the
128 output file descriptor is -1, then nothing is written.
130 local int out(void *out_desc, unsigned char *buf, unsigned len)
132 int ret;
133 struct outd *me = (struct outd *)out_desc;
135 if (me->check) {
136 me->crc = crc32(me->crc, buf, len);
137 me->total += len;
139 if (me->outfile != -1)
140 do {
141 ret = PIECE;
142 if ((unsigned)ret > len)
143 ret = (int)len;
144 ret = (int)write(me->outfile, buf, ret);
145 if (ret == -1)
146 return 1;
147 buf += ret;
148 len -= ret;
149 } while (len != 0);
150 return 0;
/* next input byte macro for use inside lunpipe() and gunpipe() -- it relies on
   the enclosing function's variables: have (bytes left at next), next (the
   read pointer), indp (passed to in() to refill when have is zero) and last,
   which receives the byte read, or -1 if in() delivered no more data.  The
   value of the macro is the new value of last. */
#define NEXT() (have ? 0 : (have = in(indp, &next)), \
                last = have ? (have--, (int)(*next++)) : -1)
/* memory for gunpipe() and lunpipe() --
   the first 256 entries of prefix[] and suffix[] are never used, could
   have offset the index, but it's faster to waste the memory */
unsigned char inbuf[SIZE];              /* input buffer */
unsigned char outbuf[SIZE];             /* output buffer */
unsigned short prefix[65536];           /* index to LZW prefix string */
unsigned char suffix[65536];            /* one-character LZW suffix */
unsigned char match[65280 + 2];         /* buffer for reversed match or gzip
                                           32K sliding window */
/* throw out what's left in the current bits byte buffer (this is a vestigial
   aspect of the compressed data format derived from an implementation that
   made use of a special VAX machine instruction!)

   Uses the enclosing function's left, rem, chunk, have and next, plus
   whatever NEXT() needs.  The leftover bits are dropped and the chunk bytes
   still pending are skipped in the input.  If they extend past the buffered
   input, the buffer is refilled once through NEXT(); on end of input, or if
   the refill is still too short, the macro stops early. */
#define FLUSHCODE() \
    do { \
        left = 0; \
        rem = 0; \
        if (chunk > have) { \
            chunk -= have; \
            have = 0; \
            if (NEXT() == -1) \
                break; \
            chunk--; \
            if (chunk > have) { \
                chunk = have = 0; \
                break; \
            } \
        } \
        have -= chunk; \
        next += chunk; \
        chunk = 0; \
    } while (0)
190 /* Decompress a compress (LZW) file from indp to outfile. The compress magic
191 header (two bytes) has already been read and verified. There are have bytes
192 of buffered input at next. strm is used for passing error information back
193 to gunpipe().
195 lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of
196 file, read error, or write error (a write error indicated by strm->next_in
197 not equal to Z_NULL), or Z_DATA_ERROR for invalid input.
199 local int lunpipe(unsigned have, unsigned char *next, struct ind *indp,
200 int outfile, z_stream *strm)
202 int last; /* last byte read by NEXT(), or -1 if EOF */
203 unsigned chunk; /* bytes left in current chunk */
204 int left; /* bits left in rem */
205 unsigned rem; /* unused bits from input */
206 int bits; /* current bits per code */
207 unsigned code; /* code, table traversal index */
208 unsigned mask; /* mask for current bits codes */
209 int max; /* maximum bits per code for this stream */
210 unsigned flags; /* compress flags, then block compress flag */
211 unsigned end; /* last valid entry in prefix/suffix tables */
212 unsigned temp; /* current code */
213 unsigned prev; /* previous code */
214 unsigned final; /* last character written for previous code */
215 unsigned stack; /* next position for reversed string */
216 unsigned outcnt; /* bytes in output buffer */
217 struct outd outd; /* output structure */
218 unsigned char *p;
220 /* set up output */
221 outd.outfile = outfile;
222 outd.check = 0;
224 /* process remainder of compress header -- a flags byte */
225 flags = NEXT();
226 if (last == -1)
227 return Z_BUF_ERROR;
228 if (flags & 0x60) {
229 strm->msg = (char *)"unknown lzw flags set";
230 return Z_DATA_ERROR;
232 max = flags & 0x1f;
233 if (max < 9 || max > 16) {
234 strm->msg = (char *)"lzw bits out of range";
235 return Z_DATA_ERROR;
237 if (max == 9) /* 9 doesn't really mean 9 */
238 max = 10;
239 flags &= 0x80; /* true if block compress */
241 /* clear table */
242 bits = 9;
243 mask = 0x1ff;
244 end = flags ? 256 : 255;
246 /* set up: get first 9-bit code, which is the first decompressed byte, but
247 don't create a table entry until the next code */
248 if (NEXT() == -1) /* no compressed data is ok */
249 return Z_OK;
250 final = prev = (unsigned)last; /* low 8 bits of code */
251 if (NEXT() == -1) /* missing a bit */
252 return Z_BUF_ERROR;
253 if (last & 1) { /* code must be < 256 */
254 strm->msg = (char *)"invalid lzw code";
255 return Z_DATA_ERROR;
257 rem = (unsigned)last >> 1; /* remaining 7 bits */
258 left = 7;
259 chunk = bits - 2; /* 7 bytes left in this chunk */
260 outbuf[0] = (unsigned char)final; /* write first decompressed byte */
261 outcnt = 1;
263 /* decode codes */
264 stack = 0;
265 for (;;) {
266 /* if the table will be full after this, increment the code size */
267 if (end >= mask && bits < max) {
268 FLUSHCODE();
269 bits++;
270 mask <<= 1;
271 mask++;
274 /* get a code of length bits */
275 if (chunk == 0) /* decrement chunk modulo bits */
276 chunk = bits;
277 code = rem; /* low bits of code */
278 if (NEXT() == -1) { /* EOF is end of compressed data */
279 /* write remaining buffered output */
280 if (outcnt && out(&outd, outbuf, outcnt)) {
281 strm->next_in = outbuf; /* signal write error */
282 return Z_BUF_ERROR;
284 return Z_OK;
286 code += (unsigned)last << left; /* middle (or high) bits of code */
287 left += 8;
288 chunk--;
289 if (bits > left) { /* need more bits */
290 if (NEXT() == -1) /* can't end in middle of code */
291 return Z_BUF_ERROR;
292 code += (unsigned)last << left; /* high bits of code */
293 left += 8;
294 chunk--;
296 code &= mask; /* mask to current code length */
297 left -= bits; /* number of unused bits */
298 rem = (unsigned)last >> (8 - left); /* unused bits from last byte */
300 /* process clear code (256) */
301 if (code == 256 && flags) {
302 FLUSHCODE();
303 bits = 9; /* initialize bits and mask */
304 mask = 0x1ff;
305 end = 255; /* empty table */
306 continue; /* get next code */
309 /* special code to reuse last match */
310 temp = code; /* save the current code */
311 if (code > end) {
312 /* Be picky on the allowed code here, and make sure that the code
313 we drop through (prev) will be a valid index so that random
314 input does not cause an exception. The code != end + 1 check is
315 empirically derived, and not checked in the original uncompress
316 code. If this ever causes a problem, that check could be safely
317 removed. Leaving this check in greatly improves gun's ability
318 to detect random or corrupted input after a compress header.
319 In any case, the prev > end check must be retained. */
320 if (code != end + 1 || prev > end) {
321 strm->msg = (char *)"invalid lzw code";
322 return Z_DATA_ERROR;
324 match[stack++] = (unsigned char)final;
325 code = prev;
328 /* walk through linked list to generate output in reverse order */
329 p = match + stack;
330 while (code >= 256) {
331 *p++ = suffix[code];
332 code = prefix[code];
334 stack = p - match;
335 match[stack++] = (unsigned char)code;
336 final = code;
338 /* link new table entry */
339 if (end < mask) {
340 end++;
341 prefix[end] = (unsigned short)prev;
342 suffix[end] = (unsigned char)final;
345 /* set previous code for next iteration */
346 prev = temp;
348 /* write output in forward order */
349 while (stack > SIZE - outcnt) {
350 while (outcnt < SIZE)
351 outbuf[outcnt++] = match[--stack];
352 if (out(&outd, outbuf, outcnt)) {
353 strm->next_in = outbuf; /* signal write error */
354 return Z_BUF_ERROR;
356 outcnt = 0;
358 p = match + stack;
359 do {
360 outbuf[outcnt++] = *--p;
361 } while (p > match);
362 stack = 0;
364 /* loop for next code with final and prev as the last match, rem and
365 left provide the first 0..7 bits of the next code, end is the last
366 valid table entry */
370 /* Decompress a gzip file from infile to outfile. strm is assumed to have been
371 successfully initialized with inflateBackInit(). The input file may consist
372 of a series of gzip streams, in which case all of them will be decompressed
373 to the output file. If outfile is -1, then the gzip stream(s) integrity is
374 checked and nothing is written.
376 The return value is a zlib error code: Z_MEM_ERROR if out of memory,
377 Z_DATA_ERROR if the header or the compressed data is invalid, or if the
378 trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends
379 prematurely or a write error occurs, or Z_ERRNO if junk (not a another gzip
380 stream) follows a valid gzip stream.
382 local int gunpipe(z_stream *strm, int infile, int outfile)
384 int ret, first, last;
385 unsigned have, flags, len;
386 unsigned char *next = NULL;
387 struct ind ind, *indp;
388 struct outd outd;
390 /* setup input buffer */
391 ind.infile = infile;
392 ind.inbuf = inbuf;
393 indp = &ind;
395 /* decompress concatenated gzip streams */
396 have = 0; /* no input data read in yet */
397 first = 1; /* looking for first gzip header */
398 strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */
399 for (;;) {
400 /* look for the two magic header bytes for a gzip stream */
401 if (NEXT() == -1) {
402 ret = Z_OK;
403 break; /* empty gzip stream is ok */
405 if (last != 31 || (NEXT() != 139 && last != 157)) {
406 strm->msg = (char *)"incorrect header check";
407 ret = first ? Z_DATA_ERROR : Z_ERRNO;
408 break; /* not a gzip or compress header */
410 first = 0; /* next non-header is junk */
412 /* process a compress (LZW) file -- can't be concatenated after this */
413 if (last == 157) {
414 ret = lunpipe(have, next, indp, outfile, strm);
415 break;
418 /* process remainder of gzip header */
419 ret = Z_BUF_ERROR;
420 if (NEXT() != 8) { /* only deflate method allowed */
421 if (last == -1) break;
422 strm->msg = (char *)"unknown compression method";
423 ret = Z_DATA_ERROR;
424 break;
426 flags = NEXT(); /* header flags */
427 NEXT(); /* discard mod time, xflgs, os */
428 NEXT();
429 NEXT();
430 NEXT();
431 NEXT();
432 NEXT();
433 if (last == -1) break;
434 if (flags & 0xe0) {
435 strm->msg = (char *)"unknown header flags set";
436 ret = Z_DATA_ERROR;
437 break;
439 if (flags & 4) { /* extra field */
440 len = NEXT();
441 len += (unsigned)(NEXT()) << 8;
442 if (last == -1) break;
443 while (len > have) {
444 len -= have;
445 have = 0;
446 if (NEXT() == -1) break;
447 len--;
449 if (last == -1) break;
450 have -= len;
451 next += len;
453 if (flags & 8) /* file name */
454 while (NEXT() != 0 && last != -1)
456 if (flags & 16) /* comment */
457 while (NEXT() != 0 && last != -1)
459 if (flags & 2) { /* header crc */
460 NEXT();
461 NEXT();
463 if (last == -1) break;
465 /* set up output */
466 outd.outfile = outfile;
467 outd.check = 1;
468 outd.crc = crc32(0L, Z_NULL, 0);
469 outd.total = 0;
471 /* decompress data to output */
472 strm->next_in = next;
473 strm->avail_in = have;
474 ret = inflateBack(strm, in, indp, out, &outd);
475 if (ret != Z_STREAM_END) break;
476 next = strm->next_in;
477 have = strm->avail_in;
478 strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */
480 /* check trailer */
481 ret = Z_BUF_ERROR;
482 if (NEXT() != (int)(outd.crc & 0xff) ||
483 NEXT() != (int)((outd.crc >> 8) & 0xff) ||
484 NEXT() != (int)((outd.crc >> 16) & 0xff) ||
485 NEXT() != (int)((outd.crc >> 24) & 0xff)) {
486 /* crc error */
487 if (last != -1) {
488 strm->msg = (char *)"incorrect data check";
489 ret = Z_DATA_ERROR;
491 break;
493 if (NEXT() != (int)(outd.total & 0xff) ||
494 NEXT() != (int)((outd.total >> 8) & 0xff) ||
495 NEXT() != (int)((outd.total >> 16) & 0xff) ||
496 NEXT() != (int)((outd.total >> 24) & 0xff)) {
497 /* length error */
498 if (last != -1) {
499 strm->msg = (char *)"incorrect length check";
500 ret = Z_DATA_ERROR;
502 break;
505 /* go back and look for another gzip stream */
508 /* clean up and return */
509 return ret;
/* Copy file attributes, from -> to, as best we can.  This is best effort, so
   no errors are reported.  The mode bits, including suid, sgid, and the sticky
   bit are copied (if allowed), the owner's user id and group id are copied
   (again if allowed), and the access and modify times are copied.  Nothing is
   done if from cannot be stat()ed or is not a regular file.

   The ownership is changed before the mode: a successful chown() may clear
   the set-user-ID and set-group-ID bits, so setting the mode first could lose
   exactly the bits this function promises to copy. */
local void copymeta(char *from, char *to)
{
    struct stat was;
    struct utimbuf when;

    /* get all of from's Unix meta data, return if not a regular file */
    if (stat(from, &was) != 0 || !S_ISREG(was.st_mode))
        return;

    /* copy owner's user and group, ignore errors */
    (void)chown(to, was.st_uid, was.st_gid);

    /* set to's mode bits after the ownership change, ignore errors */
    (void)chmod(to, was.st_mode & 07777);

    /* copy access and modify times, ignore errors */
    when.actime = was.st_atime;
    when.modtime = was.st_mtime;
    (void)utime(to, &when);
}
537 /* Decompress the file inname to the file outnname, of if test is true, just
538 decompress without writing and check the gzip trailer for integrity. If
539 inname is NULL or an empty string, read from stdin. If outname is NULL or
540 an empty string, write to stdout. strm is a pre-initialized inflateBack
541 structure. When appropriate, copy the file attributes from inname to
542 outname.
544 gunzip() returns 1 if there is an out-of-memory error or an unexpected
545 return code from gunpipe(). Otherwise it returns 0.
547 local int gunzip(z_stream *strm, char *inname, char *outname, int test)
549 int ret;
550 int infile, outfile;
552 /* open files */
553 if (inname == NULL || *inname == 0) {
554 inname = "-";
555 infile = 0; /* stdin */
557 else {
558 infile = open(inname, O_RDONLY, 0);
559 if (infile == -1) {
560 fprintf(stderr, "gun cannot open %s\n", inname);
561 return 0;
564 if (test)
565 outfile = -1;
566 else if (outname == NULL || *outname == 0) {
567 outname = "-";
568 outfile = 1; /* stdout */
570 else {
571 outfile = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0666);
572 if (outfile == -1) {
573 close(infile);
574 fprintf(stderr, "gun cannot create %s\n", outname);
575 return 0;
578 errno = 0;
580 /* decompress */
581 ret = gunpipe(strm, infile, outfile);
582 if (outfile > 2) close(outfile);
583 if (infile > 2) close(infile);
585 /* interpret result */
586 switch (ret) {
587 case Z_OK:
588 case Z_ERRNO:
589 if (infile > 2 && outfile > 2) {
590 copymeta(inname, outname); /* copy attributes */
591 unlink(inname);
593 if (ret == Z_ERRNO)
594 fprintf(stderr, "gun warning: trailing garbage ignored in %s\n",
595 inname);
596 break;
597 case Z_DATA_ERROR:
598 if (outfile > 2) unlink(outname);
599 fprintf(stderr, "gun data error on %s: %s\n", inname, strm->msg);
600 break;
601 case Z_MEM_ERROR:
602 if (outfile > 2) unlink(outname);
603 fprintf(stderr, "gun out of memory error--aborting\n");
604 return 1;
605 case Z_BUF_ERROR:
606 if (outfile > 2) unlink(outname);
607 if (strm->next_in != Z_NULL) {
608 fprintf(stderr, "gun write error on %s: %s\n",
609 outname, strerror(errno));
611 else if (errno) {
612 fprintf(stderr, "gun read error on %s: %s\n",
613 inname, strerror(errno));
615 else {
616 fprintf(stderr, "gun unexpected end of file on %s\n",
617 inname);
619 break;
620 default:
621 if (outfile > 2) unlink(outname);
622 fprintf(stderr, "gun internal error--aborting\n");
623 return 1;
625 return 0;
628 /* Process the gun command line arguments. See the command syntax near the
629 beginning of this source file. */
630 int main(int argc, char **argv)
632 int ret, len, test;
633 char *outname;
634 unsigned char *window;
635 z_stream strm;
637 /* initialize inflateBack state for repeated use */
638 window = match; /* reuse LZW match buffer */
639 strm.zalloc = Z_NULL;
640 strm.zfree = Z_NULL;
641 strm.opaque = Z_NULL;
642 ret = inflateBackInit(&strm, 15, window);
643 if (ret != Z_OK) {
644 fprintf(stderr, "gun out of memory error--aborting\n");
645 return 1;
648 /* decompress each file to the same name with the suffix removed */
649 argc--;
650 argv++;
651 test = 0;
652 if (argc && strcmp(*argv, "-h") == 0) {
653 fprintf(stderr, "gun 1.6 (17 Jan 2010)\n");
654 fprintf(stderr, "Copyright (C) 2003-2010 Mark Adler\n");
655 fprintf(stderr, "usage: gun [-t] [file1.gz [file2.Z ...]]\n");
656 return 0;
658 if (argc && strcmp(*argv, "-t") == 0) {
659 test = 1;
660 argc--;
661 argv++;
663 if (argc)
664 do {
665 if (test)
666 outname = NULL;
667 else {
668 len = (int)strlen(*argv);
669 if (strcmp(*argv + len - 3, ".gz") == 0 ||
670 strcmp(*argv + len - 3, "-gz") == 0)
671 len -= 3;
672 else if (strcmp(*argv + len - 2, ".z") == 0 ||
673 strcmp(*argv + len - 2, "-z") == 0 ||
674 strcmp(*argv + len - 2, "_z") == 0 ||
675 strcmp(*argv + len - 2, ".Z") == 0)
676 len -= 2;
677 else {
678 fprintf(stderr, "gun error: no gz type on %s--skipping\n",
679 *argv);
680 continue;
682 outname = malloc(len + 1);
683 if (outname == NULL) {
684 fprintf(stderr, "gun out of memory error--aborting\n");
685 ret = 1;
686 break;
688 memcpy(outname, *argv, len);
689 outname[len] = 0;
691 ret = gunzip(&strm, *argv, outname, test);
692 if (outname != NULL) free(outname);
693 if (ret) break;
694 } while (argv++, --argc);
695 else
696 ret = gunzip(&strm, NULL, NULL, test);
698 /* clean up */
699 inflateBackEnd(&strm);
700 return ret;