2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice immediately at the beginning of the file, without modification,
11 * this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * zmagic() - returns 0 if not recognized, uncompresses and prints
31 * information if recognized
32 * uncompress(method, old, n, newch) - uncompress old into new,
33 * using method, return sizeof new
38 FILE_RCSID("@(#)$File: compress.c,v 1.124 2019/07/21 11:42:09 christos Exp $")
52 typedef void (*sig_t
)(int);
53 #endif /* HAVE_SIG_T */
54 #if !defined(__MINGW32__) && !defined(WIN32)
55 #include <sys/ioctl.h>
57 #ifdef HAVE_SYS_WAIT_H
60 #if defined(HAVE_SYS_TIME_H)
64 #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
65 #define BUILTIN_DECOMPRESS
69 #if defined(HAVE_BZLIB_H) || defined(BZLIBSUPPORT)
74 #if defined(HAVE_XZLIB_H) || defined(XZLIBSUPPORT)
81 #define DPRINTF(...) do { \
83 tty = open("/dev/tty", O_RDWR); \
86 dprintf(tty, __VA_ARGS__); \
87 } while (/*CONSTCOND*/0)
94 * The following python code is not really used because ZLIBSUPPORT is only
95 * defined if we have a built-in zlib, and the built-in zlib handles that.
96 * That is not true for android where we have zlib.h and not -lz.
98 static const char zlibcode
[] =
99 "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
101 static const char *zlib_args
[] = { "python", "-c", zlibcode
, NULL
};
104 zlibcmp(const unsigned char *buf
)
106 unsigned short x
= 1;
107 unsigned char *s
= CAST(unsigned char *, CAST(void *, &x
));
109 if ((buf
[0] & 0xf) != 8 || (buf
[0] & 0x80) != 0)
111 if (s
[0] != 1) /* endianness test */
112 x
= buf
[0] | (buf
[1] << 8);
114 x
= buf
[1] | (buf
[0] << 8);
122 lzmacmp(const unsigned char *buf
)
124 if (buf
[0] != 0x5d || buf
[1] || buf
[2])
126 if (buf
[12] && buf
[12] != 0xff)
131 #define gzip_flags "-cd"
132 #define lrzip_flags "-do"
133 #define lzip_flags gzip_flags
135 static const char *gzip_args
[] = {
136 "gzip", gzip_flags
, NULL
138 static const char *uncompress_args
[] = {
139 "uncompress", "-c", NULL
141 static const char *bzip2_args
[] = {
144 static const char *lzip_args
[] = {
145 "lzip", lzip_flags
, NULL
147 static const char *xz_args
[] = {
150 static const char *lrzip_args
[] = {
151 "lrzip", lrzip_flags
, NULL
153 static const char *lz4_args
[] = {
156 static const char *zstd_args
[] = {
161 #define do_bzlib NULL
163 private const struct {
169 #define METH_FROZEN 2
174 { "\037\235", 2, gzip_args
, NULL
}, /* 0, compressed */
175 /* Uncompress can get stuck; so use gzip first if we have it
176 * Idea from Damien Clark, thanks! */
177 { "\037\235", 2, uncompress_args
, NULL
}, /* 1, compressed */
178 { "\037\213", 2, gzip_args
, do_zlib
}, /* 2, gzipped */
179 { "\037\236", 2, gzip_args
, NULL
}, /* 3, frozen */
180 { "\037\240", 2, gzip_args
, NULL
}, /* 4, SCO LZH */
181 /* the standard pack utilities do not accept standard input */
182 { "\037\036", 2, gzip_args
, NULL
}, /* 5, packed */
183 { "PK\3\4", 4, gzip_args
, NULL
}, /* 6, pkzipped, */
184 /* ...only first file examined */
185 { "BZh", 3, bzip2_args
, do_bzlib
}, /* 7, bzip2-ed */
186 { "LZIP", 4, lzip_args
, NULL
}, /* 8, lzip-ed */
187 { "\3757zXZ\0", 6, xz_args
, NULL
}, /* 9, XZ Utils */
188 { "LRZI", 4, lrzip_args
, NULL
}, /* 10, LRZIP */
189 { "\004\"M\030",4, lz4_args
, NULL
}, /* 11, LZ4 */
190 { "\x28\xB5\x2F\xFD", 4, zstd_args
, NULL
}, /* 12, zstd */
191 { RCAST(const void *, lzmacmp
), -13, xz_args
, NULL
}, /* 13, lzma */
193 { RCAST(const void *, zlibcmp
), -2, zlib_args
, NULL
}, /* 14, zlib */
201 private ssize_t
swrite(int, const void *, size_t);
203 private size_t ncompr
= __arraycount(compr
);
204 private int uncompressbuf(int, size_t, size_t, const unsigned char *,
205 unsigned char **, size_t *);
206 #ifdef BUILTIN_DECOMPRESS
207 private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
209 private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
213 private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
217 private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
221 static int makeerror(unsigned char **, size_t *, const char *, ...)
222 __attribute__((__format__(__printf__
, 3, 4)));
223 private const char *methodname(size_t);
226 format_decompression_error(struct magic_set
*ms
, size_t i
, unsigned char *buf
)
229 int mime
= ms
->flags
& MAGIC_MIME
;
232 return file_printf(ms
, "ERROR:[%s: %s]", methodname(i
), buf
);
234 for (p
= buf
; *p
; p
++)
238 return file_printf(ms
, "application/x-decompression-error-%s-%s",
243 file_zmagic(struct magic_set
*ms
, const struct buffer
*b
, const char *name
)
245 unsigned char *newbuf
= NULL
;
249 int urv
, prv
, rv
= 0;
250 int mime
= ms
->flags
& MAGIC_MIME
;
252 const unsigned char *buf
= CAST(const unsigned char *, b
->fbuf
);
253 size_t nbytes
= b
->flen
;
255 struct sigaction sig_act
;
257 if ((ms
->flags
& MAGIC_COMPRESS
) == 0)
260 for (i
= 0; i
< ncompr
; i
++) {
262 if (nbytes
< CAST(size_t, abs(compr
[i
].maglen
)))
264 if (compr
[i
].maglen
< 0) {
265 zm
= (RCAST(int (*)(const unsigned char *),
266 CCAST(void *, compr
[i
].magic
)))(buf
);
268 zm
= memcmp(buf
, compr
[i
].magic
,
269 CAST(size_t, compr
[i
].maglen
)) == 0;
275 /* Prevent SIGPIPE death if child dies unexpectedly */
277 //We can use sig_act for both new and old, but
278 struct sigaction new_act
;
279 memset(&new_act
, 0, sizeof(new_act
));
280 new_act
.sa_handler
= SIG_IGN
;
281 sa_saved
= sigaction(SIGPIPE
, &new_act
, &sig_act
) != -1;
285 urv
= uncompressbuf(fd
, ms
->bytes_max
, i
, buf
, &newbuf
, &nsz
);
286 DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT
"u\n", urv
,
287 (char *)newbuf
, nsz
);
291 ms
->flags
&= ~MAGIC_COMPRESS
;
293 prv
= format_decompression_error(ms
, i
, newbuf
);
295 prv
= file_buffer(ms
, -1, NULL
, name
, newbuf
, nsz
);
299 if ((ms
->flags
& MAGIC_COMPRESS_TRANSP
) != 0)
301 if (mime
!= MAGIC_MIME
&& mime
!= 0)
304 mime
? " compressed-encoding=" : " (")) == -1)
306 if ((pb
= file_push_buffer(ms
)) == NULL
)
309 * XXX: If file_buffer fails here, we overwrite
310 * the compressed text. FIXME.
312 if (file_buffer(ms
, -1, NULL
, NULL
, buf
, nbytes
) == -1) {
313 if (file_pop_buffer(ms
, pb
) != NULL
)
317 if ((rbuf
= file_pop_buffer(ms
, pb
)) != NULL
) {
318 if (file_printf(ms
, "%s", rbuf
) == -1) {
324 if (!mime
&& file_printf(ms
, ")") == -1)
338 DPRINTF("rv = %d\n", rv
);
340 if (sa_saved
&& sig_act
.sa_handler
!= SIG_IGN
)
341 (void)sigaction(SIGPIPE
, &sig_act
, NULL
);
344 ms
->flags
|= MAGIC_COMPRESS
;
345 DPRINTF("Zmagic returns %d\n", rv
);
350 * `safe' write for sockets and pipes.
353 swrite(int fd
, const void *buf
, size_t n
)
359 switch (rv
= write(fd
, buf
, n
)) {
366 buf
= CAST(const char *, buf
) + rv
;
375 * `safe' read for sockets and pipes.
378 sread(int fd
, void *buf
, size_t n
, int canbepipe
__attribute__((__unused__
)))
386 if (fd
== STDIN_FILENO
)
390 if (canbepipe
&& (ioctl(fd
, FIONREAD
, &t
) == -1 || t
== 0)) {
393 for (cnt
= 0;; cnt
++) {
395 struct timeval tout
= {0, 100 * 1000};
402 * Avoid soft deadlock: do not read if there
403 * is nothing to read from sockets and pipes.
405 selrv
= select(fd
+ 1, &check
, NULL
, NULL
, &tout
);
407 if (errno
== EINTR
|| errno
== EAGAIN
)
409 } else if (selrv
== 0 && cnt
>= 5) {
415 (void)ioctl(fd
, FIONREAD
, &t
);
418 if (t
> 0 && CAST(size_t, t
) < n
) {
426 switch ((rv
= read(fd
, buf
, n
))) {
435 buf
= CAST(char *, CCAST(void *, buf
)) + rv
;
443 file_pipe2file(struct magic_set
*ms
, int fd
, const void *startbuf
,
450 (void)strlcpy(buf
, "/tmp/file.XXXXXX", sizeof buf
);
453 char *ptr
= mktemp(buf
);
454 tfd
= open(ptr
, O_RDWR
|O_TRUNC
|O_EXCL
|O_CREAT
, 0600);
462 mode_t ou
= umask(0);
471 file_error(ms
, errno
,
472 "cannot create temporary file for pipe copy");
476 if (swrite(tfd
, startbuf
, nbytes
) != CAST(ssize_t
, nbytes
))
479 while ((r
= sread(fd
, buf
, sizeof(buf
), 1)) > 0)
480 if (swrite(tfd
, buf
, CAST(size_t, r
)) != r
)
486 file_error(ms
, errno
, "error copying from pipe to temp file");
491 file_error(ms
, errno
, "error while writing to temp file");
496 * We duplicate the file descriptor, because fclose on a
497 * tmpfile will delete the file, but any open descriptors
498 * can still access the phantom inode.
500 if ((fd
= dup2(tfd
, fd
)) == -1) {
501 file_error(ms
, errno
, "could not dup descriptor for temp file");
505 if (lseek(fd
, CAST(off_t
, 0), SEEK_SET
) == CAST(off_t
, -1)) {
512 #ifdef BUILTIN_DECOMPRESS
514 #define FHCRC (1 << 1)
515 #define FEXTRA (1 << 2)
516 #define FNAME (1 << 3)
517 #define FCOMMENT (1 << 4)
521 uncompressgzipped(const unsigned char *old
, unsigned char **newch
,
522 size_t bytes_max
, size_t *n
)
524 unsigned char flg
= old
[3];
525 size_t data_start
= 10;
528 if (data_start
+ 1 >= *n
)
530 data_start
+= 2 + old
[data_start
] + old
[data_start
+ 1] * 256;
533 while(data_start
< *n
&& old
[data_start
])
537 if (flg
& FCOMMENT
) {
538 while(data_start
< *n
&& old
[data_start
])
545 if (data_start
>= *n
)
550 return uncompresszlib(old
, newch
, bytes_max
, n
, 0);
552 return makeerror(newch
, n
, "File too short");
556 uncompresszlib(const unsigned char *old
, unsigned char **newch
,
557 size_t bytes_max
, size_t *n
, int zlib
)
562 if ((*newch
= CAST(unsigned char *, malloc(bytes_max
+ 1))) == NULL
)
563 return makeerror(newch
, n
, "No buffer, %s", strerror(errno
));
565 z
.next_in
= CCAST(Bytef
*, old
);
566 z
.avail_in
= CAST(uint32_t, *n
);
568 z
.avail_out
= CAST(unsigned int, bytes_max
);
573 /* LINTED bug in header macro */
574 rc
= zlib
? inflateInit(&z
) : inflateInit2(&z
, -15);
578 rc
= inflate(&z
, Z_SYNC_FLUSH
);
579 if (rc
!= Z_OK
&& rc
!= Z_STREAM_END
)
582 *n
= CAST(size_t, z
.total_out
);
587 /* let's keep the nul-terminate tradition */
592 strlcpy(RCAST(char *, *newch
), z
.msg
? z
.msg
: zError(rc
), bytes_max
);
593 *n
= strlen(RCAST(char *, *newch
));
600 uncompressbzlib(const unsigned char *old
, unsigned char **newch
,
601 size_t bytes_max
, size_t *n
)
606 memset(&bz
, 0, sizeof(bz
));
607 rc
= BZ2_bzDecompressInit(&bz
, 0, 0);
611 if ((*newch
= CAST(unsigned char *, malloc(bytes_max
+ 1))) == NULL
)
612 return makeerror(newch
, n
, "No buffer, %s", strerror(errno
));
614 bz
.next_in
= CCAST(char *, RCAST(const char *, old
));
615 bz
.avail_in
= CAST(uint32_t, *n
);
616 bz
.next_out
= RCAST(char *, *newch
);
617 bz
.avail_out
= CAST(unsigned int, bytes_max
);
619 rc
= BZ2_bzDecompress(&bz
);
620 if (rc
!= BZ_OK
&& rc
!= BZ_STREAM_END
)
623 /* Assume bytes_max is within 32 bits */
624 /* assert(bz.total_out_hi32 == 0); */
625 *n
= CAST(size_t, bz
.total_out_lo32
);
626 rc
= BZ2_bzDecompressEnd(&bz
);
630 /* let's keep the nul-terminate tradition */
635 snprintf(RCAST(char *, *newch
), bytes_max
, "bunzip error %d", rc
);
636 *n
= strlen(RCAST(char *, *newch
));
643 uncompressxzlib(const unsigned char *old
, unsigned char **newch
,
644 size_t bytes_max
, size_t *n
)
649 memset(&xz
, 0, sizeof(xz
));
650 rc
= lzma_auto_decoder(&xz
, UINT64_MAX
, 0);
654 if ((*newch
= CAST(unsigned char *, malloc(bytes_max
+ 1))) == NULL
)
655 return makeerror(newch
, n
, "No buffer, %s", strerror(errno
));
657 xz
.next_in
= CCAST(const uint8_t *, old
);
658 xz
.avail_in
= CAST(uint32_t, *n
);
659 xz
.next_out
= RCAST(uint8_t *, *newch
);
660 xz
.avail_out
= CAST(unsigned int, bytes_max
);
662 rc
= lzma_code(&xz
, LZMA_RUN
);
663 if (rc
!= LZMA_OK
&& rc
!= LZMA_STREAM_END
)
666 *n
= CAST(size_t, xz
.total_out
);
670 /* let's keep the nul-terminate tradition */
675 snprintf(RCAST(char *, *newch
), bytes_max
, "unxz error %d", rc
);
676 *n
= strlen(RCAST(char *, *newch
));
683 makeerror(unsigned char **buf
, size_t *len
, const char *fmt
, ...)
690 rv
= vasprintf(&msg
, fmt
, ap
);
697 *buf
= RCAST(unsigned char *, msg
);
703 closefd(int *fd
, size_t i
)
715 for (i
= 0; i
< 2; i
++)
720 copydesc(int i
, int fd
)
723 return 0; /* "no dup was necessary" */
724 if (dup2(fd
, i
) == -1) {
725 DPRINTF("dup(%d, %d) failed (%s)\n", fd
, i
, strerror(errno
));
732 writechild(int fd
, const void *old
, size_t n
)
737 * fork again, to avoid blocking because both
742 DPRINTF("Fork failed (%s)\n", strerror(errno
));
747 if (swrite(fd
, old
, n
) != CAST(ssize_t
, n
)) {
748 DPRINTF("Write failed (%s)\n", strerror(errno
));
758 filter_error(unsigned char *ubuf
, ssize_t n
)
764 buf
= RCAST(char *, ubuf
);
765 while (isspace(CAST(unsigned char, *buf
)))
767 DPRINTF("Filter error[[[%s]]]\n", buf
);
768 if ((p
= strchr(CAST(char *, buf
), '\n')) != NULL
)
770 if ((p
= strchr(CAST(char *, buf
), ';')) != NULL
)
772 if ((p
= strrchr(CAST(char *, buf
), ':')) != NULL
) {
774 while (isspace(CAST(unsigned char, *p
)))
777 memmove(ubuf
, p
, CAST(size_t, n
+ 1));
779 DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf
);
781 *ubuf
= toupper(*ubuf
);
786 methodname(size_t method
)
789 #ifdef BUILTIN_DECOMPRESS
804 return compr
[method
].argv
[0];
809 uncompressbuf(int fd
, size_t bytes_max
, size_t method
, const unsigned char *old
,
810 unsigned char **newch
, size_t* n
)
820 #ifdef BUILTIN_DECOMPRESS
822 return uncompressgzipped(old
, newch
, bytes_max
, n
);
824 return uncompresszlib(old
, newch
, bytes_max
, n
, 1);
828 return uncompressbzlib(old
, newch
, bytes_max
, n
);
833 return uncompressxzlib(old
, newch
, bytes_max
, n
);
839 (void)fflush(stdout
);
840 (void)fflush(stderr
);
842 for (i
= 0; i
< __arraycount(fdp
); i
++)
843 fdp
[i
][0] = fdp
[i
][1] = -1;
845 if ((fd
== -1 && pipe(fdp
[STDIN_FILENO
]) == -1) ||
846 pipe(fdp
[STDOUT_FILENO
]) == -1 || pipe(fdp
[STDERR_FILENO
]) == -1) {
847 closep(fdp
[STDIN_FILENO
]);
848 closep(fdp
[STDOUT_FILENO
]);
849 return makeerror(newch
, n
, "Cannot create pipe, %s",
853 /* For processes with large mapped virtual sizes, vfork
854 * may be _much_ faster (10-100 times) than fork.
858 return makeerror(newch
, n
, "Cannot vfork, %s",
863 /* Note: we are after vfork, do not modify memory
864 * in a way which confuses parent. In particular,
865 * do not modify fdp[i][j].
868 (void) lseek(fd
, CAST(off_t
, 0), SEEK_SET
);
869 if (copydesc(STDIN_FILENO
, fd
))
872 if (copydesc(STDIN_FILENO
, fdp
[STDIN_FILENO
][0]))
873 (void) close(fdp
[STDIN_FILENO
][0]);
874 if (fdp
[STDIN_FILENO
][1] > 2)
875 (void) close(fdp
[STDIN_FILENO
][1]);
877 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
878 if (copydesc(STDOUT_FILENO
, fdp
[STDOUT_FILENO
][1]))
879 (void) close(fdp
[STDOUT_FILENO
][1]);
880 if (fdp
[STDOUT_FILENO
][0] > 2)
881 (void) close(fdp
[STDOUT_FILENO
][0]);
883 if (copydesc(STDERR_FILENO
, fdp
[STDERR_FILENO
][1]))
884 (void) close(fdp
[STDERR_FILENO
][1]);
885 if (fdp
[STDERR_FILENO
][0] > 2)
886 (void) close(fdp
[STDERR_FILENO
][0]);
888 (void)execvp(compr
[method
].argv
[0],
889 RCAST(char *const *, RCAST(intptr_t, compr
[method
].argv
)));
890 dprintf(STDERR_FILENO
, "exec `%s' failed, %s",
891 compr
[method
].argv
[0], strerror(errno
));
892 _exit(1); /* _exit(), not exit(), because of vfork */
895 /* Close write sides of child stdout/err pipes */
896 for (i
= 1; i
< __arraycount(fdp
); i
++)
898 /* Write the buffer data to child stdin, if we don't have fd */
900 closefd(fdp
[STDIN_FILENO
], 0);
901 writepid
= writechild(fdp
[STDIN_FILENO
][1], old
, *n
);
902 closefd(fdp
[STDIN_FILENO
], 1);
905 *newch
= CAST(unsigned char *, malloc(bytes_max
+ 1));
906 if (*newch
== NULL
) {
907 rv
= makeerror(newch
, n
, "No buffer, %s",
912 r
= sread(fdp
[STDOUT_FILENO
][0], *newch
, bytes_max
, 0);
914 DPRINTF("Read stdout failed %d (%s)\n", fdp
[STDOUT_FILENO
][0],
915 r
!= -1 ? strerror(errno
) : "no data");
919 (r
= sread(fdp
[STDERR_FILENO
][0], *newch
, bytes_max
, 0)) > 0)
921 r
= filter_error(*newch
, r
);
926 rv
= makeerror(newch
, n
, "Read failed, %s",
929 rv
= makeerror(newch
, n
, "No data");
934 /* NUL terminate, as every buffer is handled here. */
937 closefd(fdp
[STDIN_FILENO
], 1);
938 closefd(fdp
[STDOUT_FILENO
], 0);
939 closefd(fdp
[STDERR_FILENO
], 0);
941 w
= waitpid(pid
, &status
, 0);
945 rv
= makeerror(newch
, n
, "Wait failed, %s", strerror(errno
));
946 DPRINTF("Child wait return %#x\n", status
);
947 } else if (!WIFEXITED(status
)) {
948 DPRINTF("Child not exited (%#x)\n", status
);
949 } else if (WEXITSTATUS(status
) != 0) {
950 DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status
));
953 /* _After_ we know decompressor has exited, our input writer
954 * definitely will exit now (at worst, writing fails in it,
955 * since output fd is closed now on the reading side).
957 w
= waitpid(writepid
, &status
, 0);
962 closefd(fdp
[STDIN_FILENO
], 0); //why? it is already closed here!
963 DPRINTF("Returning %p n=%" SIZE_T_FORMAT
"u rv=%d\n", *newch
, *n
, rv
);