2 * Copyright (c) 2009, 2010 Joerg Sonnenberger <joerg@NetBSD.org>
3 * Copyright (c) 2007-2008 Dag-Erling Smørgrav
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer
11 * in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * $FreeBSD: head/usr.bin/unzip/unzip.c 294108 2016-01-15 23:04:36Z ak $
30 * This file would be much shorter if we didn't care about command-line
31 * compatibility with Info-ZIP's UnZip, which requires us to duplicate
32 * parts of libarchive in order to gain more detailed control of its
33 * behaviour for the purpose of implementing the -n, -o, -L and -a options.
37 #include <sys/queue.h>
51 #include <archive_entry.h>
53 /* command-line options */
54 static int a_opt
; /* convert EOL */
55 static int C_opt
; /* match case-insensitively */
56 static int c_opt
; /* extract to stdout */
57 static const char *d_arg
; /* directory */
58 static int f_opt
; /* update existing files only */
59 static int j_opt
; /* junk directories */
60 static int L_opt
; /* lowercase names */
61 static int n_opt
; /* never overwrite */
62 static int o_opt
; /* always overwrite */
63 static int p_opt
; /* extract to stdout, quiet */
64 static int q_opt
; /* quiet */
65 static int t_opt
; /* test */
66 static int u_opt
; /* update */
67 static int v_opt
; /* verbose/list */
68 static const char *y_str
= ""; /* 4 digit year */
69 static int Z1_opt
; /* zipinfo mode list files only */
72 static int unzip_debug
;
75 static int zipinfo_mode
;
80 /* convenience macro */
81 /* XXX should differentiate between ARCHIVE_{WARN,FAIL,RETRY} */
85 if (acret != ARCHIVE_OK) \
86 errorx("%s", archive_error_string(a)); \
90 * Indicates that last info() did not end with EOL. This helps error() et
91 * al. avoid printing an error message on the same line as an incomplete
92 * informational message.
96 /* fatal error message + errno */
98 error(const char *fmt
, ...)
103 fprintf(stdout
, "\n");
105 fprintf(stderr
, "unzip: ");
107 vfprintf(stderr
, fmt
, ap
);
109 fprintf(stderr
, ": %s\n", strerror(errno
));
113 /* fatal error message, no errno */
115 errorx(const char *fmt
, ...)
120 fprintf(stdout
, "\n");
122 fprintf(stderr
, "unzip: ");
124 vfprintf(stderr
, fmt
, ap
);
126 fprintf(stderr
, "\n");
130 /* non-fatal error message + errno */
132 warning(const char *fmt
, ...)
137 fprintf(stdout
, "\n");
139 fprintf(stderr
, "unzip: ");
141 vfprintf(stderr
, fmt
, ap
);
143 fprintf(stderr
, ": %s\n", strerror(errno
));
146 /* non-fatal error message, no errno */
148 warningx(const char *fmt
, ...)
153 fprintf(stdout
, "\n");
155 fprintf(stderr
, "unzip: ");
157 vfprintf(stderr
, fmt
, ap
);
159 fprintf(stderr
, "\n");
162 /* informational message (if not -q) */
164 info(const char *fmt
, ...)
168 if (q_opt
&& !unzip_debug
)
171 vfprintf(stdout
, fmt
, ap
);
178 noeol
= fmt
[strlen(fmt
) - 1] != '\n';
181 /* debug message (if unzip_debug) */
183 debug(const char *fmt
, ...)
190 vfprintf(stderr
, fmt
, ap
);
197 noeol
= fmt
[strlen(fmt
) - 1] != '\n';
200 /* duplicate a path name, possibly converting to lower case */
202 pathdup(const char *path
)
208 while (len
&& path
[len
- 1] == '/')
210 if ((str
= malloc(len
+ 1)) == NULL
) {
215 for (i
= 0; i
< len
; ++i
)
216 str
[i
] = tolower((unsigned char)path
[i
]);
218 memcpy(str
, path
, len
);
225 /* concatenate two path names */
227 pathcat(const char *prefix
, const char *path
)
232 prelen
= prefix
? strlen(prefix
) + 1 : 0;
233 len
= strlen(path
) + 1;
234 if ((str
= malloc(prelen
+ len
)) == NULL
) {
239 memcpy(str
, prefix
, prelen
); /* includes zero */
240 str
[prelen
- 1] = '/'; /* splat zero */
242 memcpy(str
+ prelen
, path
, len
); /* includes zero */
248 * Pattern lists for include / exclude processing
251 STAILQ_ENTRY(pattern
) link
;
255 STAILQ_HEAD(pattern_list
, pattern
);
256 static struct pattern_list include
= STAILQ_HEAD_INITIALIZER(include
);
257 static struct pattern_list exclude
= STAILQ_HEAD_INITIALIZER(exclude
);
260 * Add an entry to a pattern list
263 add_pattern(struct pattern_list
*list
, const char *pattern
)
265 struct pattern
*entry
;
268 debug("adding pattern '%s'\n", pattern
);
269 len
= strlen(pattern
);
270 if ((entry
= malloc(sizeof *entry
+ len
+ 1)) == NULL
) {
274 memcpy(entry
->pattern
, pattern
, len
+ 1);
275 STAILQ_INSERT_TAIL(list
, entry
, link
);
279 * Match a string against a list of patterns
282 match_pattern(struct pattern_list
*list
, const char *str
)
284 struct pattern
*entry
;
286 STAILQ_FOREACH(entry
, list
, link
) {
287 if (fnmatch(entry
->pattern
, str
, C_opt
? FNM_CASEFOLD
: 0) == 0)
294 * Verify that a given pathname is in the include list and not in the exclude list.
298 accept_pathname(const char *pathname
)
301 if (!STAILQ_EMPTY(&include
) && !match_pattern(&include
, pathname
))
303 if (!STAILQ_EMPTY(&exclude
) && match_pattern(&exclude
, pathname
))
309 * Create the specified directory with the specified mode, taking certain
310 * precautions on the way.
313 make_dir(const char *path
, int mode
)
317 if (lstat(path
, &sb
) == 0) {
318 if (S_ISDIR(sb
.st_mode
))
321 * Normally, we should either ask the user about removing
322 * the non-directory of the same name as a directory we
323 * wish to create, or respect the -n or -o command-line
324 * options. However, this may lead to a later failure or
325 * even compromise (if this non-directory happens to be a
326 * symlink to somewhere unsafe), so we don't.
330 * Don't check unlink() result; failure will cause mkdir()
331 * to fail later, which we will catch.
335 if (mkdir(path
, mode
) != 0 && errno
!= EEXIST
)
336 error("mkdir('%s')", path
);
340 * Ensure that all directories leading up to (but not including) the
341 * specified path exist.
343 * XXX inefficient + modifies the path string in-place
346 make_parent(char *path
)
351 sep
= strrchr(path
, '/');
352 if (sep
== NULL
|| sep
== path
)
355 if (lstat(path
, &sb
) == 0) {
356 if (S_ISDIR(sb
.st_mode
)) {
367 for (sep
= path
; (sep
= strchr(sep
, '/')) != NULL
; sep
++) {
368 /* root in case of absolute d_arg */
372 make_dir(path
, 0755);
379 * Extract a directory.
382 extract_dir(struct archive
*a
, struct archive_entry
*e
, const char *path
)
386 mode
= archive_entry_mode(e
) & 0777;
391 * Some zipfiles contain directories with weird permissions such
392 * as 0644 or 0444. This can cause strange issues such as being
393 * unable to extract files into the directory we just created, or
394 * the user being unable to remove the directory later without
395 * first manually changing its permissions. Therefore, we whack
396 * the permissions into shape, assuming that the user wants full
397 * access and that anyone who gets read access also gets execute access.
406 info(" creating: %s/\n", path
);
407 make_dir(path
, mode
);
408 ac(archive_read_data_skip(a
));
411 static unsigned char buffer
[8192];
412 static char spinner
[] = { '|', '/', '-', '\\' };
415 handle_existing_file(char **path
)
423 "replace %s? [y]es, [n]o, [A]ll, [N]one, [r]ename: ",
425 if (fgets(buf
, sizeof(buf
), stdin
) == NULL
) {
427 printf("NULL\n(EOF or read error, "
428 "treating as \"[N]one\"...)\n");
447 printf("New name: ");
452 len
= getdelim(path
, &alen
, '\n', stdin
);
453 if ((*path
)[len
- 1] == '\n')
454 (*path
)[len
- 1] = '\0';
463 * Detect binary files by a combination of character white list and
464 * black list. NUL bytes and other control codes without use in text files
465 * result directly in switching the file to binary mode. Otherwise, at least
466 * one white-listed byte has to be found.
468 * Black-listed: 0..6, 14..25, 28..31
469 * 0xf3ffc07f = 11110011111111111100000001111111b
470 * White-listed: 9..10, 13, >= 32
471 * 0x00002600 = 00000000000000000010011000000000b
473 * See the proginfo/txtvsbin.txt in the zip sources for a detailed discussion.
475 #define BYTE_IS_BINARY(x) ((x) < 32 && (0xf3ffc07fU & (1U << (x))))
476 #define BYTE_IS_TEXT(x) ((x) >= 32 || (0x00002600U & (1U << (x))))
479 check_binary(const unsigned char *buf
, size_t len
)
482 for (rv
= 1; len
--; ++buf
) {
483 if (BYTE_IS_BINARY(*buf
))
485 if (BYTE_IS_TEXT(*buf
))
493 * Extract to a file descriptor
496 extract2fd(struct archive
*a
, char *pathname
, int fd
)
500 unsigned char *p
, *q
, *end
;
506 /* loop over file contents and write to fd */
507 for (int n
= 0; ; n
++) {
508 if (fd
!= STDOUT_FILENO
)
509 if (tty
&& (n
% 4) == 0)
510 info(" %c\b\b", spinner
[(n
/ 4) % sizeof spinner
]);
512 len
= archive_read_data(a
, buffer
, sizeof buffer
);
517 /* left over CR from previous buffer */
519 if (len
== 0 || buffer
[0] != '\n')
520 if (write(fd
, "\r", 1) != 1)
521 error("write('%s')", pathname
);
531 * Detect whether this is a text file. The correct way to
532 * do this is to check the least significant bit of the
533 * "internal file attributes" field of the corresponding
534 * file header in the central directory, but libarchive
535 * does not provide access to this field, so we have to
536 * guess by looking for binary control characters in the
537 * buffer. Hopefully we won't guess wrong. If we do
538 * guess wrong, we print a warning message later.
540 if (a_opt
&& n
== 0) {
541 if (check_binary(buffer
, len
))
546 if (!a_opt
|| !text
) {
547 if (write(fd
, buffer
, len
) != len
)
548 error("write('%s')", pathname
);
552 /* hard case: convert \r\n to \n (sigh...) */
553 for (p
= buffer
; p
< end
; p
= q
+ 1) {
554 for (q
= p
; q
< end
; q
++) {
555 if (!warn
&& BYTE_IS_BINARY(*q
)) {
556 warningx("%s may be corrupted due"
557 " to weak text file detection"
558 " heuristic", pathname
);
570 if (write(fd
, p
, q
- p
) != q
- p
)
571 error("write('%s')", pathname
);
579 * Extract a regular file.
582 extract_file(struct archive
*a
, struct archive_entry
*e
, char **path
)
585 struct timespec mtime
;
587 struct timespec ts
[2];
589 const char *linkname
;
591 mode
= archive_entry_mode(e
) & 0777;
594 mtime
.tv_sec
= archive_entry_mtime(e
);
595 mtime
.tv_nsec
= archive_entry_mtime_nsec(e
);
597 /* look for existing file of same name */
599 if (lstat(*path
, &sb
) == 0) {
600 if (u_opt
|| f_opt
) {
601 /* check if up-to-date */
602 if ((S_ISREG(sb
.st_mode
) || S_ISLNK(sb
.st_mode
)) &&
603 (sb
.st_mtim
.tv_sec
> mtime
.tv_sec
||
604 (sb
.st_mtim
.tv_sec
== mtime
.tv_sec
&&
605 sb
.st_mtim
.tv_nsec
>= mtime
.tv_nsec
)))
612 /* do not overwrite */
615 check
= handle_existing_file(path
);
619 return; /* do not overwrite */
627 ts
[0].tv_nsec
= UTIME_NOW
;
630 /* process symlinks */
631 linkname
= archive_entry_symlink(e
);
632 if (linkname
!= NULL
) {
633 if (symlink(linkname
, *path
) != 0)
634 error("symlink('%s')", *path
);
635 info(" extracting: %s -> %s\n", *path
, linkname
);
636 if (lchmod(*path
, mode
) != 0)
637 warning("Cannot set mode for '%s'", *path
);
638 /* set access and modification time */
639 if (utimensat(AT_FDCWD
, *path
, ts
, AT_SYMLINK_NOFOLLOW
) != 0)
640 warning("utimensat('%s')", *path
);
644 if ((fd
= open(*path
, O_RDWR
|O_CREAT
|O_TRUNC
, mode
)) < 0)
645 error("open('%s')", *path
);
647 info(" extracting: %s", *path
);
649 text
= extract2fd(a
, *path
, fd
);
657 /* set access and modification time */
658 if (futimens(fd
, ts
) != 0)
659 error("futimens('%s')", *path
);
661 error("close('%s')", *path
);
665 * Extract a zipfile entry: first perform some sanity checks to ensure
666 * that it is a directory, a regular file or a symlink and that the path is
667 * not absolute and does not try to break out of the current directory;
668 * then call either extract_dir() or extract_file() as appropriate.
670 * This is complicated a bit by the various ways in which we need to
671 * manipulate the path name. Case conversion (if requested by the -L
672 * option) happens first, but the include / exclude patterns are applied
673 * to the full converted path name, before the directory part of the path
674 * is removed in accordance with the -j option. Sanity checks are
675 * intentionally done earlier than they need to be, so the user will get a
676 * warning about insecure paths even for files or directories which
677 * wouldn't be extracted anyway.
680 extract(struct archive
*a
, struct archive_entry
*e
)
682 char *pathname
, *realpathname
;
686 pathname
= pathdup(archive_entry_pathname(e
));
687 filetype
= archive_entry_filetype(e
);
690 if (pathname
[0] == '/' ||
691 strncmp(pathname
, "../", 3) == 0 ||
692 strstr(pathname
, "/../") != NULL
) {
693 warningx("skipping insecure entry '%s'", pathname
);
694 ac(archive_read_data_skip(a
));
699 /* I don't think this can happen in a zipfile.. */
700 if (!S_ISDIR(filetype
) && !S_ISREG(filetype
) && !S_ISLNK(filetype
)) {
701 warningx("skipping non-regular entry '%s'", pathname
);
702 ac(archive_read_data_skip(a
));
707 /* skip directories in -j case */
708 if (S_ISDIR(filetype
) && j_opt
) {
709 ac(archive_read_data_skip(a
));
714 /* apply include / exclude patterns */
715 if (!accept_pathname(pathname
)) {
716 ac(archive_read_data_skip(a
));
721 /* apply -j and -d */
723 for (p
= q
= pathname
; *p
; ++p
)
726 realpathname
= pathcat(d_arg
, q
);
728 realpathname
= pathcat(d_arg
, pathname
);
731 /* ensure that parent directory exists */
732 make_parent(realpathname
);
734 if (S_ISDIR(filetype
))
735 extract_dir(a
, e
, realpathname
);
737 extract_file(a
, e
, &realpathname
);
744 extract_stdout(struct archive
*a
, struct archive_entry
*e
)
749 pathname
= pathdup(archive_entry_pathname(e
));
750 filetype
= archive_entry_filetype(e
);
752 /* I don't think this can happen in a zipfile.. */
753 if (!S_ISDIR(filetype
) && !S_ISREG(filetype
) && !S_ISLNK(filetype
)) {
754 warningx("skipping non-regular entry '%s'", pathname
);
755 ac(archive_read_data_skip(a
));
760 /* always skip directories when extracting to stdout */
761 if (S_ISDIR(filetype
)) {
762 ac(archive_read_data_skip(a
));
767 /* apply include / exclude patterns */
768 if (!accept_pathname(pathname
)) {
769 ac(archive_read_data_skip(a
));
775 info("x %s\n", pathname
);
777 (void)extract2fd(a
, pathname
, STDOUT_FILENO
);
783 * Print the name of an entry to stdout.
786 list(struct archive
*a
, struct archive_entry
*e
)
792 mtime
= archive_entry_mtime(e
);
793 tm
= localtime(&mtime
);
795 strftime(buf
, sizeof(buf
), "%m-%d-%G %R", tm
);
797 strftime(buf
, sizeof(buf
), "%m-%d-%g %R", tm
);
801 printf(" %8ju %s %s\n",
802 (uintmax_t)archive_entry_size(e
),
803 buf
, archive_entry_pathname(e
));
804 } else if (v_opt
== 2) {
805 printf("%8ju Stored %7ju 0%% %s %08x %s\n",
806 (uintmax_t)archive_entry_size(e
),
807 (uintmax_t)archive_entry_size(e
),
810 archive_entry_pathname(e
));
814 printf("%s\n",archive_entry_pathname(e
));
816 ac(archive_read_data_skip(a
));
820 * Extract to memory to check CRC
823 test(struct archive
*a
, struct archive_entry
*e
)
829 if (S_ISDIR(archive_entry_filetype(e
)))
832 info(" testing: %s\t", archive_entry_pathname(e
));
833 while ((len
= archive_read_data(a
, buffer
, sizeof buffer
)) > 0)
836 info(" %s\n", archive_error_string(a
));
842 /* shouldn't be necessary, but it doesn't hurt */
843 ac(archive_read_data_skip(a
));
849 * Main loop: open the zipfile, iterate over its contents and decide what
850 * to do with each entry.
853 unzip(const char *fn
)
856 struct archive_entry
*e
;
858 uintmax_t total_size
, file_count
, error_count
;
860 if ((a
= archive_read_new()) == NULL
)
861 error("archive_read_new failed");
863 ac(archive_read_support_format_zip(a
));
864 ac(archive_read_open_filename(a
, fn
, 8192));
867 if (!p_opt
&& !q_opt
)
868 printf("Archive: %s\n", fn
);
870 printf(" Length %sDate Time Name\n", y_str
);
871 printf(" -------- %s---- ---- ----\n", y_str
);
872 } else if (v_opt
== 2) {
873 printf(" Length Method Size Ratio %sDate Time CRC-32 Name\n", y_str
);
874 printf("-------- ------ ------- ----- %s---- ---- ------ ----\n", y_str
);
882 ret
= archive_read_next_header(a
, &e
);
883 if (ret
== ARCHIVE_EOF
)
888 error_count
+= test(a
, e
);
891 else if (p_opt
|| c_opt
)
892 extract_stdout(a
, e
);
900 total_size
+= archive_entry_size(e
);
906 printf(" -------- %s-------\n", y_str
);
907 printf(" %8ju %s%ju file%s\n",
908 total_size
, y_str
, file_count
, file_count
!= 1 ? "s" : "");
909 } else if (v_opt
== 2) {
910 printf("-------- ------- --- %s-------\n", y_str
);
911 printf("%8ju %7ju 0%% %s%ju file%s\n",
912 total_size
, total_size
, y_str
, file_count
,
913 file_count
!= 1 ? "s" : "");
917 ac(archive_read_close(a
));
918 (void)archive_read_free(a
);
921 if (error_count
> 0) {
922 errorx("%ju checksum error(s) found.", error_count
);
925 printf("No errors detected in compressed data of %s.\n",
935 fprintf(stderr
, "Usage: unzip [-aCcfjLlnopqtuvyZ1] [-d dir] [-x pattern] "
941 getopts(int argc
, char *argv
[])
945 optreset
= optind
= 1;
946 while ((opt
= getopt(argc
, argv
, "aCcd:fjLlnopqtuvx:yZ1")) != -1)
999 add_pattern(&exclude
, optarg
);
1015 main(int argc
, char *argv
[])
1017 const char *zipfile
;
1020 if (isatty(STDOUT_FILENO
))
1023 if (getenv("UNZIP_DEBUG") != NULL
)
1025 for (int i
= 0; i
< argc
; ++i
)
1026 debug("%s%c", argv
[i
], (i
< argc
- 1) ? ' ' : '\n');
1029 * Info-ZIP's unzip(1) expects certain options to come before the
1030 * zipfile name, and others to come after - though it does not
1031 * enforce this. For simplicity, we accept *all* options both
1032 * before and after the zipfile name.
1034 nopts
= getopts(argc
, argv
);
1037 * When more of the zipinfo mode options are implemented, this
1038 * will need to change.
1040 if (zipinfo_mode
&& !Z1_opt
) {
1041 printf("Zipinfo mode needs additional options\n");
1047 zipfile
= argv
[nopts
++];
1049 if (strcmp(zipfile
, "-") == 0)
1050 zipfile
= NULL
; /* STDIN */
1052 while (nopts
< argc
&& *argv
[nopts
] != '-')
1053 add_pattern(&include
, argv
[nopts
++]);
1055 nopts
--; /* fake argv[0] */
1056 nopts
+= getopts(argc
- nopts
, argv
+ nopts
);
1058 if (n_opt
+ o_opt
+ u_opt
> 1)
1059 errorx("-n, -o and -u are contradictory");