2 * Copyright (c) 2003-2007 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "bsdtar_platform.h"
27 __FBSDID("$FreeBSD: src/usr.bin/tar/util.c,v 1.23 2008/12/15 06:00:25 kientzle Exp $");
29 #ifdef HAVE_SYS_STAT_H
32 #ifdef HAVE_SYS_TYPES_H
33 #include <sys/types.h> /* Linux doesn't define mode_t, etc. in sys/stat.h. */
58 /* If we don't have wctype, we need to hack up some version of iswprint(). */
59 #define iswprint isprint
64 #include "passphrase.h"
/* Forward declarations for helpers that are private to this file. */
static size_t		 bsdtar_expand_char(char *, size_t, char);
static const char	*strip_components(const char *path, int elements);
69 #if defined(_WIN32) && !defined(__CYGWIN__)
73 /* TODO: Hack up a version of mbtowc for platforms with no wide
74 * character support at all. I think the following might suffice,
75 * but it needs careful testing.
77 * #define mbtowc(wcp, p, n) ((*wcp = *p), 1)
/*
 * Print a string, taking care with any non-printable characters.
 *
 * Note that we use a stack-allocated buffer to receive the formatted
 * string if we can.  This is partly performance (avoiding a call to
 * malloc()), partly out of expedience (we have to call vsnprintf()
 * before malloc() anyway to find out how big a buffer we need; we may
 * as well point that first call at a small local buffer in case it
 * works), but mostly for safety (so we can use this to print messages
 * about out-of-memory conditions).
 *
 * f:   stream that receives the sanitized text.
 * fmt: printf-style format; the remaining arguments are its operands.
 *
 * Bytes that are not printable in the current locale (and every
 * backslash) are written as C-style escapes by bsdtar_expand_char().
 */
void
safe_fprintf(FILE *f, const char *fmt, ...)
{
	char fmtbuff_stack[256]; /* Place to format the printf() string. */
	/*
	 * Buffer for outgoing characters.  One pass of the output loop
	 * can append up to four bytes for every byte of a multibyte
	 * character, so the flush threshold below keeps 128 bytes free;
	 * a 20-byte reserve is not enough for long multibyte sequences.
	 */
	char outbuff[512];
	char *fmtbuff_heap; /* If fmtbuff_stack is too small, we use malloc */
	char *fmtbuff;  /* Pointer to fmtbuff_stack or fmtbuff_heap. */
	int fmtbuff_length;
	int length, n;
	va_list ap;
	const char *p;
	unsigned i;
	wchar_t wc;
	char try_wc;

	/* Use a stack-allocated buffer if we can, for speed and safety. */
	fmtbuff_heap = NULL;
	fmtbuff_length = sizeof(fmtbuff_stack);
	fmtbuff = fmtbuff_stack;

	/* Try formatting into the stack buffer. */
	va_start(ap, fmt);
	length = vsnprintf(fmtbuff, fmtbuff_length, fmt, ap);
	va_end(ap);

	/* If the result was too large, allocate a buffer on the heap. */
	while (length < 0 || length >= fmtbuff_length) {
		if (length >= fmtbuff_length)
			fmtbuff_length = length+1;
		else if (fmtbuff_length < 8192)
			fmtbuff_length *= 2;
		else if (fmtbuff_length < 1000000)
			fmtbuff_length += fmtbuff_length / 4;
		else {
			/* Give up growing; use what the heap buffer holds. */
			length = fmtbuff_length;
			fmtbuff_heap[length-1] = '\0';
			break;
		}
		free(fmtbuff_heap);
		fmtbuff_heap = malloc(fmtbuff_length);

		/* Reformat the result into the heap buffer if we can. */
		if (fmtbuff_heap != NULL) {
			fmtbuff = fmtbuff_heap;
			va_start(ap, fmt);
			length = vsnprintf(fmtbuff, fmtbuff_length, fmt, ap);
			va_end(ap);
		} else {
			/*
			 * Fall back to the truncated string in
			 * fmtbuff_stack.  fmtbuff must be reset
			 * explicitly: on a second or later pass it
			 * still points at the heap buffer freed above.
			 */
			fmtbuff = fmtbuff_stack;
			length = sizeof(fmtbuff_stack) - 1;
			break;
		}
	}

	/* Note: mbrtowc() has a cleaner API, but mbtowc() seems a bit
	 * more portable, so we use that here instead. */
	if (mbtowc(NULL, NULL, 1) == -1) { /* Reset the shift state. */
		/* mbtowc() should never fail in practice, but
		 * handle the theoretical error anyway. */
		free(fmtbuff_heap);
		return;
	}

	/* Write data, expanding unprintable characters. */
	p = fmtbuff;
	i = 0;
	try_wc = 1;
	while (*p != '\0') {

		/* Convert to wide char, test if the wide
		 * char is printable in the current locale. */
		if (try_wc && (n = mbtowc(&wc, p, length)) != -1) {
			length -= n;
			if (iswprint(wc) && wc != L'\\') {
				/* Printable, copy the bytes through. */
				while (n-- > 0)
					outbuff[i++] = *p++;
			} else {
				/* Not printable, format the bytes. */
				while (n-- > 0)
					i += (unsigned)bsdtar_expand_char(
					    outbuff, i, *p++);
			}
		} else {
			/* After any conversion failure, don't bother
			 * trying to convert the rest. */
			i += (unsigned)bsdtar_expand_char(outbuff, i, *p++);
			try_wc = 0;
		}

		/* If our output buffer is full, dump it and keep going. */
		if (i > (sizeof(outbuff) - 128)) {
			outbuff[i] = '\0';
			fprintf(f, "%s", outbuff);
			i = 0;
		}
	}
	outbuff[i] = '\0';
	fprintf(f, "%s", outbuff);

	/* If we allocated a heap-based formatting buffer, free it now. */
	free(fmtbuff_heap);
}
/*
 * Render an arbitrary sequence of bytes into printable ASCII characters.
 *
 * Writes the rendering of the single byte 'c' into 'buff' starting at
 * index 'offset' and returns how many bytes were appended:
 *   1 for a printable byte other than backslash,
 *   2 for a named escape (\a \b \f \n \r \t \v \\),
 *   4 for a three-digit octal escape (\ooo).
 * The octal case also stores a terminating NUL after the digits, so
 * the caller must leave at least 5 bytes free at buff + offset.
 */
static size_t
bsdtar_expand_char(char *buff, size_t offset, char c)
{
	size_t i = offset;

	if (isprint((unsigned char)c) && c != '\\')
		buff[i++] = c;
	else {
		buff[i++] = '\\';
		switch (c) {
		case '\a': buff[i++] = 'a'; break;
		case '\b': buff[i++] = 'b'; break;
		case '\f': buff[i++] = 'f'; break;
		case '\n': buff[i++] = 'n'; break;
#if '\r' != '\n'
		/* On some platforms, \n and \r are the same. */
		case '\r': buff[i++] = 'r'; break;
#endif
		case '\t': buff[i++] = 't'; break;
		case '\v': buff[i++] = 'v'; break;
		case '\\': buff[i++] = '\\'; break;
		default:
			/* Masked to 0..255, so exactly three octal digits. */
			snprintf(buff + i, 4, "%03o", 0xFF & (int)c);
			i += 3;
			break;
		}
	}

	return (i - offset);
}
232 yes(const char *fmt
, ...)
240 vfprintf(stderr
, fmt
, ap
);
242 fprintf(stderr
, " (y/N)? ");
245 l
= read(2, buff
, sizeof(buff
) - 1);
247 fprintf(stderr
, "Keyboard read failed\n");
254 for (p
= buff
; *p
!= '\0'; p
++) {
255 if (isspace((unsigned char)*p
))
271 * The logic here for -C <dir> attempts to avoid
272 * chdir() as long as possible. For example:
273 * "-C /foo -C /bar file" needs chdir("/bar") but not chdir("/foo")
274 * "-C /foo -C bar file" needs chdir("/foo/bar")
275 * "-C /foo -C bar /file1" does not need chdir()
276 * "-C /foo -C bar /file1 file2" needs chdir("/foo/bar") before file2
278 * The only correct way to handle this is to record a "pending" chdir
279 * request and combine multiple requests intelligently until we
280 * need to process a non-absolute file. set_chdir() adds the new dir
281 * to the pending list; do_chdir() actually executes any pending chdir.
283 * This way, programs that build tar command lines don't have to worry
284 * about -C with non-existent directories; such requests will only
285 * fail if the directory must be accessed.
289 set_chdir(struct bsdtar
*bsdtar
, const char *newdir
)
291 #if defined(_WIN32) && !defined(__CYGWIN__)
292 if (newdir
[0] == '/' || newdir
[0] == '\\' ||
293 /* Detect this type, for example, "C:\" or "C:/" */
294 (((newdir
[0] >= 'a' && newdir
[0] <= 'z') ||
295 (newdir
[0] >= 'A' && newdir
[0] <= 'Z')) &&
296 newdir
[1] == ':' && (newdir
[2] == '/' || newdir
[2] == '\\'))) {
298 if (newdir
[0] == '/') {
300 /* The -C /foo -C /bar case; dump first one. */
301 free(bsdtar
->pending_chdir
);
302 bsdtar
->pending_chdir
= NULL
;
304 if (bsdtar
->pending_chdir
== NULL
)
305 /* Easy case: no previously-saved dir. */
306 bsdtar
->pending_chdir
= strdup(newdir
);
308 /* The -C /foo -C bar case; concatenate */
309 char *old_pending
= bsdtar
->pending_chdir
;
310 size_t old_len
= strlen(old_pending
);
311 bsdtar
->pending_chdir
= malloc(old_len
+ strlen(newdir
) + 2);
312 if (old_pending
[old_len
- 1] == '/')
313 old_pending
[old_len
- 1] = '\0';
314 if (bsdtar
->pending_chdir
!= NULL
)
315 sprintf(bsdtar
->pending_chdir
, "%s/%s",
316 old_pending
, newdir
);
319 if (bsdtar
->pending_chdir
== NULL
)
320 lafe_errc(1, errno
, "No memory");
324 do_chdir(struct bsdtar
*bsdtar
)
326 if (bsdtar
->pending_chdir
== NULL
)
329 if (chdir(bsdtar
->pending_chdir
) != 0) {
330 lafe_errc(1, 0, "could not chdir to '%s'\n",
331 bsdtar
->pending_chdir
);
333 free(bsdtar
->pending_chdir
);
334 bsdtar
->pending_chdir
= NULL
;
/*
 * Skip the first 'elements' path components of 'p'.
 *
 * Returns a pointer into 'p' just past the skipped components and any
 * separators that follow them, or NULL when the path has too few
 * components or nothing is left after stripping.
 */
static const char *
strip_components(const char *p, int elements)
{
	/* Skip as many elements as necessary. */
	while (elements > 0) {
		switch (*p++) {
		case '/':
#if defined(_WIN32) && !defined(__CYGWIN__)
		case '\\': /* Support \ path sep on Windows ONLY. */
#endif
			elements--;
			break;
		case '\0':
			/* Path is too short, skip it. */
			return (NULL);
		default:
			break;
		}
	}

	/* Skip any / characters.  This handles short paths that have
	 * additional / termination.  This also handles the case where
	 * the logic above stops in the middle of a duplicate //
	 * sequence (which would otherwise get converted to an
	 * absolute path). */
	for (;;) {
		switch (*p) {
		case '/':
#if defined(_WIN32) && !defined(__CYGWIN__)
		case '\\': /* Support \ path sep on Windows ONLY. */
#endif
			++p;
			break;
		case '\0':
			return (NULL);
		default:
			return (p);
		}
	}
}
377 warn_strip_leading_char(struct bsdtar
*bsdtar
, const char *c
)
379 if (!bsdtar
->warned_lead_slash
) {
381 "Removing leading '%c' from member names",
383 bsdtar
->warned_lead_slash
= 1;
388 warn_strip_drive_letter(struct bsdtar
*bsdtar
)
390 if (!bsdtar
->warned_lead_slash
) {
392 "Removing leading drive letter from "
394 bsdtar
->warned_lead_slash
= 1;
/*
 * Convert absolute path to non-absolute path by skipping leading
 * absolute path prefixes.
 */
403 strip_absolute_path(struct bsdtar
*bsdtar
, const char *p
)
407 /* Remove leading "//./" or "//?/" or "//?/UNC/"
408 * (absolute path prefixes used by Windows API) */
409 if ((p
[0] == '/' || p
[0] == '\\') &&
410 (p
[1] == '/' || p
[1] == '\\') &&
411 (p
[2] == '.' || p
[2] == '?') &&
412 (p
[3] == '/' || p
[3] == '\\'))
415 (p
[4] == 'U' || p
[4] == 'u') &&
416 (p
[5] == 'N' || p
[5] == 'n') &&
417 (p
[6] == 'C' || p
[6] == 'c') &&
418 (p
[7] == '/' || p
[7] == '\\'))
422 warn_strip_drive_letter(bsdtar
);
425 /* Remove multiple leading slashes and Windows drive letters. */
428 if (((p
[0] >= 'a' && p
[0] <= 'z') ||
429 (p
[0] >= 'A' && p
[0] <= 'Z')) &&
432 warn_strip_drive_letter(bsdtar
);
435 /* Remove leading "/../", "/./", "//", etc. */
436 while (p
[0] == '/' || p
[0] == '\\') {
439 (p
[3] == '/' || p
[3] == '\\')) {
440 p
+= 3; /* Remove "/..", leave "/" for next pass. */
441 } else if (p
[1] == '.' &&
442 (p
[2] == '/' || p
[2] == '\\')) {
443 p
+= 2; /* Remove "/.", leave "/" for next pass. */
445 p
+= 1; /* Remove "/". */
446 warn_strip_leading_char(bsdtar
, rp
);
/*
 * Handle --strip-components and any future path-rewriting options.
 * Returns non-zero if the pathname should not be extracted.
 *
 * Note: The rewrites are applied uniformly to pathnames and hardlink
 * names but not to symlink bodies.  This is deliberate: Symlink
 * bodies are not necessarily filenames.  Even when they are, they
 * need to be interpreted relative to the directory containing them,
 * so simple rewrites like this are rarely appropriate.
 *
 * TODO: Support pax-style regex path rewrites.
 */
466 edit_pathname(struct bsdtar
*bsdtar
, struct archive_entry
*entry
)
468 const char *name
= archive_entry_pathname(entry
);
469 const char *original_name
= name
;
470 const char *hardlinkname
= archive_entry_hardlink(entry
);
471 const char *original_hardlinkname
= hardlinkname
;
472 #if defined(HAVE_REGEX_H) || defined(HAVE_PCREPOSIX_H)
476 /* Apply user-specified substitution to pathname. */
477 r
= apply_substitution(bsdtar
, name
, &subst_name
, 0, 0);
479 lafe_warnc(0, "Invalid substitution, skipping entry");
483 archive_entry_copy_pathname(entry
, subst_name
);
484 if (*subst_name
== '\0') {
489 name
= archive_entry_pathname(entry
);
490 original_name
= name
;
493 /* Apply user-specified substitution to hardlink target. */
494 if (hardlinkname
!= NULL
) {
495 r
= apply_substitution(bsdtar
, hardlinkname
, &subst_name
, 0, 1);
497 lafe_warnc(0, "Invalid substitution, skipping entry");
501 archive_entry_copy_hardlink(entry
, subst_name
);
504 hardlinkname
= archive_entry_hardlink(entry
);
505 original_hardlinkname
= hardlinkname
;
508 /* Apply user-specified substitution to symlink body. */
509 if (archive_entry_symlink(entry
) != NULL
) {
510 r
= apply_substitution(bsdtar
, archive_entry_symlink(entry
), &subst_name
, 1, 0);
512 lafe_warnc(0, "Invalid substitution, skipping entry");
516 archive_entry_copy_symlink(entry
, subst_name
);
522 /* Strip leading dir names as per --strip-components option. */
523 if (bsdtar
->strip_components
> 0) {
524 name
= strip_components(name
, bsdtar
->strip_components
);
528 if (hardlinkname
!= NULL
) {
529 hardlinkname
= strip_components(hardlinkname
,
530 bsdtar
->strip_components
);
531 if (hardlinkname
== NULL
)
536 if (!bsdtar
->option_absolute_paths
) {
537 /* By default, don't write or restore absolute pathnames. */
538 name
= strip_absolute_path(bsdtar
, name
);
542 if (hardlinkname
!= NULL
) {
543 hardlinkname
= strip_absolute_path(bsdtar
, hardlinkname
);
544 if (*hardlinkname
== '\0')
548 /* Strip redundant leading '/' characters. */
549 while (name
[0] == '/' && name
[1] == '/')
553 /* Replace name in archive_entry. */
554 if (name
!= original_name
) {
555 archive_entry_copy_pathname(entry
, name
);
557 if (hardlinkname
!= original_hardlinkname
) {
558 archive_entry_copy_hardlink(entry
, hardlinkname
);
/*
 * It would be nice to just use printf() for formatting large numbers,
 * but the compatibility problems are quite a headache.  Hence the
 * following simple utility function.
 *
 * Returns the decimal text of n0.  The result points into a static
 * buffer that the next call overwrites, so copy it if it must outlive
 * that call.
 */
const char *
tar_i64toa(int64_t n0)
{
	static char buff[24];
	/*
	 * Take the magnitude in unsigned arithmetic: negating INT64_MIN
	 * as a signed value overflows, which is undefined behaviour.
	 */
	uint64_t n = n0 < 0 ? (uint64_t)0 - (uint64_t)n0 : (uint64_t)n0;
	char *p = buff + sizeof(buff);

	/* Build the digits from the end of the buffer backwards. */
	*--p = '\0';
	do {
		*--p = '0' + (int)(n % 10);
	} while (n /= 10);
	if (n0 < 0)
		*--p = '-';
	return p;
}
/*
 * Like strcmp(), but try to be a little more aware of the fact that
 * we're comparing two paths.  Right now, it just handles leading
 * "./" and trailing '/' specially, so that "a/b/" == "./a/b"
 *
 * Returns 0 when the paths are considered equal, otherwise the
 * difference between the first pair of bytes that differ, compared
 * as unsigned char.
 *
 * TODO: Make this better, so that "./a//b/./c/" == "a/b/c"
 * TODO: After this works, push it down into libarchive.
 * TODO: Publish the path normalization routines in libarchive so
 * that bsdtar can normalize paths and use fast strcmp() instead
 * of this routine.
 *
 * Note: This is currently only used within write.c, so should
 * not handle \ path separators.
 */
int
pathcmp(const char *a, const char *b)
{
	/* Skip leading './' */
	if (a[0] == '.' && a[1] == '/' && a[2] != '\0')
		a += 2;
	if (b[0] == '.' && b[1] == '/' && b[2] != '\0')
		b += 2;
	/* Find the first difference, or return (0) if none. */
	while (*a == *b) {
		if (*a == '\0')
			return (0);
		a++;
		b++;
	}
	/*
	 * If one ends in '/' and the other one doesn't,
	 * they're the same.
	 */
	if (a[0] == '/' && a[1] == '\0' && b[0] == '\0')
		return (0);
	if (a[0] == '\0' && b[0] == '/' && b[1] == '\0')
		return (0);
	/* They're really different, return the correct sign. */
	return (*(const unsigned char *)a - *(const unsigned char *)b);
}
626 #define PPBUFF_SIZE 1024
628 passphrase_callback(struct archive
*a
, void *_client_data
)
630 struct bsdtar
*bsdtar
= (struct bsdtar
*)_client_data
;
631 (void)a
; /* UNUSED */
633 if (bsdtar
->ppbuff
== NULL
) {
634 bsdtar
->ppbuff
= malloc(PPBUFF_SIZE
);
635 if (bsdtar
->ppbuff
== NULL
)
636 lafe_errc(1, errno
, "Out of memory");
638 return lafe_readpassphrase("Enter passphrase:",
639 bsdtar
->ppbuff
, PPBUFF_SIZE
);
643 passphrase_free(char *ppbuff
)
645 if (ppbuff
!= NULL
) {
646 memset(ppbuff
, 0, PPBUFF_SIZE
);
/*
 * Display information about the current file.
 *
 * The format here roughly duplicates the output of 'ls -l'.
 * This is based on SUSv2, where 'tar tv' is documented as
 * listing additional information in an "unspecified format,"
 * and 'pax -l' is documented as using the same format as 'ls -l'.
 */
660 list_item_verbose(struct bsdtar
*bsdtar
, FILE *out
, struct archive_entry
*entry
)
670 * We avoid collecting the entire list in memory at once by
671 * listing things as we see them. However, that also means we can't
672 * just pre-compute the field widths. Instead, we start with guesses
673 * and just widen them as necessary. These numbers are completely
676 if (!bsdtar
->u_width
) {
678 bsdtar
->gs_width
= 13;
682 fprintf(out
, "%s %d ",
683 archive_entry_strmode(entry
),
684 archive_entry_nlink(entry
));
686 /* Use uname if it's present, else uid. */
687 p
= archive_entry_uname(entry
);
688 if ((p
== NULL
) || (*p
== '\0')) {
690 (unsigned long)archive_entry_uid(entry
));
694 if (w
> bsdtar
->u_width
)
696 fprintf(out
, "%-*s ", (int)bsdtar
->u_width
, p
);
698 /* Use gname if it's present, else gid. */
699 p
= archive_entry_gname(entry
);
700 if (p
!= NULL
&& p
[0] != '\0') {
701 fprintf(out
, "%s", p
);
705 (unsigned long)archive_entry_gid(entry
));
707 fprintf(out
, "%s", tmp
);
711 * Print device number or file size, right-aligned so as to make
712 * total width of group and devnum/filesize fields be gs_width.
713 * If gs_width is too small, grow it.
715 if (archive_entry_filetype(entry
) == AE_IFCHR
716 || archive_entry_filetype(entry
) == AE_IFBLK
) {
717 sprintf(tmp
, "%lu,%lu",
718 (unsigned long)archive_entry_rdevmajor(entry
),
719 (unsigned long)archive_entry_rdevminor(entry
));
721 strcpy(tmp
, tar_i64toa(archive_entry_size(entry
)));
723 if (w
+ strlen(tmp
) >= bsdtar
->gs_width
)
724 bsdtar
->gs_width
= w
+strlen(tmp
)+1;
725 fprintf(out
, "%*s", (int)(bsdtar
->gs_width
- w
), tmp
);
727 /* Format the time using 'ls -l' conventions. */
728 tim
= archive_entry_mtime(entry
);
729 #define HALF_YEAR (time_t)365 * 86400 / 2
730 #if defined(_WIN32) && !defined(__CYGWIN__)
731 #define DAY_FMT "%d" /* Windows' strftime function does not support %e format. */
733 #define DAY_FMT "%e" /* Day number without leading zeros */
735 if (tim
< now
- HALF_YEAR
|| tim
> now
+ HALF_YEAR
)
736 fmt
= bsdtar
->day_first
? DAY_FMT
" %b %Y" : "%b " DAY_FMT
" %Y";
738 fmt
= bsdtar
->day_first
? DAY_FMT
" %b %H:%M" : "%b " DAY_FMT
" %H:%M";
739 strftime(tmp
, sizeof(tmp
), fmt
, localtime(&tim
));
740 fprintf(out
, " %s ", tmp
);
741 safe_fprintf(out
, "%s", archive_entry_pathname(entry
));
743 /* Extra information for links. */
744 if (archive_entry_hardlink(entry
)) /* Hard link */
745 safe_fprintf(out
, " link to %s",
746 archive_entry_hardlink(entry
));
747 else if (archive_entry_symlink(entry
)) /* Symbolic link */
748 safe_fprintf(out
, " -> %s", archive_entry_symlink(entry
));